# Data Preparation and Transformation
This notebook demonstrates how to process and transform a dataset of credit card transactions, including calculating installment values and dates.

## Step 1: Import Required Libraries
We start by importing the necessary libraries for data manipulation.

In [None]:
import pandas as pd

## Step 2: Load the Dataset
Load the dataset containing transaction details into a pandas DataFrame.

In [None]:
# Location of the raw installment-transactions CSV, relative to this notebook
csv_path = "../data/parcelas_cartao.csv"

# Read the file into a DataFrame and preview its first rows
df = pd.read_csv(csv_path)
df.head()

## Step 3: Data Transformation
### Add Installment Value and Order Columns
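We parse the transaction date, compute the value of each installment (`vlVenda / qtParcelas`), and build a zero-based installment order (`0` to `qtParcelas - 1`) for every transaction. The DataFrame is then exploded so that each installment gets its own row.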

In [None]:
# Parse the transaction date so month offsets can be added to it later on
df["dtTransacao"] = pd.to_datetime(df["dtTransacao"])

# Value of a single installment: the sale value split evenly across installments
df["vlParcela"] = df["vlVenda"] / df["qtParcelas"]

# Zero-based installment orders per transaction, e.g. qtParcelas == 3 -> [0, 1, 2].
# Built directly from the column instead of a row-wise `df.apply(..., axis=1)`:
# on an empty frame that call returns a DataFrame rather than a Series of lists,
# which makes the single-column assignment fail, and it also depends on the row
# Series keeping `qtParcelas` as an integer (an all-numeric frame upcasts to float).
df["ordemParcela"] = [list(range(qt)) for qt in df["qtParcelas"]]

# One row per installment; the other columns are repeated for each order value
df_explode = df.explode("ordemParcela")

### Define a Function to Calculate Installment Dates
This function calculates the date of each installment by adding the installment order (in months) to the transaction date.

In [None]:
def calculo_data_parcela(row):
    """
    Build the year-month label of the month in which an installment falls.

    The installment order is added, in months, to the transaction date, so an
    order of 0 yields the transaction's own month.

    Parameters:
        row (pd.Series): A row of a DataFrame containing 'dtTransacao' (transaction date)
                         and 'ordemParcela' (installment order, in months).

    Returns:
        str: The installment month in the format 'YYYY-MM'. The month is
             zero-padded so that labels sort chronologically when compared as
             strings (e.g. as pivot-table column headers); without padding,
             '2023-10' would sort before '2023-2'.
    """
    # Shift the transaction date forward by the installment order, in months
    due_date = row["dtTransacao"] + pd.DateOffset(months=row["ordemParcela"])

    # Zero-pad the month to keep lexicographic order equal to chronological order
    return f"{due_date.year}-{due_date.month:02d}"

### Apply the Function to Calculate Installment Dates
Add a `dtParcela` column to the exploded DataFrame holding the year-month label of each installment.

In [None]:
# Compute the year-month label for every installment row
installment_labels = df_explode.apply(calculo_data_parcela, axis=1)

# Store the labels as a new column on the exploded frame
df_explode["dtParcela"] = installment_labels

# Show the transformed DataFrame
df_explode

## Step 4: Pivot the Data
Group the data by client ID and installment date, sum the installment values, and pivot the table to create a summary view.

In [None]:
# Long format: total installment value per client and installment month
grouped_totals = df_explode.groupby(["idCliente", "dtParcela"])["vlParcela"].sum()
monthly_totals = grouped_totals.reset_index()

# Wide format: one row per client, one column per installment month;
# client/month combinations with no installment are filled with 0
monthly_totals.pivot_table(
    index="idCliente",
    columns="dtParcela",
    values="vlParcela",
    fill_value=0,
)