In [7]:
from pathlib import Path

import numpy as np
import pandas as pd

In [9]:
# Build the input path outside the try block so it is always defined when the
# error message below is formatted. The project root is taken to be the parent
# of the current working directory.
project_root = Path.cwd().parent
clean_data_path = project_root / "data" / "processed" / "credit_card_clients_clean.csv"

try:
    df = pd.read_csv(clean_data_path)
except FileNotFoundError as exc:
    # Fail fast with the explicit path: merely printing here would leave `df`
    # undefined and make every later cell fail with an unrelated NameError.
    raise FileNotFoundError(f"Error: No se encontró el archivo: {clean_data_path}") from exc

In [11]:
# Month suffixes shared by the three column families, listed from September
# back to April.
_month_suffixes = ['sept', 'aug', 'july', 'june', 'may', 'april']

# Column names per family, generated from the shared suffix list.
bill_amt_cols = [f'bill_amt_{suffix}' for suffix in _month_suffixes]
pay_amt_cols = [f'pay_amt_{suffix}' for suffix in _month_suffixes]
pay_status_cols = [f'pay_{suffix}' for suffix in _month_suffixes]


In [12]:
# Independent (deep) copy of the loaded frame: the features added below go on
# this copy, so `df` itself stays unmodified.
df_features = df.copy(deep=True)

In [13]:
# Small constant added to every denominator to avoid an exact division by zero.
# It does not depend on the month, so it is defined once outside the loop.
epsilon = 1e-6

for month in ['sept', 'aug', 'july', 'june', 'may', 'april']:
    # Bill amount of the month divided by `limit_bal`.
    df_features[f'utilization_{month}'] = df_features[f'bill_amt_{month}'] / (df_features['limit_bal'] + epsilon)
    # Amount paid in the month divided by the bill amount of the same month.
    # NOTE(review): if a bill amount is exactly 0 the denominator is epsilon, so
    # any non-zero payment is scaled by 1e6 -- confirm such outliers are handled
    # downstream (clipping / capping) or treat zero bills explicitly.
    df_features[f'payment_ratio_{month}'] = df_features[f'pay_amt_{month}'] / (df_features[f'bill_amt_{month}'] + epsilon)

In [14]:
# Per-row summary statistics across the monthly bill-amount columns.
# The slice is taken once; the columns added below are not part of it.
bill_history = df_features[bill_amt_cols]

df_features['bill_amt_avg'] = bill_history.mean(axis=1)
df_features['bill_amt_std'] = bill_history.std(axis=1)
df_features['bill_amt_max'] = bill_history.max(axis=1)

In [None]:
def calculate_slope(row, cols):
    """Return the slope of a first-degree least-squares fit through ``row[cols]``.

    The selected values are fitted against their positions
    ``0, 1, ..., len(cols) - 1``, so ``cols`` must be given in the order in
    which the trend should be measured.

    Parameters
    ----------
    row : pandas.Series
        A single record containing at least the labels listed in ``cols``.
    cols : list of str
        Labels to read from ``row``, in fitting order.

    Returns
    -------
    float
        The first-degree coefficient returned by ``np.polyfit``.
    """
    positions = np.arange(len(cols))
    # Coerce explicitly to float: a row taken from a frame with mixed column
    # types is object-dtype, which np.polyfit cannot fit.
    values = row[cols].to_numpy(dtype=float)
    slope, _intercept = np.polyfit(positions, values, 1)
    return slope


In [17]:
# The column lists run from September back to April; reversing them makes the
# fit positions run from April (0) to September (5).
bill_cols_ordered = bill_amt_cols[::-1]
pay_cols_ordered = pay_amt_cols[::-1]

# Apply row-wise over only the columns being fitted instead of the whole frame:
# each row Series is then much smaller and contains nothing but the values
# that calculate_slope actually reads.
df_features['bill_amt_slope'] = df_features[bill_cols_ordered].apply(
    calculate_slope, axis=1, args=(bill_cols_ordered,)
)
df_features['pay_amt_slope'] = df_features[pay_cols_ordered].apply(
    calculate_slope, axis=1, args=(pay_cols_ordered,)
)

In [18]:
# Second independent (deep) copy of the loaded frame; the regression features
# are added to this copy, leaving `df` unmodified.
df_features_reg = df.copy(deep=True)

In [19]:
# Months used for the regression feature set.
_reg_months = ['june', 'may', 'april']

bill_cols_reg = [f'bill_amt_{suffix}' for suffix in _reg_months]
# Only earlier payments: June is skipped for the payment amounts.
pay_cols_reg = [f'pay_amt_{suffix}' for suffix in _reg_months[1:]]
pay_status_cols_reg = [f'pay_{suffix}' for suffix in _reg_months]

In [20]:
# Small constant added to the denominator to avoid an exact division by zero.
epsilon = 1e-6
# The denominator is the same for every month, so it is built once.
limit_plus_epsilon = df_features_reg['limit_bal'] + epsilon

for month in ('june', 'may', 'april'):
    # Bill amount of the month divided by `limit_bal`.
    monthly_bill = df_features_reg[f'bill_amt_{month}']
    df_features_reg[f'utilization_{month}'] = monthly_bill / limit_plus_epsilon


In [21]:
# Each new feature is the row-wise mean of one group of source columns.
_reg_average_sources = {
    'bill_amt_avg_3m': bill_cols_reg,
    'pay_amt_avg_2m': pay_cols_reg,
    'pay_status_avg_3m': pay_status_cols_reg,
}

for feature_name, source_cols in _reg_average_sources.items():
    df_features_reg[feature_name] = df_features_reg[source_cols].mean(axis=1)


In [22]:
# The list runs from June back to April; reversing it makes the fit positions
# run from April (0) to June (2).
bill_cols_reg_ordered = bill_cols_reg[::-1]

# Apply row-wise over only the three fitted columns instead of the whole frame,
# so each row Series holds nothing but the values calculate_slope reads.
df_features_reg['bill_amt_slope_3m'] = df_features_reg[bill_cols_reg_ordered].apply(
    calculate_slope, axis=1, args=(bill_cols_reg_ordered,)
)


In [23]:
# Output directory for the engineered feature files; created (together with
# any missing parents) if it does not exist yet.
processed_data_path = project_root.joinpath("data", "processed")
processed_data_path.mkdir(parents=True, exist_ok=True)

In [24]:
# Write the classification feature set to CSV, without the index column,
# and report where it was saved.
path_clasificacion = processed_data_path.joinpath("features_clasificacion.csv")
df_features.to_csv(path_clasificacion, index=False)
print(f"DataFrame para clasificación guardado en: {path_clasificacion}")

DataFrame para clasificación guardado en: /Users/edusant/Desktop/personal/blue_tab/proyecto-riesgo-crediticio/data/processed/features_clasificacion.csv


In [25]:
# Write the regression feature set to CSV, without the index column,
# and report where it was saved.
path_regresion = processed_data_path.joinpath("features_regresion.csv")
df_features_reg.to_csv(path_regresion, index=False)
print(f"DataFrame para regresión guardado en: {path_regresion}")


DataFrame para regresión guardado en: /Users/edusant/Desktop/personal/blue_tab/proyecto-riesgo-crediticio/data/processed/features_regresion.csv
