# Adquisición de datos

## Functions and libraries

In [43]:
import os
# PATH = "/Users/luanagiusto/TP-1-ML"  # Cambia esto si tu path es diferente
PATH = "C:/Users/julia/OneDrive/Escritorio/Archivos/Capacitación/Maestría/03. Machine Learning/TP"

In [44]:
import pandas as pd
import numpy as np
# from ydata_profiling import ProfileReport
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)
import gc
from fastai.tabular.all import *
from fastbook import *
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
# Nota: Antes de ejecutar este notebook, instala los requisitos con:
# !pip install -r requirements.txt

In [45]:
def data_profiling(df, output_file):
    # Opciones para que sea liviano
    profile = ProfileReport(
        df.sample(20000, random_state=42) if len(df) > 20000 else df,
        title=output_file,
        minimal=True,         # desactiva análisis costosos
        explorative=True      # agrega secciones útiles
    )

    profile.to_file(output_file)  # <-- abre este HTML en el navegador

In [46]:
# Funcion para mostrar un resumen del dataframe
def df_info_summary(df: pd.DataFrame):
    total = len(df)
    non_null = df.notnull().sum()
    nulls = df.isnull().sum()
    dtypes = df.dtypes
    
    resumen = pd.DataFrame({
        "Non-Null Count": non_null,
        "Null Count": nulls,
        "% Null": (nulls / total * 100).round(2),
        "Dtype": dtypes
    })
    print(resumen)

In [47]:
def resumir_por_id(df, id_col='ID', excluir_cols=None, verbose=False, nombre_conteo='n_registros'):
    """
    Sumariza un DataFrame agrupando por una columna ID.
    Calcula métricas estadísticas básicas para columnas numéricas,
    excluyendo las que se indiquen. Incluye conteo total de registros por ID.

    Parámetros:
    - df: DataFrame de entrada con múltiples registros por ID.
    - id_col: nombre de la columna que identifica cada entidad única.
    - excluir_cols: lista de columnas a excluir del resumen (opcional).
    - verbose: si True, imprime columnas incluidas y excluidas.
    - nombre_conteo: nombre de la columna que indica cantidad de registros por ID.

    Retorna:
    - DataFrame con una fila por ID y métricas estadísticas por columna.
    """
    if excluir_cols is None:
        excluir_cols = []

    excluir_set = set(excluir_cols)
    if id_col in excluir_set:
        excluir_set.remove(id_col)

    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    cols_a_resumir = [col for col in numeric_cols if col not in excluir_set and col != id_col]

    if verbose:
        print(f"Columnas excluidas: {sorted(excluir_set)}")
        print(f"Columnas resumidas: {sorted(cols_a_resumir)}")

    # Agregaciones estadísticas
    agg_funcs = ['mean', 'min', 'max', 'median', 'sum']
    agg_dict = {col: agg_funcs for col in cols_a_resumir}

    # Agregar conteo de registros por ID
    df[nombre_conteo] = 1
    agg_dict[nombre_conteo] = ['count']

    resumen = df.groupby(id_col).agg(agg_dict)
    resumen.columns = [f"{col}_{stat}" for col, stat in resumen.columns]
    resumen = resumen.reset_index()

    return resumen

In [48]:
def procesar_tabular_fastai(df, id_col=None, excluir_cols=None, verbose=True):
    """
    Procesa un DataFrame tabular con FastAI sin splits.
    Convierte object → category, aplica Categorify, FillMissing y Normalize.
    Excluye columnas del procesamiento pero las reincorpora al final.

    Parámetros:
    - df: DataFrame original
    - id_col: columna ID a excluir del procesamiento (opcional)
    - excluir_cols: lista de columnas a excluir del procesamiento pero mantener en el resultado
    - verbose: si True, muestra trazabilidad

    Retorna:
    - df_limpio: DataFrame procesado con nulos imputados y variables transformadas
    """
    df = df.copy()

    # Convertir object a category
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].astype('category')

    # Definir exclusiones
    excluir = set(excluir_cols or [])
    if id_col:
        excluir.add(id_col)

    # Detectar columnas categóricas y numéricas excluyendo las indicadas
    cat_names = df.select_dtypes(include='category').columns.difference(excluir).tolist()
    cont_names = df.select_dtypes(include=['float32', 'float64', 'int32', 'int16', 'int8', 'int64']).columns.difference(excluir).tolist()

    if verbose:
        print(f"Columnas categóricas procesadas: {cat_names}")
        print(f"Columnas numéricas procesadas: {cont_names}")
        print(f"Columnas excluidas del procesamiento: {sorted(excluir)}")

    # Crear splits manuales (todo el dataset)
    splits = [list(range(len(df)))]

    # Procesar con FastAI
    to = TabularPandas(
        df,
        procs=[Categorify, FillMissing, Normalize],
        cat_names=cat_names,
        cont_names=cont_names,
        splits=splits
    )

    # Extraer DataFrame procesado
    df_limpio = to.train.xs.copy()

    # Reincorporar columnas excluidas sin transformar
    for col in excluir:
        df_limpio[col] = df[col].values

    return df_limpio

## Bureau data

### Data import and overview

In [49]:
# Create data profiles for bureau and bureau_balance datasets

bureau_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/bureau.csv"))
bureau_balance_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/bureau_balance.csv"))

# data_profiling(bureau_df, "bureau_df_profile.html")
# data_profiling(bureau_balance_df, "bureau_df_balance_profile.html")


In [50]:
# Mostrar las primeras filas de bureau dataset
bureau_df.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [51]:
len(bureau_df)

1716428

In [52]:
# Display the first few rows of the bureau balance data set
bureau_balance_df.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


In [53]:
len(bureau_balance_df)

27299925

In [54]:
# Info sobre las columnas del bureau_df
"""
1. **SK_ID_CURR**
   * ID del cliente (llave para unir con `application_{train|test}.csv`).
2. **SK_ID_BUREAU**
   * ID único del préstamo en el Buró de Crédito (llave para unir con `bureau_balance.csv`).
3. **CREDIT_ACTIVE**
   * Estado actual del crédito reportado.
   * Valores: *Active, Closed, Sold, Bad debt*.
4. **CREDIT_CURRENCY**
   * Moneda en que está registrado el crédito en el Buró.
5. **DAYS_CREDIT**
   * Días relativos a la fecha de aplicación en Home Credit en que se otorgó este préstamo externo.
   * Ej: `-1000` → el préstamo fue otorgado 1000 días antes de la aplicación.
6. **CREDIT_DAY_OVERDUE**
   * Número de días de atraso en pagos en el momento de la aplicación (si aplica).
7. **DAYS_CREDIT_ENDDATE**
   * Duración **restante** del crédito (en días) al momento de la aplicación.
   * Positivo = le quedan días para terminar.
   * Negativo = ya debería haber finalizado.
8. **DAYS_ENDDATE_FACT**
   * Días desde la **finalización real** del crédito, al momento de la aplicación (solo si está cerrado).
   * Negativo = terminó antes de la aplicación.
9. **AMT_CREDIT_MAX_OVERDUE**
   * Monto máximo de deuda vencida registrado durante la vida de ese crédito.
10. **CNT_CREDIT_PROLONG**
    * Cantidad de veces que se extendió/prorrogó este crédito.
11. **AMT_CREDIT_SUM**
    * Monto actual del crédito según Buró.
12. **AMT_CREDIT_SUM_DEBT**
    * Monto actual de deuda pendiente de ese crédito.
13. **AMT_CREDIT_SUM_LIMIT**
    * Límite actual de crédito (si es aplicable, ej. tarjeta).
14. **AMT_CREDIT_SUM_OVERDUE**
    * Monto actual vencido en ese crédito.
15. **CREDIT_TYPE**
    * Tipo de crédito según Buró (ej: *Car loan, Consumer credit, Mortgage, Credit card*).
16. **DAYS_CREDIT_UPDATE**
    * Días relativos a la aplicación en que se actualizó por última vez la info del crédito en el Buró.
17. **AMT_ANNUITY**
    * Monto de la cuota periódica (anualidad) reportado en el Buró para este crédito.
"""
df_info_summary(bureau_df)

                        Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                     1716428           0    0.00    int64
SK_ID_BUREAU                   1716428           0    0.00    int64
CREDIT_ACTIVE                  1716428           0    0.00   object
CREDIT_CURRENCY                1716428           0    0.00   object
DAYS_CREDIT                    1716428           0    0.00    int64
CREDIT_DAY_OVERDUE             1716428           0    0.00    int64
DAYS_CREDIT_ENDDATE            1610875      105553    6.15  float64
DAYS_ENDDATE_FACT              1082775      633653   36.92  float64
AMT_CREDIT_MAX_OVERDUE          591940     1124488   65.51  float64
CNT_CREDIT_PROLONG             1716428           0    0.00    int64
AMT_CREDIT_SUM                 1716415          13    0.00  float64
AMT_CREDIT_SUM_DEBT            1458759      257669   15.01  float64
AMT_CREDIT_SUM_LIMIT           1124648      591780   34.48  float64
AMT_CREDIT_SUM_OVERDUE         1716428          

In [55]:
# Info sobre las columnas del bureau_balance_df
"""
SK_ID_BUREAU → vincula con bureau.
MONTHS_BALANCE → mes relativo a la aplicación actual (ej. -1 = mes anterior, -6 = seis meses antes).
STATUS → estado en ese mes:
0 = al día (DPD 0)
1 = atraso 1–30 días
2 = atraso 31–60
3 = atraso 61–90
4 = atraso 91–120
5 = atraso 120+ o vendido/castigado
C = cerrado
X = desconocido

**DPD = Days Past Due
"""

df_info_summary(bureau_balance_df)

                Non-Null Count  Null Count  % Null   Dtype
SK_ID_BUREAU          27299925           0     0.0   int64
MONTHS_BALANCE        27299925           0     0.0   int64
STATUS                27299925           0     0.0  object


### Data prep - bureau.csv 

In [56]:
bureau_df_fastai = procesar_tabular_fastai(
    bureau_df,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_BUREAU'],
    verbose=True
)

df_info_summary(bureau_df_fastai)

Columnas categóricas procesadas: ['CREDIT_ACTIVE', 'CREDIT_CURRENCY', 'CREDIT_TYPE']
Columnas numéricas procesadas: ['AMT_ANNUITY', 'AMT_CREDIT_MAX_OVERDUE', 'AMT_CREDIT_SUM', 'AMT_CREDIT_SUM_DEBT', 'AMT_CREDIT_SUM_LIMIT', 'AMT_CREDIT_SUM_OVERDUE', 'CNT_CREDIT_PROLONG', 'CREDIT_DAY_OVERDUE', 'DAYS_CREDIT', 'DAYS_CREDIT_ENDDATE', 'DAYS_CREDIT_UPDATE', 'DAYS_ENDDATE_FACT']
Columnas excluidas del procesamiento: ['SK_ID_BUREAU', 'SK_ID_CURR']
                           Non-Null Count  Null Count  % Null    Dtype
CREDIT_ACTIVE                     1716428           0     0.0     int8
CREDIT_CURRENCY                   1716428           0     0.0     int8
CREDIT_TYPE                       1716428           0     0.0     int8
AMT_ANNUITY_na                    1716428           0     0.0     int8
AMT_CREDIT_MAX_OVERDUE_na         1716428           0     0.0     int8
AMT_CREDIT_SUM_na                 1716428           0     0.0     int8
AMT_CREDIT_SUM_DEBT_na            1716428           0     0.

In [57]:
bureau_df_fastai.head()

Unnamed: 0,CREDIT_ACTIVE,CREDIT_CURRENCY,CREDIT_TYPE,AMT_ANNUITY_na,AMT_CREDIT_MAX_OVERDUE_na,AMT_CREDIT_SUM_na,AMT_CREDIT_SUM_DEBT_na,AMT_CREDIT_SUM_LIMIT_na,DAYS_CREDIT_ENDDATE_na,DAYS_ENDDATE_FACT_na,AMT_ANNUITY,AMT_CREDIT_MAX_OVERDUE,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CNT_CREDIT_PROLONG,CREDIT_DAY_OVERDUE,DAYS_CREDIT,DAYS_CREDIT_ENDDATE,DAYS_CREDIT_UPDATE,DAYS_ENDDATE_FACT,SK_ID_CURR,SK_ID_BUREAU
0,3,1,4,2,2,1,1,2,1,1,-0.025735,-0.010902,-0.229317,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,0.811288,-0.126347,0.64204,1.438372,215354,5714462
1,1,1,5,2,2,1,1,2,1,2,-0.025735,-0.010902,-0.113056,0.087541,-0.111609,-0.006385,-0.06662,-0.022388,1.174735,0.127244,0.796047,0.133273,215354,5714463
2,1,1,4,2,2,1,2,2,1,2,-0.025735,-0.010902,0.095086,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,1.181023,0.014284,0.801596,0.133273,215354,5714464
3,1,1,5,2,2,1,2,2,2,2,-0.025735,-0.010902,-0.230467,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,1.181023,-0.162899,0.801596,0.133273,215354,5714465
4,1,1,4,2,1,1,2,2,1,2,-0.025735,0.631001,2.039479,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,0.645285,0.152438,0.794659,0.133273,215354,5714466


### Data prep - bureau_balance.csv

In [58]:
df_info_summary(bureau_balance_df)

                Non-Null Count  Null Count  % Null   Dtype
SK_ID_BUREAU          27299925           0     0.0   int64
MONTHS_BALANCE        27299925           0     0.0   int64
STATUS                27299925           0     0.0  object


In [59]:
bureau_balance_df_fastai = procesar_tabular_fastai(
    bureau_balance_df,
    id_col='SK_ID_BUREAU',
    excluir_cols=None,
    verbose=True
)

df_info_summary(bureau_balance_df_fastai)

Columnas categóricas procesadas: ['STATUS']
Columnas numéricas procesadas: ['MONTHS_BALANCE']
Columnas excluidas del procesamiento: ['SK_ID_BUREAU']
                Non-Null Count  Null Count  % Null    Dtype
STATUS                27299925           0     0.0     int8
MONTHS_BALANCE        27299925           0     0.0  float64
SK_ID_BUREAU          27299925           0     0.0    int64


In [60]:
# Agrupar por SK_ID_BUREAU y sumar
status_counts = bureau_balance_df_fastai.groupby("SK_ID_BUREAU", as_index=False).sum()

# Renombrar columnas para que sean más claras
status_counts = status_counts.rename(columns={'SUM_STATUS_0': 'MONTHS_WITH_STATUS_DPD_0',
    'SUM_STATUS_1': 'MONTHS_WITH_STATUS_DPD_1to30',
    'SUM_STATUS_2': 'MONTHS_WITH_STATUS_DPD_31to60',
    'SUM_STATUS_3': 'MONTHS_WITH_STATUS_DPD_61to90',
    'SUM_STATUS_4': 'MONTHS_WITH_STATUS_DPD_91to120',
    'SUM_STATUS_5': 'MONTHS_WITH_STATUS_DPD_over120',
    'SUM_STATUS_C': 'MONTHS_WITH_STATUS_CLOSED',
    'SUM_STATUS_X': 'MONTHS_WITH_STATUS_UNKNOWN'
    })

In [61]:
df_info_summary(status_counts)

                Non-Null Count  Null Count  % Null    Dtype
SK_ID_BUREAU            817395           0     0.0    int64
STATUS                  817395           0     0.0    int64
MONTHS_BALANCE          817395           0     0.0  float64


### Bureau datasets join

In [62]:
print("rows before join:", len(bureau_df_fastai))

rows before join: 1716428


In [63]:
# Join final de tablas bureau y bureau_balance agregando los conteos de status
bureau_df_fastai = bureau_df_fastai.merge(status_counts, on="SK_ID_BUREAU", how="left")

# # Rellenar NaN con 0 y casteo a int32
# status_cols = [col for col in bureau_df_join.columns if col.startswith("MONTHS_WITH_STATUS_")]
# bureau_df_join[status_cols] = bureau_df_join[status_cols].fillna(0).astype("int8")

In [64]:
print("rows after join:", len(bureau_df_fastai))

rows after join: 1716428


In [65]:
bureau_df_fastai.head()

Unnamed: 0,CREDIT_ACTIVE,CREDIT_CURRENCY,CREDIT_TYPE,AMT_ANNUITY_na,AMT_CREDIT_MAX_OVERDUE_na,AMT_CREDIT_SUM_na,AMT_CREDIT_SUM_DEBT_na,AMT_CREDIT_SUM_LIMIT_na,DAYS_CREDIT_ENDDATE_na,DAYS_ENDDATE_FACT_na,AMT_ANNUITY,AMT_CREDIT_MAX_OVERDUE,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CNT_CREDIT_PROLONG,CREDIT_DAY_OVERDUE,DAYS_CREDIT,DAYS_CREDIT_ENDDATE,DAYS_CREDIT_UPDATE,DAYS_ENDDATE_FACT,SK_ID_CURR,SK_ID_BUREAU,STATUS,MONTHS_BALANCE
0,3,1,4,2,2,1,1,2,1,1,-0.025735,-0.010902,-0.229317,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,0.811288,-0.126347,0.64204,1.438372,215354,5714462,,
1,1,1,5,2,2,1,1,2,1,2,-0.025735,-0.010902,-0.113056,0.087541,-0.111609,-0.006385,-0.06662,-0.022388,1.174735,0.127244,0.796047,0.133273,215354,5714463,,
2,1,1,4,2,2,1,2,2,1,2,-0.025735,-0.010902,0.095086,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,1.181023,0.014284,0.801596,0.133273,215354,5714464,,
3,1,1,5,2,2,1,2,2,2,2,-0.025735,-0.010902,-0.230467,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,1.181023,-0.162899,0.801596,0.133273,215354,5714465,,
4,1,1,4,2,1,1,2,2,1,2,-0.025735,0.631001,2.039479,-0.185991,-0.111609,-0.006385,-0.06662,-0.022388,0.645285,0.152438,0.794659,0.133273,215354,5714466,,


In [66]:
df_info_summary(bureau_df_fastai)

                           Non-Null Count  Null Count  % Null    Dtype
CREDIT_ACTIVE                     1716428           0    0.00     int8
CREDIT_CURRENCY                   1716428           0    0.00     int8
CREDIT_TYPE                       1716428           0    0.00     int8
AMT_ANNUITY_na                    1716428           0    0.00     int8
AMT_CREDIT_MAX_OVERDUE_na         1716428           0    0.00     int8
AMT_CREDIT_SUM_na                 1716428           0    0.00     int8
AMT_CREDIT_SUM_DEBT_na            1716428           0    0.00     int8
AMT_CREDIT_SUM_LIMIT_na           1716428           0    0.00     int8
DAYS_CREDIT_ENDDATE_na            1716428           0    0.00     int8
DAYS_ENDDATE_FACT_na              1716428           0    0.00     int8
AMT_ANNUITY                       1716428           0    0.00  float64
AMT_CREDIT_MAX_OVERDUE            1716428           0    0.00  float64
AMT_CREDIT_SUM                    1716428           0    0.00  float64
AMT_CR

### Joined data prep

In [71]:
bureau_df_agg = resumir_por_id(
    bureau_df_fastai,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_BUREAU'],
    verbose=True,
    nombre_conteo='bureau_records'
)

# Mostrar las primeras filas
bureau_df_agg.head()

Columnas excluidas: ['SK_ID_BUREAU']
Columnas resumidas: ['AMT_ANNUITY', 'AMT_ANNUITY_na', 'AMT_CREDIT_MAX_OVERDUE', 'AMT_CREDIT_MAX_OVERDUE_na', 'AMT_CREDIT_SUM', 'AMT_CREDIT_SUM_DEBT', 'AMT_CREDIT_SUM_DEBT_na', 'AMT_CREDIT_SUM_LIMIT', 'AMT_CREDIT_SUM_LIMIT_na', 'AMT_CREDIT_SUM_OVERDUE', 'AMT_CREDIT_SUM_na', 'CNT_CREDIT_PROLONG', 'CREDIT_ACTIVE', 'CREDIT_CURRENCY', 'CREDIT_DAY_OVERDUE', 'CREDIT_TYPE', 'DAYS_CREDIT', 'DAYS_CREDIT_ENDDATE', 'DAYS_CREDIT_ENDDATE_na', 'DAYS_CREDIT_UPDATE', 'DAYS_ENDDATE_FACT', 'DAYS_ENDDATE_FACT_na', 'MONTHS_BALANCE', 'STATUS']


Unnamed: 0,SK_ID_CURR,CREDIT_ACTIVE_mean,CREDIT_ACTIVE_min,CREDIT_ACTIVE_max,CREDIT_ACTIVE_median,CREDIT_ACTIVE_sum,CREDIT_CURRENCY_mean,CREDIT_CURRENCY_min,CREDIT_CURRENCY_max,CREDIT_CURRENCY_median,CREDIT_CURRENCY_sum,CREDIT_TYPE_mean,CREDIT_TYPE_min,CREDIT_TYPE_max,CREDIT_TYPE_median,CREDIT_TYPE_sum,AMT_ANNUITY_na_mean,AMT_ANNUITY_na_min,AMT_ANNUITY_na_max,AMT_ANNUITY_na_median,AMT_ANNUITY_na_sum,AMT_CREDIT_MAX_OVERDUE_na_mean,AMT_CREDIT_MAX_OVERDUE_na_min,AMT_CREDIT_MAX_OVERDUE_na_max,AMT_CREDIT_MAX_OVERDUE_na_median,AMT_CREDIT_MAX_OVERDUE_na_sum,AMT_CREDIT_SUM_na_mean,AMT_CREDIT_SUM_na_min,AMT_CREDIT_SUM_na_max,AMT_CREDIT_SUM_na_median,AMT_CREDIT_SUM_na_sum,AMT_CREDIT_SUM_DEBT_na_mean,AMT_CREDIT_SUM_DEBT_na_min,AMT_CREDIT_SUM_DEBT_na_max,AMT_CREDIT_SUM_DEBT_na_median,AMT_CREDIT_SUM_DEBT_na_sum,AMT_CREDIT_SUM_LIMIT_na_mean,AMT_CREDIT_SUM_LIMIT_na_min,AMT_CREDIT_SUM_LIMIT_na_max,AMT_CREDIT_SUM_LIMIT_na_median,AMT_CREDIT_SUM_LIMIT_na_sum,DAYS_CREDIT_ENDDATE_na_mean,DAYS_CREDIT_ENDDATE_na_min,DAYS_CREDIT_ENDDATE_na_max,DAYS_CREDIT_ENDDATE_na_median,DAYS_CREDIT_ENDDATE_na_sum,DAYS_ENDDATE_FACT_na_mean,DAYS_ENDDATE_FACT_na_min,DAYS_ENDDATE_FACT_na_max,DAYS_ENDDATE_FACT_na_median,DAYS_ENDDATE_FACT_na_sum,AMT_ANNUITY_mean,AMT_ANNUITY_min,AMT_ANNUITY_max,AMT_ANNUITY_median,AMT_ANNUITY_sum,AMT_CREDIT_MAX_OVERDUE_mean,AMT_CREDIT_MAX_OVERDUE_min,AMT_CREDIT_MAX_OVERDUE_max,AMT_CREDIT_MAX_OVERDUE_median,AMT_CREDIT_MAX_OVERDUE_sum,AMT_CREDIT_SUM_mean,AMT_CREDIT_SUM_min,AMT_CREDIT_SUM_max,AMT_CREDIT_SUM_median,AMT_CREDIT_SUM_sum,AMT_CREDIT_SUM_DEBT_mean,AMT_CREDIT_SUM_DEBT_min,AMT_CREDIT_SUM_DEBT_max,AMT_CREDIT_SUM_DEBT_median,AMT_CREDIT_SUM_DEBT_sum,AMT_CREDIT_SUM_LIMIT_mean,AMT_CREDIT_SUM_LIMIT_min,AMT_CREDIT_SUM_LIMIT_max,AMT_CREDIT_SUM_LIMIT_median,AMT_CREDIT_SUM_LIMIT_sum,AMT_CREDIT_SUM_OVERDUE_mean,AMT_CREDIT_SUM_OVERDUE_min,AMT_CREDIT_SUM_OVERDUE_max,AMT_CREDIT_SUM_OVERDUE_median,AMT_CREDIT_SUM_OVERDUE_sum,CNT_CREDIT_PROLONG_mean,CNT_CREDIT_PROLONG_min,CNT_CREDIT_PROLONG_max,CNT_CREDIT_PROLONG_median,CNT_CREDIT_PROLONG_sum,CREDIT_DAY_OVERDUE_mean,CREDIT_DAY_OVERDUE_min,CREDIT_DAY_OVERDUE_max,CREDIT_DAY_OVERDUE_median,CREDIT_DAY_OVERDUE_sum,DAYS_CREDIT_mean,DAYS_CREDIT_min,DAYS_CREDIT_max,DAYS_CREDIT_median,DAYS_CREDIT_sum,DAYS_CREDIT_ENDDATE_mean,DAYS_CREDIT_ENDDATE_min,DAYS_CREDIT_ENDDATE_max,DAYS_CREDIT_ENDDATE_median,DAYS_CREDIT_ENDDATE_sum,DAYS_CREDIT_UPDATE_mean,DAYS_CREDIT_UPDATE_min,DAYS_CREDIT_UPDATE_max,DAYS_CREDIT_UPDATE_median,DAYS_CREDIT_UPDATE_sum,DAYS_ENDDATE_FACT_mean,DAYS_ENDDATE_FACT_min,DAYS_ENDDATE_FACT_max,DAYS_ENDDATE_FACT_median,DAYS_ENDDATE_FACT_sum,STATUS_mean,STATUS_min,STATUS_max,STATUS_median,STATUS_sum,MONTHS_BALANCE_mean,MONTHS_BALANCE_min,MONTHS_BALANCE_max,MONTHS_BALANCE_median,MONTHS_BALANCE_sum,bureau_records_count
0,100001,2.142857,1,3,3.0,15,1.0,1,1,1.0,7,4.0,4,4,4.0,28,1.0,1,1,1.0,7,2.0,2,2,2.0,14,1.0,1,1,1.0,7,1.0,1,1,1.0,7,1.142857,1,2,1.0,8,1.0,1,1,1.0,7,1.428571,1,2,1.0,10,-0.00538,-0.025735,0.036403,-0.025735,-0.037657,-0.010902,-0.010902,-0.010902,-0.010902,-0.076317,-0.128169,-0.234381,0.02001,-0.16233,-0.897181,-0.049912,-0.185991,0.409851,-0.185991,-0.349383,-0.111609,-0.111609,-0.111609,-0.111609,-0.781265,-0.006385,-0.006385,-0.006385,-0.006385,-0.044696,-0.06662,-0.06662,-0.06662,-0.06662,-0.466337,-0.022388,-0.022388,-0.022388,-0.022388,-0.156718,0.511979,-0.540633,1.374693,0.358552,3.583854,-0.07773,-0.369201,0.272419,-0.131717,-0.544108,0.694565,0.608741,0.815471,0.608741,4.861953,0.204943,-0.622771,0.752493,0.133273,1.434604,149.0,9.0,365.0,192.0,1043.0,14.891032,2.534449,20.417374,17.309891,104.237223,7
1,100002,2.5,1,3,3.0,20,1.0,1,1,1.0,8,4.5,4,5,4.5,36,1.125,1,2,1.0,9,1.375,1,2,1.0,11,1.0,1,1,1.0,8,1.375,1,2,1.0,11,1.5,1,2,1.5,12,1.25,1,2,1.0,10,1.25,1,2,1.0,10,-0.025735,-0.025735,-0.025735,-0.025735,-0.205883,-0.00222,-0.010902,0.030778,-0.010902,-0.017759,-0.214698,-0.308741,0.082629,-0.261663,-1.717582,-0.136945,-0.185991,0.206376,-0.185991,-1.095563,-0.002274,-0.111609,0.763072,-0.111609,-0.018193,-0.006385,-0.006385,-0.006385,-0.006385,-0.051081,-0.06662,-0.06662,-0.06662,-0.06662,-0.532957,-0.022388,-0.022388,-0.022388,-0.022388,-0.179106,0.337173,-0.370857,1.306783,0.125267,2.69738,-0.165842,-0.316128,0.066324,-0.162899,-1.326737,0.130244,-0.820332,0.814083,0.265347,1.041956,0.39574,-0.371926,1.643609,0.120994,3.165921,47.5,6.0,94.0,55.0,380.0,3.564842,-5.872026,15.582428,2.843846,28.518732,8
2,100003,2.5,1,3,3.0,10,1.0,1,1,1.0,4,4.5,4,5,4.5,18,2.0,2,2,2.0,8,1.0,1,1,1.0,4,1.0,1,1,1.0,4,1.0,1,1,1.0,4,1.0,1,1,1.0,4,1.0,1,1,1.0,4,1.25,1,2,1.0,5,-0.025735,-0.025735,-0.025735,-0.025735,-0.102941,-0.010902,-0.010902,-0.010902,-0.010902,-0.04361,-0.08753,-0.289392,0.395725,-0.228227,-0.35012,-0.185991,-0.185991,-0.185991,-0.185991,-0.743965,5.425463,-0.111609,22.036678,-0.111609,21.701851,-0.006385,-0.006385,-0.006385,-0.006385,-0.025541,-0.06662,-0.06662,-0.06662,-0.06662,-0.266479,-0.022388,-0.022388,-0.022388,-0.022388,-0.089553,-0.325269,-1.815841,0.67421,-0.079722,-1.301075,-0.207195,-0.597392,0.156362,-0.193876,-0.828781,-0.308363,-2.132859,0.764135,0.067636,-1.233452,-0.13029,-2.031365,0.75951,0.375348,-0.521159,,,,,0.0,,,,,0.0,4
3,100004,3.0,3,3,3.0,6,1.0,1,1,1.0,2,4.0,4,4,4.0,8,2.0,2,2,2.0,4,1.5,1,2,1.5,3,1.0,1,1,1.0,2,1.0,1,1,1.0,2,1.0,1,1,1.0,2,1.0,1,1,1.0,2,1.0,1,1,1.0,2,-0.025735,-0.025735,-0.025735,-0.025735,-0.051471,-0.010902,-0.010902,-0.010902,-0.010902,-0.021805,-0.226537,-0.226554,-0.226521,-0.226537,-0.453074,-0.185991,-0.185991,-0.185991,-0.185991,-0.371983,-0.111609,-0.111609,-0.111609,-0.111609,-0.223219,-0.006385,-0.006385,-0.006385,-0.006385,-0.01277,-0.06662,-0.06662,-0.06662,-0.06662,-0.133239,-0.022388,-0.022388,-0.022388,-0.022388,-0.044777,0.345976,-0.231263,0.923215,0.345976,0.691951,-0.195631,-0.217624,-0.173638,-0.195631,-0.391262,0.085673,-0.122445,0.29379,0.085673,0.171345,0.772666,0.508665,1.036668,0.772666,1.545332,,,,,0.0,,,,,0.0,2
4,100005,1.666667,1,3,1.0,5,1.0,1,1,1.0,3,4.333333,4,5,4.0,13,1.0,1,1,1.0,3,1.666667,1,2,2.0,5,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.666667,1,2,2.0,5,-0.017579,-0.025735,-0.001268,-0.025735,-0.052738,-0.010902,-0.010902,-0.010902,-0.010902,-0.032707,-0.118238,-0.282801,0.18595,-0.257863,-0.354714,0.11648,-0.185991,0.680998,-0.145568,0.349439,-0.111609,-0.111609,-0.111609,-0.111609,-0.334828,-0.006385,-0.006385,-0.006385,-0.006385,-0.019155,-0.06662,-0.06662,-0.06662,-0.06662,-0.199859,-0.022388,-0.022388,-0.022388,-0.022388,-0.067165,1.196533,0.967231,1.358345,1.264025,3.5896,-0.004026,-0.121185,0.178665,-0.069558,-0.012078,0.748411,0.655914,0.808534,0.780785,2.245233,0.585848,0.133273,1.490997,0.133273,1.757543,21.666667,5.0,50.0,10.0,65.0,7.746167,3.738818,13.477836,6.021848,23.238501,3


In [72]:
bureau_df_agg.shape

(305811, 122)

In [73]:
df_info_summary(bureau_df_agg)

                                  Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                                305811           0     0.0    int64
CREDIT_ACTIVE_mean                        305811           0     0.0  float64
CREDIT_ACTIVE_min                         305811           0     0.0     int8
CREDIT_ACTIVE_max                         305811           0     0.0     int8
CREDIT_ACTIVE_median                      305811           0     0.0  float64
CREDIT_ACTIVE_sum                         305811           0     0.0    int64
CREDIT_CURRENCY_mean                      305811           0     0.0  float64
CREDIT_CURRENCY_min                       305811           0     0.0     int8
CREDIT_CURRENCY_max                       305811           0     0.0     int8
CREDIT_CURRENCY_median                    305811           0     0.0  float64
CREDIT_CURRENCY_sum                       305811           0     0.0     int8
CREDIT_TYPE_mean                          305811           0    

In [74]:
bureau_df_agg['SK_ID_CURR'].nunique()

305811

## Previous application data

### Data import and overview

In [75]:
# Create data profiles for bureau and bureau_balance datasets

previous_application_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/previous_application.csv"))
pos_cash_balance_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/POS_CASH_balance.csv"))
installments_payments_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/installments_payments.csv"))
credit_card_balance_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/credit_card_balance.csv"))

# data_profiling(previous_application_df, "previous_application_df.html")
# data_profiling(pos_cash_balance_df, "pos_cash_balance_df.html")
# data_profiling(installments_payments_df, "installments_payments_df.html")
# data_profiling(credit_card_balance_df, "credit_card_balance_df.html")

In [76]:
# Mostrar las primeras filas de previous_application_df
previous_application_df.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,Y,1,,,,XNA,Approved,-301,Cash through the bank,XAP,"Spouse, partner",Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,Y,1,,,,XNA,Approved,-512,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,,337500.0,THURSDAY,9,Y,1,,,,Repairs,Refused,-781,Cash through the bank,HC,,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,,,,,,


In [77]:
len(previous_application_df)

1670214

In [78]:
# Mostrar las primeras filas de pos_cash_balance_df
pos_cash_balance_df.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0
2,1784872,397406,-32,12.0,9.0,Active,0,0
3,1903291,269225,-35,48.0,42.0,Active,0,0
4,2341044,334279,-35,36.0,35.0,Active,0,0


In [79]:
len(pos_cash_balance_df)

10001358

In [80]:
# Mostrar las primeras filas de installments_payments_df
installments_payments_df.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NUM_INSTALMENT_VERSION,NUM_INSTALMENT_NUMBER,DAYS_INSTALMENT,DAYS_ENTRY_PAYMENT,AMT_INSTALMENT,AMT_PAYMENT
0,1054186,161674,1.0,6,-1180.0,-1187.0,6948.36,6948.36
1,1330831,151639,0.0,34,-2156.0,-2156.0,1716.525,1716.525
2,2085231,193053,2.0,1,-63.0,-63.0,25425.0,25425.0
3,2452527,199697,1.0,3,-2418.0,-2426.0,24350.13,24350.13
4,2714724,167756,1.0,2,-1383.0,-1366.0,2165.04,2160.585


In [81]:
len(installments_payments_df)

13605401

In [82]:
# Mostrar las primeras filas de credit_card_balance_df
credit_card_balance_df.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


In [83]:
len(credit_card_balance_df)

3840312

### Data prep - previous_application_df 

In [84]:
# Info sobre las columnas del previous_application_df

"""
•	SK_ID_PREV → ID de la aplicación previa.
•	SK_ID_CURR → ID del préstamo actual en nuestro dataset.
•	NAME_CONTRACT_TYPE → Tipo de producto solicitado (Cash loan, POS loan, etc.).
•	AMT_ANNUITY → Cuota periódica (anualidad) de esa solicitud.
•	AMT_APPLICATION → Monto que el cliente pidió originalmente.
•	AMT_CREDIT → Monto finalmente aprobado (puede diferir de lo solicitado).
•	AMT_DOWN_PAYMENT → Pago inicial hecho por el cliente.
•	AMT_GOODS_PRICE → Valor de los bienes financiados (si aplica).
•	WEEKDAY_APPR_PROCESS_START → Día de la semana en que se inició la aplicación.
•	HOUR_APPR_PROCESS_START → Hora del día de inicio (aprox., redondeada).
•	FLAG_LAST_APPL_PER_CONTRACT → Marca si fue la última solicitud para ese contrato.
•	NFLAG_LAST_APPL_IN_DAY → Marca si fue la última aplicación del cliente en ese día.
•	NFLAG_MICRO_CASH → Flag si era un microcrédito.
•	RATE_DOWN_PAYMENT → Porcentaje de pago inicial (normalizado).
•	RATE_INTEREST_PRIMARY / PRIVILEGED → Tasas de interés aplicables (normalizadas).
•	NAME_CASH_LOAN_PURPOSE → Propósito del préstamo en efectivo (educación, auto, etc.).
•	NAME_CONTRACT_STATUS → Estado de la aplicación (Approved, Refused, Canceled, etc.).
•	DAYS_DECISION → Días relativos a la aplicación actual en que se tomó la decisión.
•	NAME_PAYMENT_TYPE → Método de pago (Cash, Bank transfer, etc.).
•	CODE_REJECT_REASON → Razón de rechazo (CLIENT, HC, SCO, etc.).
•	NAME_TYPE_SUITE → Con quién estaba el cliente (Family, Alone, etc.).
•	NAME_CLIENT_TYPE → Si era cliente nuevo o recurrente.
•	NAME_GOODS_CATEGORY → Categoría del bien solicitado (Electronics, Furniture, etc.).
•	NAME_PORTFOLIO → Cartera (POS, Cash, Car, etc.).
•	NAME_PRODUCT_TYPE → Tipo de producto (X-Sell, Walk-in, etc.).
•	CHANNEL_TYPE → Canal de aplicación (Credit agent, Online, etc.).
•	SELLERPLACE_AREA → Tamaño del área de ventas del vendedor.
•	NAME_SELLER_INDUSTRY → Industria del vendedor.
•	CNT_PAYMENT → Número de pagos previstos (plazo).
•	NAME_YIELD_GROUP → Clasificación de la tasa de interés (baja, media, alta).
•	PRODUCT_COMBINATION → Detalle de la combinación de productos.
•	DAYS_FIRST_DRAWING → Días hasta la primera disposición de fondos.
•	DAYS_FIRST_DUE → Días hasta el primer pago esperado.
•	DAYS_LAST_DUE_1ST_VERSION → Último vencimiento esperado (versión inicial).
•	DAYS_LAST_DUE → Último vencimiento esperado (versión final).
•	DAYS_TERMINATION → Días hasta la finalización esperada del contrato.
•	NFLAG_INSURED_ON_APPROVAL → Si el cliente solicitó seguro.

"""
df_info_summary(previous_application_df)

                             Non-Null Count  Null Count  % Null    Dtype
SK_ID_PREV                          1670214           0    0.00    int64
SK_ID_CURR                          1670214           0    0.00    int64
NAME_CONTRACT_TYPE                  1670214           0    0.00   object
AMT_ANNUITY                         1297979      372235   22.29  float64
AMT_APPLICATION                     1670214           0    0.00  float64
AMT_CREDIT                          1670213           1    0.00  float64
AMT_DOWN_PAYMENT                     774370      895844   53.64  float64
AMT_GOODS_PRICE                     1284699      385515   23.08  float64
WEEKDAY_APPR_PROCESS_START          1670214           0    0.00   object
HOUR_APPR_PROCESS_START             1670214           0    0.00    int64
FLAG_LAST_APPL_PER_CONTRACT         1670214           0    0.00   object
NFLAG_LAST_APPL_IN_DAY              1670214           0    0.00    int64
RATE_DOWN_PAYMENT                    774370      89

In [85]:
previous_application_df_fastai = procesar_tabular_fastai(
    previous_application_df,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True
)

df_info_summary(previous_application_df_fastai)

Columnas categóricas procesadas: ['CHANNEL_TYPE', 'CODE_REJECT_REASON', 'FLAG_LAST_APPL_PER_CONTRACT', 'NAME_CASH_LOAN_PURPOSE', 'NAME_CLIENT_TYPE', 'NAME_CONTRACT_STATUS', 'NAME_CONTRACT_TYPE', 'NAME_GOODS_CATEGORY', 'NAME_PAYMENT_TYPE', 'NAME_PORTFOLIO', 'NAME_PRODUCT_TYPE', 'NAME_SELLER_INDUSTRY', 'NAME_TYPE_SUITE', 'NAME_YIELD_GROUP', 'PRODUCT_COMBINATION', 'WEEKDAY_APPR_PROCESS_START']
Columnas numéricas procesadas: ['AMT_ANNUITY', 'AMT_APPLICATION', 'AMT_CREDIT', 'AMT_DOWN_PAYMENT', 'AMT_GOODS_PRICE', 'CNT_PAYMENT', 'DAYS_DECISION', 'DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE', 'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_TERMINATION', 'HOUR_APPR_PROCESS_START', 'NFLAG_INSURED_ON_APPROVAL', 'NFLAG_LAST_APPL_IN_DAY', 'RATE_DOWN_PAYMENT', 'RATE_INTEREST_PRIMARY', 'RATE_INTEREST_PRIVILEGED', 'SELLERPLACE_AREA']
Columnas excluidas del procesamiento: ['SK_ID_CURR', 'SK_ID_PREV']
                              Non-Null Count  Null Count  % Null    Dtype
CHANNEL_TYPE                    

In [87]:
print("Count distinct of SK_ID_PREV: ", previous_application_df_fastai["SK_ID_PREV"].nunique())
print("Count distinct of SK_ID_CURR: ", previous_application_df_fastai["SK_ID_CURR"].nunique())

Count distinct of SK_ID_PREV:  1670214
Count distinct of SK_ID_CURR:  338857


In [88]:
previous_application_summary = resumir_por_id(
    previous_application_df_fastai,
    id_col='SK_ID_CURR',
    excluir_cols=None,
    verbose=True,
    nombre_conteo='previous_application_records'
)

# Mostrar las primeras filas
previous_application_summary.head()

Columnas excluidas: []
Columnas resumidas: ['AMT_ANNUITY', 'AMT_ANNUITY_na', 'AMT_APPLICATION', 'AMT_CREDIT', 'AMT_CREDIT_na', 'AMT_DOWN_PAYMENT', 'AMT_DOWN_PAYMENT_na', 'AMT_GOODS_PRICE', 'AMT_GOODS_PRICE_na', 'CHANNEL_TYPE', 'CNT_PAYMENT', 'CNT_PAYMENT_na', 'CODE_REJECT_REASON', 'DAYS_DECISION', 'DAYS_FIRST_DRAWING', 'DAYS_FIRST_DRAWING_na', 'DAYS_FIRST_DUE', 'DAYS_FIRST_DUE_na', 'DAYS_LAST_DUE', 'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_LAST_DUE_1ST_VERSION_na', 'DAYS_LAST_DUE_na', 'DAYS_TERMINATION', 'DAYS_TERMINATION_na', 'FLAG_LAST_APPL_PER_CONTRACT', 'HOUR_APPR_PROCESS_START', 'NAME_CASH_LOAN_PURPOSE', 'NAME_CLIENT_TYPE', 'NAME_CONTRACT_STATUS', 'NAME_CONTRACT_TYPE', 'NAME_GOODS_CATEGORY', 'NAME_PAYMENT_TYPE', 'NAME_PORTFOLIO', 'NAME_PRODUCT_TYPE', 'NAME_SELLER_INDUSTRY', 'NAME_TYPE_SUITE', 'NAME_YIELD_GROUP', 'NFLAG_INSURED_ON_APPROVAL', 'NFLAG_INSURED_ON_APPROVAL_na', 'NFLAG_LAST_APPL_IN_DAY', 'PRODUCT_COMBINATION', 'RATE_DOWN_PAYMENT', 'RATE_DOWN_PAYMENT_na', 'RATE_INTEREST_PRIMARY'

Unnamed: 0,SK_ID_CURR,CHANNEL_TYPE_mean,CHANNEL_TYPE_min,CHANNEL_TYPE_max,CHANNEL_TYPE_median,CHANNEL_TYPE_sum,CODE_REJECT_REASON_mean,CODE_REJECT_REASON_min,CODE_REJECT_REASON_max,CODE_REJECT_REASON_median,CODE_REJECT_REASON_sum,FLAG_LAST_APPL_PER_CONTRACT_mean,FLAG_LAST_APPL_PER_CONTRACT_min,FLAG_LAST_APPL_PER_CONTRACT_max,FLAG_LAST_APPL_PER_CONTRACT_median,FLAG_LAST_APPL_PER_CONTRACT_sum,NAME_CASH_LOAN_PURPOSE_mean,NAME_CASH_LOAN_PURPOSE_min,NAME_CASH_LOAN_PURPOSE_max,NAME_CASH_LOAN_PURPOSE_median,NAME_CASH_LOAN_PURPOSE_sum,NAME_CLIENT_TYPE_mean,NAME_CLIENT_TYPE_min,NAME_CLIENT_TYPE_max,NAME_CLIENT_TYPE_median,NAME_CLIENT_TYPE_sum,NAME_CONTRACT_STATUS_mean,NAME_CONTRACT_STATUS_min,NAME_CONTRACT_STATUS_max,NAME_CONTRACT_STATUS_median,NAME_CONTRACT_STATUS_sum,NAME_CONTRACT_TYPE_mean,NAME_CONTRACT_TYPE_min,NAME_CONTRACT_TYPE_max,NAME_CONTRACT_TYPE_median,NAME_CONTRACT_TYPE_sum,NAME_GOODS_CATEGORY_mean,NAME_GOODS_CATEGORY_min,NAME_GOODS_CATEGORY_max,NAME_GOODS_CATEGORY_median,NAME_GOODS_CATEGORY_sum,NAME_PAYMENT_TYPE_mean,NAME_PAYMENT_TYPE_min,NAME_PAYMENT_TYPE_max,NAME_PAYMENT_TYPE_median,NAME_PAYMENT_TYPE_sum,NAME_PORTFOLIO_mean,NAME_PORTFOLIO_min,NAME_PORTFOLIO_max,NAME_PORTFOLIO_median,NAME_PORTFOLIO_sum,NAME_PRODUCT_TYPE_mean,NAME_PRODUCT_TYPE_min,NAME_PRODUCT_TYPE_max,NAME_PRODUCT_TYPE_median,NAME_PRODUCT_TYPE_sum,NAME_SELLER_INDUSTRY_mean,NAME_SELLER_INDUSTRY_min,NAME_SELLER_INDUSTRY_max,NAME_SELLER_INDUSTRY_median,NAME_SELLER_INDUSTRY_sum,NAME_TYPE_SUITE_mean,NAME_TYPE_SUITE_min,NAME_TYPE_SUITE_max,NAME_TYPE_SUITE_median,NAME_TYPE_SUITE_sum,NAME_YIELD_GROUP_mean,NAME_YIELD_GROUP_min,NAME_YIELD_GROUP_max,NAME_YIELD_GROUP_median,NAME_YIELD_GROUP_sum,PRODUCT_COMBINATION_mean,PRODUCT_COMBINATION_min,PRODUCT_COMBINATION_max,PRODUCT_COMBINATION_median,PRODUCT_COMBINATION_sum,WEEKDAY_APPR_PROCESS_START_mean,WEEKDAY_APPR_PROCESS_START_min,WEEKDAY_APPR_PROCESS_START_max,WEEKDAY_APPR_PROCESS_START_median,WEEKDAY_APPR_PROCESS_START_sum,AMT_ANNUITY_na_mean,AMT_ANNUITY_na_min,AMT_ANNUITY_na_max,AMT_ANNUITY_na_median,AMT_ANNUITY_na_sum,AMT_CREDIT_na_mean,AMT_CREDIT_na_min,AMT_CREDIT_na_max,AMT_CREDIT_na_median,AMT_CREDIT_na_sum,AMT_DOWN_PAYMENT_na_mean,AMT_DOWN_PAYMENT_na_min,AMT_DOWN_PAYMENT_na_max,AMT_DOWN_PAYMENT_na_median,AMT_DOWN_PAYMENT_na_sum,AMT_GOODS_PRICE_na_mean,AMT_GOODS_PRICE_na_min,AMT_GOODS_PRICE_na_max,AMT_GOODS_PRICE_na_median,AMT_GOODS_PRICE_na_sum,CNT_PAYMENT_na_mean,CNT_PAYMENT_na_min,CNT_PAYMENT_na_max,CNT_PAYMENT_na_median,CNT_PAYMENT_na_sum,DAYS_FIRST_DRAWING_na_mean,DAYS_FIRST_DRAWING_na_min,DAYS_FIRST_DRAWING_na_max,DAYS_FIRST_DRAWING_na_median,DAYS_FIRST_DRAWING_na_sum,DAYS_FIRST_DUE_na_mean,DAYS_FIRST_DUE_na_min,DAYS_FIRST_DUE_na_max,DAYS_FIRST_DUE_na_median,DAYS_FIRST_DUE_na_sum,DAYS_LAST_DUE_na_mean,DAYS_LAST_DUE_na_min,DAYS_LAST_DUE_na_max,DAYS_LAST_DUE_na_median,DAYS_LAST_DUE_na_sum,DAYS_LAST_DUE_1ST_VERSION_na_mean,DAYS_LAST_DUE_1ST_VERSION_na_min,DAYS_LAST_DUE_1ST_VERSION_na_max,DAYS_LAST_DUE_1ST_VERSION_na_median,DAYS_LAST_DUE_1ST_VERSION_na_sum,DAYS_TERMINATION_na_mean,DAYS_TERMINATION_na_min,DAYS_TERMINATION_na_max,DAYS_TERMINATION_na_median,DAYS_TERMINATION_na_sum,NFLAG_INSURED_ON_APPROVAL_na_mean,NFLAG_INSURED_ON_APPROVAL_na_min,NFLAG_INSURED_ON_APPROVAL_na_max,NFLAG_INSURED_ON_APPROVAL_na_median,NFLAG_INSURED_ON_APPROVAL_na_sum,RATE_DOWN_PAYMENT_na_mean,RATE_DOWN_PAYMENT_na_min,RATE_DOWN_PAYMENT_na_max,RATE_DOWN_PAYMENT_na_median,RATE_DOWN_PAYMENT_na_sum,RATE_INTEREST_PRIMARY_na_mean,RATE_INTEREST_PRIMARY_na_min,RATE_INTEREST_PRIMARY_na_max,RATE_INTEREST_PRIMARY_na_median,RATE_INTEREST_PRIMARY_na_sum,RATE_INTEREST_PRIVILEGED_na_mean,RATE_INTEREST_PRIVILEGED_na_min,RATE_INTEREST_PRIVILEGED_na_max,RATE_INTEREST_PRIVILEGED_na_median,RATE_INTEREST_PRIVILEGED_na_sum,AMT_ANNUITY_mean,AMT_ANNUITY_min,AMT_ANNUITY_max,AMT_ANNUITY_median,AMT_ANNUITY_sum,AMT_APPLICATION_mean,AMT_APPLICATION_min,AMT_APPLICATION_max,AMT_APPLICATION_median,AMT_APPLICATION_sum,AMT_CREDIT_mean,AMT_CREDIT_min,AMT_CREDIT_max,AMT_CREDIT_median,AMT_CREDIT_sum,AMT_DOWN_PAYMENT_mean,AMT_DOWN_PAYMENT_min,AMT_DOWN_PAYMENT_max,AMT_DOWN_PAYMENT_median,AMT_DOWN_PAYMENT_sum,AMT_GOODS_PRICE_mean,AMT_GOODS_PRICE_min,AMT_GOODS_PRICE_max,AMT_GOODS_PRICE_median,AMT_GOODS_PRICE_sum,CNT_PAYMENT_mean,CNT_PAYMENT_min,CNT_PAYMENT_max,CNT_PAYMENT_median,CNT_PAYMENT_sum,DAYS_DECISION_mean,DAYS_DECISION_min,DAYS_DECISION_max,DAYS_DECISION_median,DAYS_DECISION_sum,DAYS_FIRST_DRAWING_mean,DAYS_FIRST_DRAWING_min,DAYS_FIRST_DRAWING_max,DAYS_FIRST_DRAWING_median,DAYS_FIRST_DRAWING_sum,DAYS_FIRST_DUE_mean,DAYS_FIRST_DUE_min,DAYS_FIRST_DUE_max,DAYS_FIRST_DUE_median,DAYS_FIRST_DUE_sum,DAYS_LAST_DUE_mean,DAYS_LAST_DUE_min,DAYS_LAST_DUE_max,DAYS_LAST_DUE_median,DAYS_LAST_DUE_sum,DAYS_LAST_DUE_1ST_VERSION_mean,DAYS_LAST_DUE_1ST_VERSION_min,DAYS_LAST_DUE_1ST_VERSION_max,DAYS_LAST_DUE_1ST_VERSION_median,DAYS_LAST_DUE_1ST_VERSION_sum,DAYS_TERMINATION_mean,DAYS_TERMINATION_min,DAYS_TERMINATION_max,DAYS_TERMINATION_median,DAYS_TERMINATION_sum,HOUR_APPR_PROCESS_START_mean,HOUR_APPR_PROCESS_START_min,HOUR_APPR_PROCESS_START_max,HOUR_APPR_PROCESS_START_median,HOUR_APPR_PROCESS_START_sum,NFLAG_INSURED_ON_APPROVAL_mean,NFLAG_INSURED_ON_APPROVAL_min,NFLAG_INSURED_ON_APPROVAL_max,NFLAG_INSURED_ON_APPROVAL_median,NFLAG_INSURED_ON_APPROVAL_sum,NFLAG_LAST_APPL_IN_DAY_mean,NFLAG_LAST_APPL_IN_DAY_min,NFLAG_LAST_APPL_IN_DAY_max,NFLAG_LAST_APPL_IN_DAY_median,NFLAG_LAST_APPL_IN_DAY_sum,RATE_DOWN_PAYMENT_mean,RATE_DOWN_PAYMENT_min,RATE_DOWN_PAYMENT_max,RATE_DOWN_PAYMENT_median,RATE_DOWN_PAYMENT_sum,RATE_INTEREST_PRIMARY_mean,RATE_INTEREST_PRIMARY_min,RATE_INTEREST_PRIMARY_max,RATE_INTEREST_PRIMARY_median,RATE_INTEREST_PRIMARY_sum,RATE_INTEREST_PRIVILEGED_mean,RATE_INTEREST_PRIVILEGED_min,RATE_INTEREST_PRIVILEGED_max,RATE_INTEREST_PRIVILEGED_median,RATE_INTEREST_PRIVILEGED_sum,SELLERPLACE_AREA_mean,SELLERPLACE_AREA_min,SELLERPLACE_AREA_max,SELLERPLACE_AREA_median,SELLERPLACE_AREA_sum,SK_ID_PREV_mean,SK_ID_PREV_min,SK_ID_PREV_max,SK_ID_PREV_median,SK_ID_PREV_sum,previous_application_records_count
0,100001,5.0,5,5,5.0,5,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,2.0,2,2,2.0,2,1.0,1,1,1.0,1,2.0,2,2,2.0,2,20.0,20,20,20.0,20,1.0,1,1,1.0,1,4.0,4,4,4.0,4,1.0,1,1,1.0,1,3.0,3,3,3.0,3,2.0,2,2,2.0,2,2.0,2,2,2.0,2,14.0,14,14,14.0,14,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.831379,-0.831379,-0.831379,-0.831379,-0.831379,-0.513691,-0.513691,-0.513691,-0.513691,-0.513691,-0.540932,-0.540932,-0.540932,-0.540932,-0.540932,-0.101174,-0.101174,-0.101174,-0.101174,-0.101174,-0.627873,-0.627873,-0.627873,-0.627873,-0.627873,-0.552074,-0.552074,-0.552074,-0.552074,-0.552074,-1.102966,-1.102966,-1.102966,-1.102966,-1.102966,0.197503,0.197503,0.197503,0.197503,0.197503,-0.170613,-0.170613,-0.170613,-0.170613,-0.170613,-0.387346,-0.387346,-0.387346,-0.387346,-0.387346,-0.255368,-0.255368,-0.255368,-0.255368,-0.255368,-0.402338,-0.402338,-0.402338,-0.402338,-0.402338,0.154713,0.154713,0.154713,0.154713,0.154713,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,0.531527,0.531527,0.531527,0.531527,0.531527,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,-0.040821,-0.040821,-0.040821,-0.040821,-0.040821,1369693.0,1369693,1369693,1369693.0,1369693,1
1,100002,8.0,8,8,8.0,8,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,26.0,26,26,26.0,26,4.0,4,4,4.0,4,4.0,4,4,4.0,4,1.0,1,1,1.0,1,1.0,1,1,1.0,1,0.0,0,0,0.0,0,4.0,4,4,4.0,4,16.0,16,16,16.0,16,3.0,3,3,3.0,3,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.42912,-0.42912,-0.42912,-0.42912,-0.42912,0.013051,0.013051,0.013051,0.013051,0.013051,-0.053548,-0.053548,-0.053548,-0.053548,-0.053548,-0.27536,-0.27536,-0.27536,-0.27536,-0.27536,-0.078781,-0.078781,-0.078781,-0.078781,-0.078781,0.683238,0.683238,0.683238,0.683238,0.683238,0.35256,0.35256,0.35256,0.35256,0.35256,0.197503,0.197503,0.197503,0.197503,0.197503,-0.150342,-0.150342,-0.150342,-0.150342,-0.150342,-0.374243,-0.374243,-0.374243,-0.374243,-0.374243,-0.236091,-0.236091,-0.236091,-0.236091,-0.236091,-0.389596,-0.389596,-0.389596,-0.389596,-0.389596,-1.045037,-1.045037,-1.045037,-1.045037,-1.045037,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,-0.864389,-0.864389,-0.864389,-0.864389,-0.864389,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,0.026103,0.026103,0.026103,0.026103,0.026103,1038818.0,1038818,1038818,1038818.0,1038818,1
2,100003,6.333333,5,8,6.0,19,8.0,8,8,8.0,24,2.0,2,2,2.0,6,24.333333,24,25,24.0,73,2.333333,2,3,2.0,7,1.0,1,1,1.0,3,1.666667,1,2,2.0,5,16.0,8,28,12.0,48,2.0,1,4,1.0,6,3.666667,3,4,4.0,11,1.666667,1,3,1.0,5,7.333333,5,11,6.0,22,3.666667,2,7,2.0,11,4.666667,4,5,5.0,14,10.0,8,12,10.0,30,2.666667,1,4,3.0,8,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.333333,1,2,1.0,4,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.333333,1,2,1.0,4,2.0,2,2,2.0,6,2.0,2,2,2.0,6,3.160497,-0.619935,6.332796,3.76863,9.481492,0.888732,-0.363496,2.475466,0.554226,2.666196,0.904269,-0.40198,2.636018,0.478769,2.712808,-0.078986,-0.27536,0.200541,-0.162139,-0.236959,0.834055,-0.471305,2.488113,0.485356,2.502165,-0.39766,-0.706488,-0.243246,-0.243246,-1.192981,-0.544629,-1.87437,0.172866,0.067616,-1.633888,0.197503,0.197503,0.197503,0.197503,0.592508,-0.162911,-0.181262,-0.153018,-0.154453,-0.488733,-0.382704,-0.390313,-0.378444,-0.379356,-1.148112,-0.249496,-0.261078,-0.242157,-0.245255,-0.748489,-0.397827,-0.405246,-0.39367,-0.394565,-1.193482,0.654609,-0.145224,1.354464,0.754588,1.963828,1.173491,-0.497734,2.009104,2.009104,3.520473,0.05954,0.05954,0.05954,0.05954,0.17862,-0.187945,-0.864389,0.474452,-0.173897,-0.563834,0.000521,0.000521,0.000521,0.000521,0.001563,0.031098,0.031098,0.031098,0.031098,0.093294,0.030733,-0.044189,0.152376,-0.015988,0.0922,2281150.0,1810518,2636178,2396755.0,6843451,3
3,100004,7.0,7,7,7.0,7,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,20.0,20,20,20.0,20,1.0,1,1,1.0,1,4.0,4,4,4.0,4,1.0,1,1,1.0,1,3.0,3,3,3.0,3,7.0,7,7,7.0,7,5.0,5,5,5.0,5,15.0,15,15,15.0,15,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.724663,-0.724663,-0.724663,-0.724663,-0.724663,-0.515582,-0.515582,-0.515582,-0.515582,-0.515582,-0.552486,-0.552486,-0.552486,-0.552486,-0.552486,0.06057,0.06057,0.06057,0.06057,0.06057,-0.629843,-0.629843,-0.629843,-0.629843,-0.629843,-0.860902,-0.860902,-0.860902,-0.860902,-0.860902,0.084302,0.084302,0.084302,0.084302,0.084302,0.197503,0.197503,0.197503,0.197503,0.197503,-0.154223,-0.154223,-0.154223,-0.154223,-0.154223,-0.379989,-0.379989,-0.379989,-0.379989,-0.379989,-0.245813,-0.245813,-0.245813,-0.245813,-0.245813,-0.395164,-0.395164,-0.395164,-0.395164,-0.395164,-2.244787,-2.244787,-2.244787,-2.244787,-2.244787,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,1.972338,1.972338,1.972338,1.972338,1.972338,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,-0.039839,-0.039839,-0.039839,-0.039839,-0.039839,1564014.0,1564014,1564014,1564014.0,1564014,1
4,100005,5.5,5,6,5.5,11,8.0,8,8,8.0,16,2.0,2,2,2.0,4,24.5,24,25,24.5,49,2.0,1,3,2.0,4,1.5,1,2,1.5,3,1.5,1,2,1.5,3,24.0,20,28,24.0,48,2.5,1,4,2.5,5,4.5,4,5,4.5,9,1.0,1,1,1.0,2,7.0,3,11,7.0,14,0.0,0,0,0.0,0,1.5,1,2,1.5,3,8.5,3,14,8.5,17,3.0,1,5,3.0,6,1.5,1,2,1.5,3,1.0,1,1,1.0,2,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,2.0,2,2,2.0,4,2.0,2,2,2.0,4,-0.521715,-0.765949,-0.277481,-0.521715,-1.04343,-0.522321,-0.598518,-0.446125,-0.522321,-1.044643,-0.552578,-0.615599,-0.489557,-0.552578,-1.105156,-0.064471,-0.162139,0.033198,-0.064471,-0.128941,-0.436914,-0.55744,-0.316388,-0.436914,-0.873827,-0.243246,-0.243246,-0.243246,-0.243246,-0.486493,0.442408,0.158747,0.726069,0.442408,0.884816,0.197503,0.197503,0.197503,0.197503,0.395005,-0.153948,-0.155055,-0.15284,-0.153948,-0.307896,-0.37816,-0.378452,-0.377868,-0.37816,-0.75632,-0.241949,-0.242038,-0.24186,-0.241949,-0.483898,-0.393291,-0.393447,-0.393135,-0.393291,-0.786582,-0.595131,-0.745099,-0.445162,-0.595131,-1.190261,-0.497734,-0.497734,-0.497734,-0.497734,-0.995468,0.05954,0.05954,0.05954,0.05954,0.11908,0.209843,-0.173897,0.593583,0.209843,0.419686,0.000521,0.000521,0.000521,0.000521,0.001042,0.031098,0.031098,0.031098,0.031098,0.062196,-0.041523,-0.044189,-0.038857,-0.041523,-0.083046,2176837.0,1857999,2495675,2176837.0,4353674,2


In [89]:
previous_application_summary.shape

(338857, 252)

In [91]:
del previous_application_df
gc.collect()

2906

### Data prep - pos_cash_balance

In [92]:
len(pos_cash_balance_df)

10001358

In [93]:
# Info sobre las columnas del pos_cash_balance_df
"""
•	SK_ID_PREV → ID del crédito previo en Home Credit (un préstamo en application puede tener 0, 1 o varios).
•	SK_ID_CURR → ID del préstamo en nuestro dataset principal (application).
•	MONTHS_BALANCE → Mes del reporte relativo a la fecha de aplicación del préstamo actual:
    0 = mes de la aplicación
    -1 = un mes antes
•	CNT_INSTALMENT → Número total de cuotas originalmente previstas (puede variar si se reestructura).
•	CNT_INSTALMENT_FUTURE → Número de cuotas pendientes en ese momento.
•	NAME_CONTRACT_STATUS → Estado del contrato ese mes (ej. Active, Completed, Signed, etc.).
•	SK_DPD → Days Past Due = cantidad de días de atraso en ese mes.
•	SK_DPD_DEF → Days Past Due con tolerancia (se ignoran deudas pequeñas).
"""
df_info_summary(pos_cash_balance_df)

                       Non-Null Count  Null Count  % Null    Dtype
SK_ID_PREV                   10001358           0    0.00    int64
SK_ID_CURR                   10001358           0    0.00    int64
MONTHS_BALANCE               10001358           0    0.00    int64
CNT_INSTALMENT                9975287       26071    0.26  float64
CNT_INSTALMENT_FUTURE         9975271       26087    0.26  float64
NAME_CONTRACT_STATUS         10001358           0    0.00   object
SK_DPD                       10001358           0    0.00    int64
SK_DPD_DEF                   10001358           0    0.00    int64


In [94]:
print("Count distinct of SK_ID_PREV: ", pos_cash_balance_df["SK_ID_PREV"].nunique())
print("Count distinct of SK_ID_CURR: ", pos_cash_balance_df["SK_ID_CURR"].nunique())

Count distinct of SK_ID_PREV:  936325
Count distinct of SK_ID_CURR:  337252


In [None]:
 = procesar_tabular_fastai(
    pos_cash_balance_df,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True
)

df_info_summary(pos_cash_balance_df_fastai)

Columnas categóricas procesadas: ['NAME_CONTRACT_STATUS']
Columnas numéricas procesadas: ['CNT_INSTALMENT', 'CNT_INSTALMENT_FUTURE', 'MONTHS_BALANCE', 'SK_DPD', 'SK_DPD_DEF']
Columnas excluidas del procesamiento: ['SK_ID_CURR', 'SK_ID_PREV']
                          Non-Null Count  Null Count  % Null    Dtype
NAME_CONTRACT_STATUS            10001358           0     0.0     int8
CNT_INSTALMENT_na               10001358           0     0.0     int8
CNT_INSTALMENT_FUTURE_na        10001358           0     0.0     int8
CNT_INSTALMENT                  10001358           0     0.0  float64
CNT_INSTALMENT_FUTURE           10001358           0     0.0  float64
MONTHS_BALANCE                  10001358           0     0.0  float64
SK_DPD                          10001358           0     0.0  float64
SK_DPD_DEF                      10001358           0     0.0  float64
SK_ID_CURR                      10001358           0     0.0    int64
SK_ID_PREV                      10001358           0     0

In [None]:
gc.collect()

358

In [97]:
pos_cash_balance_df_agg = resumir_por_id(
    pos_cash_balance_df_fastai,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True,
    nombre_conteo='pos_cash_balance_records'
)

# Mostrar las primeras filas
pos_cash_balance_df_agg.head()

Columnas excluidas: ['SK_ID_PREV']
Columnas resumidas: ['CNT_INSTALMENT', 'CNT_INSTALMENT_FUTURE', 'CNT_INSTALMENT_FUTURE_na', 'CNT_INSTALMENT_na', 'MONTHS_BALANCE', 'NAME_CONTRACT_STATUS', 'SK_DPD', 'SK_DPD_DEF']


Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_STATUS_mean,NAME_CONTRACT_STATUS_min,NAME_CONTRACT_STATUS_max,NAME_CONTRACT_STATUS_median,NAME_CONTRACT_STATUS_sum,CNT_INSTALMENT_na_mean,CNT_INSTALMENT_na_min,CNT_INSTALMENT_na_max,CNT_INSTALMENT_na_median,CNT_INSTALMENT_na_sum,CNT_INSTALMENT_FUTURE_na_mean,CNT_INSTALMENT_FUTURE_na_min,CNT_INSTALMENT_FUTURE_na_max,CNT_INSTALMENT_FUTURE_na_median,CNT_INSTALMENT_FUTURE_na_sum,CNT_INSTALMENT_mean,CNT_INSTALMENT_min,CNT_INSTALMENT_max,CNT_INSTALMENT_median,CNT_INSTALMENT_sum,CNT_INSTALMENT_FUTURE_mean,CNT_INSTALMENT_FUTURE_min,CNT_INSTALMENT_FUTURE_max,CNT_INSTALMENT_FUTURE_median,CNT_INSTALMENT_FUTURE_sum,MONTHS_BALANCE_mean,MONTHS_BALANCE_min,MONTHS_BALANCE_max,MONTHS_BALANCE_median,MONTHS_BALANCE_sum,SK_DPD_mean,SK_DPD_min,SK_DPD_max,SK_DPD_median,SK_DPD_sum,SK_DPD_DEF_mean,SK_DPD_DEF_min,SK_DPD_DEF_max,SK_DPD_DEF_median,SK_DPD_DEF_sum,pos_cash_balance_records_count
0,100001,1.888889,1,5,1.0,17,1.0,1,1,1.0,9,1.0,1,1,1.0,9,-1.091315,-1.091315,-1.091315,-1.091315,-9.821839,-0.813836,-0.944013,-0.583522,-0.85389,-7.324523,-1.440273,-2.339679,-0.690057,-0.84351,-12.962454,-0.081598,-0.087458,-0.034713,-0.087458,-0.734379,0.003764,-0.019976,0.193683,-0.019976,0.033874,9
1,100002,1.0,1,1,1.0,19,1.0,1,1,1.0,19,1.0,1,1,1.0,19,0.577824,0.577824,0.577824,0.577824,10.978658,0.407827,-0.403277,1.218932,0.407827,7.748722,0.959566,0.614296,1.304836,0.959566,18.23175,-0.087458,-0.087458,-0.087458,-0.087458,-1.661705,-0.019976,-0.019976,-0.019976,-0.019976,-0.379547,19
2,100003,1.285714,1,5,1.0,36,1.0,1,1,1.0,28,1.0,1,1,1.0,28,-0.581632,-0.924401,-0.42366,-0.42366,-16.285689,-0.422589,-0.944013,0.137459,-0.403277,-11.832489,-0.336566,-1.610776,0.652659,0.326571,-9.423854,-0.087458,-0.087458,-0.087458,-0.087458,-2.448829,-0.019976,-0.019976,-0.019976,-0.019976,-0.559332,28
3,100004,2.0,1,5,1.0,8,1.0,1,1,1.0,4,1.0,1,1,1.0,4,-1.11218,-1.174772,-1.091315,-1.091315,-4.448719,-0.741237,-0.944013,-0.583522,-0.718706,-2.964948,0.364934,0.307389,0.422479,0.364934,1.459738,-0.087458,-0.087458,-0.087458,-0.087458,-0.349833,-0.019976,-0.019976,-0.019976,-0.019976,-0.079905,4
4,100005,2.0,1,8,1.0,22,1.090909,1,2,1.0,12,1.090909,1,2,1.0,12,-0.446421,-0.674031,-0.42366,-0.42366,-4.910627,-0.296768,-0.944013,0.137459,-0.313154,-3.264451,0.575933,0.384116,0.767749,0.575933,6.335259,-0.087458,-0.087458,-0.087458,-0.087458,-0.96204,-0.019976,-0.019976,-0.019976,-0.019976,-0.219738,11


In [98]:
len(pos_cash_balance_df_agg)

337252

In [99]:
df_info_summary(pos_cash_balance_df_agg)

                                 Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                               337252           0     0.0    int64
NAME_CONTRACT_STATUS_mean                337252           0     0.0  float64
NAME_CONTRACT_STATUS_min                 337252           0     0.0     int8
NAME_CONTRACT_STATUS_max                 337252           0     0.0     int8
NAME_CONTRACT_STATUS_median              337252           0     0.0  float64
NAME_CONTRACT_STATUS_sum                 337252           0     0.0    int64
CNT_INSTALMENT_na_mean                   337252           0     0.0  float64
CNT_INSTALMENT_na_min                    337252           0     0.0     int8
CNT_INSTALMENT_na_max                    337252           0     0.0     int8
CNT_INSTALMENT_na_median                 337252           0     0.0  float64
CNT_INSTALMENT_na_sum                    337252           0     0.0    int64
CNT_INSTALMENT_FUTURE_na_mean            337252           0     0.0  float64

In [100]:
print("Count distinct of SK_ID_PREV: ", pos_cash_balance_df_agg["SK_ID_CURR"].nunique())

Count distinct of SK_ID_PREV:  337252


In [101]:
del pos_cash_balance_df
gc.collect()

0

### Data prep - installments_payments

In [102]:
len(installments_payments_df)

13605401

In [103]:
# Info sobre las columnas del installments_payments_df
"""
Histórico de pagos de cuotas de créditos anteriores.
•	SK_ID_PREV → ID del crédito previo.
•	SK_ID_CURR → ID del préstamo actual.
•	NUM_INSTALMENT_VERSION → Versión del calendario de pagos (0 si es tarjeta de crédito). Un cambio significa renegociación.
•	NUM_INSTALMENT_NUMBER → Número de cuota (1, 2, 3, …).
•	DAYS_INSTALMENT → Día en que debía pagarse la cuota (relativo a aplicación actual).
•	DAYS_ENTRY_PAYMENT → Día en que efectivamente se pagó (NaN si no se pagó).
•	AMT_INSTALMENT → Monto esperado de la cuota.
•	AMT_PAYMENT → Monto realmente pagado.
"""
df_info_summary(installments_payments_df)

                        Non-Null Count  Null Count  % Null    Dtype
SK_ID_PREV                    13605401           0    0.00    int64
SK_ID_CURR                    13605401           0    0.00    int64
NUM_INSTALMENT_VERSION        13605401           0    0.00  float64
NUM_INSTALMENT_NUMBER         13605401           0    0.00    int64
DAYS_INSTALMENT               13605401           0    0.00  float64
DAYS_ENTRY_PAYMENT            13602496        2905    0.02  float64
AMT_INSTALMENT                13605401           0    0.00  float64
AMT_PAYMENT                   13602496        2905    0.02  float64


In [104]:
installments_payments_df_fastai = procesar_tabular_fastai(
    installments_payments_df,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True
)

df_info_summary(installments_payments_df_fastai)

Columnas categóricas procesadas: []
Columnas numéricas procesadas: ['AMT_INSTALMENT', 'AMT_PAYMENT', 'DAYS_ENTRY_PAYMENT', 'DAYS_INSTALMENT', 'NUM_INSTALMENT_NUMBER', 'NUM_INSTALMENT_VERSION']
Columnas excluidas del procesamiento: ['SK_ID_CURR', 'SK_ID_PREV']
                        Non-Null Count  Null Count  % Null    Dtype
AMT_PAYMENT_na                13605401           0     0.0     int8
DAYS_ENTRY_PAYMENT_na         13605401           0     0.0     int8
AMT_INSTALMENT                13605401           0     0.0  float64
AMT_PAYMENT                   13605401           0     0.0  float64
DAYS_ENTRY_PAYMENT            13605401           0     0.0  float64
DAYS_INSTALMENT               13605401           0     0.0  float64
NUM_INSTALMENT_NUMBER         13605401           0     0.0  float64
NUM_INSTALMENT_VERSION        13605401           0     0.0  float64
SK_ID_CURR                    13605401           0     0.0    int64
SK_ID_PREV                    13605401           0     0.0  

In [105]:
"""
Por lo visto anteriormente, los NaN en DAYS_ENTRY_PAYMENT y AMT_PAYMENT indican cuotas no pagadas. 
Vamos a crear las siguientes columnas:
- instalments_unpaid: cantidad de cuotas no abonadas (DAYS_ENTRY_PAYMENT no NaN)
- instalments_partially_paid: cantidad de cuotas abonadas parcialmente (DAYS_ENTRY_PAYMENT no NaN y AMT_PAYMENT < AMT_INSTALMENT)
- instalments_overdue: cantidad de cuotas vencidas y no pagadas (DAYS_ENTRY_PAYMENT NaN y DAYS_INSTALMENT < 0)
- amount debt: monto adeudado (AMT_INSTALMENT - AMT_PAYMENT) si no se pagó y si se pagó menos de lo debido
- dpd: días de atraso en el pago:
    - Si se pagó (DAYS_ENTRY_PAYMENT no es NaN): max(0, DAYS_ENTRY_PAYMENT - DAYS_INSTALMENT)
    - Si no se pagó y está vencido: abs(DAYS_INSTALMENT) si DAYS_INSTALMENT < 0
"""

installments_payments_df_fastai["instalments_unpaid"] = installments_payments_df_fastai["DAYS_ENTRY_PAYMENT"].isna().astype(int)

installments_payments_df_fastai["instalments_partially_paid"] = (
    (~installments_payments_df_fastai["DAYS_ENTRY_PAYMENT"].isna()) & 
    (installments_payments_df_fastai["AMT_PAYMENT"] < installments_payments_df_fastai["AMT_INSTALMENT"]) &
    (installments_payments_df_fastai["AMT_PAYMENT"] > 0)
).astype(int)

installments_payments_df_fastai["instalments_overdue"] = ((installments_payments_df_fastai["DAYS_ENTRY_PAYMENT"].isna()) & (installments_payments_df_fastai["DAYS_INSTALMENT"] < 0)).astype(int)

# Calcular monto adeudado
installments_payments_df_fastai["amount_debt"] = installments_payments_df_fastai.apply(
    # lambda row: (row["AMT_INSTALMENT"] - row["AMT_PAYMENT"]) if (pd.notna(row["AMT_PAYMENT"]) or row["AMT_PAYMENT"] < row["AMT_INSTALMENT"]) else 0,
    # axis=1
    lambda row: max(0, row["AMT_INSTALMENT"] - row["AMT_PAYMENT"]) if pd.notna(row["AMT_PAYMENT"]) 
    else abs(row["AMT_INSTALMENT"]), 
    axis=1
).round(2)

# Calcular dpd considerando ambos casos
installments_payments_df_fastai["dpd"] = installments_payments_df_fastai.apply(
    lambda row: max(0, row["DAYS_ENTRY_PAYMENT"] - row["DAYS_INSTALMENT"]) if pd.notna(row["DAYS_ENTRY_PAYMENT"]) 
    else abs(row["DAYS_INSTALMENT"]) if row["DAYS_INSTALMENT"] < 0 else 0, 
    axis=1
).astype(int)

installments_payments_df_fastai.head()

Unnamed: 0,AMT_PAYMENT_na,DAYS_ENTRY_PAYMENT_na,AMT_INSTALMENT,AMT_PAYMENT,DAYS_ENTRY_PAYMENT,DAYS_INSTALMENT,NUM_INSTALMENT_NUMBER,NUM_INSTALMENT_VERSION,SK_ID_CURR,SK_ID_PREV,instalments_unpaid,instalments_partially_paid,instalments_overdue,amount_debt,dpd
0,1,1,-0.199773,-0.187976,-0.16981,-0.171959,-0.482706,0.138486,161674,1054186,0,0,0,0.0,0
1,1,1,-0.303229,-0.283569,-1.380293,-1.390518,0.567397,-0.827496,151639,1330831,0,0,0,0.0,0
2,1,1,0.165593,0.14962,1.2343,1.222641,-0.670224,1.104468,193053,2085231,0,1,0,0.02,0
3,1,1,0.144338,0.129981,-1.717579,-1.717631,-0.595217,0.138486,199697,2452527,0,1,0,0.01,0
4,1,1,-0.29436,-0.275455,-0.393418,-0.425409,-0.63272,0.138486,167756,2714724,0,0,0,0.0,0


In [107]:
print("Count distinct of SK_ID_PREV: ", installments_payments_df_fastai["SK_ID_CURR"].nunique())

Count distinct of SK_ID_PREV:  339587


In [109]:
df_info_summary(installments_payments_df_fastai)

                            Non-Null Count  Null Count  % Null    Dtype
AMT_PAYMENT_na                    13605401           0     0.0     int8
DAYS_ENTRY_PAYMENT_na             13605401           0     0.0     int8
AMT_INSTALMENT                    13605401           0     0.0  float64
AMT_PAYMENT                       13605401           0     0.0  float64
DAYS_ENTRY_PAYMENT                13605401           0     0.0  float64
DAYS_INSTALMENT                   13605401           0     0.0  float64
NUM_INSTALMENT_NUMBER             13605401           0     0.0  float64
NUM_INSTALMENT_VERSION            13605401           0     0.0  float64
SK_ID_CURR                        13605401           0     0.0    int64
SK_ID_PREV                        13605401           0     0.0    int64
instalments_unpaid                13605401           0     0.0    int64
instalments_partially_paid        13605401           0     0.0    int64
instalments_overdue               13605401           0     0.0  

In [110]:
installments_payments_df_agg = resumir_por_id(
    installments_payments_df_fastai,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True,
    nombre_conteo='installments_payments_records'
)

# Mostrar las primeras filas
installments_payments_df_agg.head()

Columnas excluidas: ['SK_ID_PREV']
Columnas resumidas: ['AMT_INSTALMENT', 'AMT_PAYMENT', 'AMT_PAYMENT_na', 'DAYS_ENTRY_PAYMENT', 'DAYS_ENTRY_PAYMENT_na', 'DAYS_INSTALMENT', 'NUM_INSTALMENT_NUMBER', 'NUM_INSTALMENT_VERSION', 'amount_debt', 'dpd', 'instalments_overdue', 'instalments_partially_paid', 'instalments_unpaid']


Unnamed: 0,SK_ID_CURR,AMT_PAYMENT_na_mean,AMT_PAYMENT_na_min,AMT_PAYMENT_na_max,AMT_PAYMENT_na_median,AMT_PAYMENT_na_sum,DAYS_ENTRY_PAYMENT_na_mean,DAYS_ENTRY_PAYMENT_na_min,DAYS_ENTRY_PAYMENT_na_max,DAYS_ENTRY_PAYMENT_na_median,DAYS_ENTRY_PAYMENT_na_sum,AMT_INSTALMENT_mean,AMT_INSTALMENT_min,AMT_INSTALMENT_max,AMT_INSTALMENT_median,AMT_INSTALMENT_sum,AMT_PAYMENT_mean,AMT_PAYMENT_min,AMT_PAYMENT_max,AMT_PAYMENT_median,AMT_PAYMENT_sum,DAYS_ENTRY_PAYMENT_mean,DAYS_ENTRY_PAYMENT_min,DAYS_ENTRY_PAYMENT_max,DAYS_ENTRY_PAYMENT_median,DAYS_ENTRY_PAYMENT_sum,DAYS_INSTALMENT_mean,DAYS_INSTALMENT_min,DAYS_INSTALMENT_max,DAYS_INSTALMENT_median,DAYS_INSTALMENT_sum,NUM_INSTALMENT_NUMBER_mean,NUM_INSTALMENT_NUMBER_min,NUM_INSTALMENT_NUMBER_max,NUM_INSTALMENT_NUMBER_median,NUM_INSTALMENT_NUMBER_sum,NUM_INSTALMENT_VERSION_mean,NUM_INSTALMENT_VERSION_min,NUM_INSTALMENT_VERSION_max,NUM_INSTALMENT_VERSION_median,NUM_INSTALMENT_VERSION_sum,instalments_unpaid_mean,instalments_unpaid_min,instalments_unpaid_max,instalments_unpaid_median,instalments_unpaid_sum,instalments_partially_paid_mean,instalments_partially_paid_min,instalments_partially_paid_max,instalments_partially_paid_median,instalments_partially_paid_sum,instalments_overdue_mean,instalments_overdue_min,instalments_overdue_max,instalments_overdue_median,instalments_overdue_sum,amount_debt_mean,amount_debt_min,amount_debt_max,amount_debt_median,amount_debt_sum,dpd_mean,dpd_min,dpd_max,dpd_median,dpd_sum,installments_payments_records_count
0,100001,1.0,1,1,1.0,7,1.0,1,1,1.0,7,-0.220797,-0.259044,0.006862,-0.258452,-1.545581,-0.207402,-0.242742,0.002953,-0.242195,-1.451816,-1.429012,-2.329691,-0.720711,-0.829392,-10.003084,-1.430114,-2.339395,-0.720061,-0.832428,-10.010797,-0.605932,-0.670224,-0.557713,-0.595217,-4.241524,0.276483,0.138486,1.104468,0.138486,1.935383,0.0,0,0,0.0,0,0.142857,0,1,0.0,1,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,7
1,100002,1.0,1,1,1.0,19,1.0,1,1,1.0,19,-0.108595,-0.154224,0.712728,-0.154224,-2.063298,-0.103728,-0.145889,0.655169,-0.145889,-1.970828,0.918973,0.579715,1.251789,0.923247,17.460495,0.932984,0.595883,1.270085,0.932984,17.726696,-0.332691,-0.670224,0.004842,-0.332691,-6.32113,0.189327,0.138486,1.104468,0.138486,3.597213,0.0,0,0,0.0,0,0.052632,0,1,0.0,1,0.0,0,0,0.0,0,0.003158,0.0,0.06,0.0,0.06,0.0,0,0,0.0,0,19
2,100003,1.0,1,1,1.0,25,1.0,1,1,1.0,25,0.943315,-0.205416,10.75305,0.933844,23.582875,0.86823,-0.19319,9.93236,0.859478,21.705746,-0.417553,-1.59016,0.633431,0.306138,-10.438827,-0.419366,-1.58279,0.63209,0.306225,-10.484161,-0.517209,-0.670224,-0.257684,-0.520209,-12.930226,0.177125,0.138486,1.104468,0.138486,4.428128,0.0,0,0,0.0,0,0.52,0,1,1.0,13,0.0,0,0,0.0,0,0.0808,0.0,0.82,0.07,2.02,0.0,0,0,0.0,0,25
3,100004,1.0,1,1,1.0,3,1.0,1,1,1.0,3,-0.19685,-0.231236,-0.128078,-0.231236,-0.59055,-0.185275,-0.217047,-0.12173,-0.217047,-0.555825,0.36152,0.319879,0.404826,0.359854,1.084559,0.359912,0.322456,0.397368,0.359912,1.079735,-0.63272,-0.670224,-0.595217,-0.63272,-1.898161,0.46048,0.138486,1.104468,0.138486,1.38144,0.0,0,0,0.0,0,0.0,0,0,0.0,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,3
4,100005,1.0,1,1,1.0,9,1.0,1,1,1.0,9,-0.213776,-0.241994,0.01197,-0.241994,-1.923983,-0.200915,-0.226988,0.007673,-0.226988,-1.808231,0.551538,0.393583,0.725872,0.582213,4.963844,0.569664,0.419841,0.719487,0.569664,5.126974,-0.520209,-0.670224,-0.370195,-0.520209,-4.681884,0.245817,0.138486,1.104468,0.138486,2.212355,0.0,0,0,0.0,0,0.111111,0,1,0.0,1,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,9


In [111]:
len(installments_payments_df_agg)

339587

In [112]:
df_info_summary(installments_payments_df_agg)

                                     Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                                   339587           0     0.0    int64
AMT_PAYMENT_na_mean                          339587           0     0.0  float64
AMT_PAYMENT_na_min                           339587           0     0.0     int8
AMT_PAYMENT_na_max                           339587           0     0.0     int8
AMT_PAYMENT_na_median                        339587           0     0.0  float64
AMT_PAYMENT_na_sum                           339587           0     0.0    int64
DAYS_ENTRY_PAYMENT_na_mean                   339587           0     0.0  float64
DAYS_ENTRY_PAYMENT_na_min                    339587           0     0.0     int8
DAYS_ENTRY_PAYMENT_na_max                    339587           0     0.0     int8
DAYS_ENTRY_PAYMENT_na_median                 339587           0     0.0  float64
DAYS_ENTRY_PAYMENT_na_sum                    339587           0     0.0    int64
AMT_INSTALMENT_mean         

In [113]:
print("Count distinct of SK_ID_CURR: ", installments_payments_df_agg["SK_ID_CURR"].nunique())

Count distinct of SK_ID_CURR:  339587


In [114]:
del installments_payments_df
gc.collect()

188

### Data prep - credit_card_balance

In [115]:
len(credit_card_balance_df)

3840312

In [116]:
# Info sobre las columnas del credit_card_balance_df
"""
Histórico mensual del comportamiento de las tarjetas de crédito.
•	SK_ID_PREV → ID del crédito previo (relacionado a tarjeta).
•	SK_ID_CURR → ID del préstamo en nuestro dataset principal.
•	MONTHS_BALANCE → Mes relativo a la aplicación actual (0 = aplicación, -1 = un mes antes).
•	AMT_BALANCE → Balance actual de la tarjeta ese mes.
•	AMT_CREDIT_LIMIT_ACTUAL → Límite de crédito vigente ese mes.
•	AMT_DRAWINGS_ATM_CURRENT → Monto retirado en cajero automático ese mes.
•	AMT_DRAWINGS_CURRENT → Monto total retirado ese mes (ATM + POS + otros).
•	AMT_DRAWINGS_OTHER_CURRENT → Monto retirado en canales distintos a cajero o POS.
•	AMT_DRAWINGS_POS_CURRENT → Monto gastado en POS (compras).
•	AMT_INST_MIN_REGULARITY → Pago mínimo requerido ese mes.
•	AMT_PAYMENT_CURRENT → Pago realizado ese mes.
•	AMT_PAYMENT_TOTAL_CURRENT → Pagos totales realizados ese mes (suma de todos los abonos).
•	AMT_RECEIVABLE_PRINCIPAL → Principal pendiente de pago.
•	AMT_RECIVABLE → Total pendiente (principal + intereses).
•	AMT_TOTAL_RECEIVABLE → Variante de cálculo del total pendiente.
•	CNT_DRAWINGS_ATM_CURRENT → Número de retiros en cajero ese mes.
•	CNT_DRAWINGS_CURRENT → Número total de operaciones con la tarjeta ese mes.
•	CNT_DRAWINGS_OTHER_CURRENT → Número de operaciones en otros canales.
•	CNT_DRAWINGS_POS_CURRENT → Número de operaciones de compra en POS.
•	CNT_INSTALMENT_MATURE_CUM → Número acumulado de cuotas ya pagadas.
•	NAME_CONTRACT_STATUS → Estado del contrato (ej. Active, Completed).
•	SK_DPD → Días de atraso ese mes.
•	SK_DPD_DEF → Días de atraso con criterio más estricto (ignora deudas pequeñas).
"""
df_info_summary(credit_card_balance_df)

                            Non-Null Count  Null Count  % Null    Dtype
SK_ID_PREV                         3840312           0    0.00    int64
SK_ID_CURR                         3840312           0    0.00    int64
MONTHS_BALANCE                     3840312           0    0.00    int64
AMT_BALANCE                        3840312           0    0.00  float64
AMT_CREDIT_LIMIT_ACTUAL            3840312           0    0.00    int64
AMT_DRAWINGS_ATM_CURRENT           3090496      749816   19.52  float64
AMT_DRAWINGS_CURRENT               3840312           0    0.00  float64
AMT_DRAWINGS_OTHER_CURRENT         3090496      749816   19.52  float64
AMT_DRAWINGS_POS_CURRENT           3090496      749816   19.52  float64
AMT_INST_MIN_REGULARITY            3535076      305236    7.95  float64
AMT_PAYMENT_CURRENT                3072324      767988   20.00  float64
AMT_PAYMENT_TOTAL_CURRENT          3840312           0    0.00  float64
AMT_RECEIVABLE_PRINCIPAL           3840312           0    0.00  

In [117]:
credit_card_balance_df_fastai = procesar_tabular_fastai(
    credit_card_balance_df,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True
)

df_info_summary(credit_card_balance_df_fastai)

Columnas categóricas procesadas: ['NAME_CONTRACT_STATUS']
Columnas numéricas procesadas: ['AMT_BALANCE', 'AMT_CREDIT_LIMIT_ACTUAL', 'AMT_DRAWINGS_ATM_CURRENT', 'AMT_DRAWINGS_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT', 'AMT_DRAWINGS_POS_CURRENT', 'AMT_INST_MIN_REGULARITY', 'AMT_PAYMENT_CURRENT', 'AMT_PAYMENT_TOTAL_CURRENT', 'AMT_RECEIVABLE_PRINCIPAL', 'AMT_RECIVABLE', 'AMT_TOTAL_RECEIVABLE', 'CNT_DRAWINGS_ATM_CURRENT', 'CNT_DRAWINGS_CURRENT', 'CNT_DRAWINGS_OTHER_CURRENT', 'CNT_DRAWINGS_POS_CURRENT', 'CNT_INSTALMENT_MATURE_CUM', 'MONTHS_BALANCE', 'SK_DPD', 'SK_DPD_DEF']
Columnas excluidas del procesamiento: ['SK_ID_CURR', 'SK_ID_PREV']
                               Non-Null Count  Null Count  % Null    Dtype
NAME_CONTRACT_STATUS                  3840312           0     0.0     int8
AMT_DRAWINGS_ATM_CURRENT_na           3840312           0     0.0     int8
AMT_DRAWINGS_OTHER_CURRENT_na         3840312           0     0.0     int8
AMT_DRAWINGS_POS_CURRENT_na           3840312           0     

In [118]:
print("Count distinct of SK_ID_PREV: ", credit_card_balance_df_fastai["SK_ID_PREV"].nunique())
print("Count distinct of SK_ID_CURR: ", credit_card_balance_df_fastai["SK_ID_CURR"].nunique())

Count distinct of SK_ID_PREV:  104307
Count distinct of SK_ID_CURR:  103558


In [120]:
credit_card_balance_df_agg = resumir_por_id(
    credit_card_balance_df_fastai,
    id_col='SK_ID_CURR',
    excluir_cols=['SK_ID_PREV'],
    verbose=True,
    nombre_conteo='credit_card_balance_records'
)

# Mostrar las primeras filas
credit_card_balance_df_agg.head()

Columnas excluidas: ['SK_ID_PREV']
Columnas resumidas: ['AMT_BALANCE', 'AMT_CREDIT_LIMIT_ACTUAL', 'AMT_DRAWINGS_ATM_CURRENT', 'AMT_DRAWINGS_ATM_CURRENT_na', 'AMT_DRAWINGS_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT_na', 'AMT_DRAWINGS_POS_CURRENT', 'AMT_DRAWINGS_POS_CURRENT_na', 'AMT_INST_MIN_REGULARITY', 'AMT_INST_MIN_REGULARITY_na', 'AMT_PAYMENT_CURRENT', 'AMT_PAYMENT_CURRENT_na', 'AMT_PAYMENT_TOTAL_CURRENT', 'AMT_RECEIVABLE_PRINCIPAL', 'AMT_RECIVABLE', 'AMT_TOTAL_RECEIVABLE', 'CNT_DRAWINGS_ATM_CURRENT', 'CNT_DRAWINGS_ATM_CURRENT_na', 'CNT_DRAWINGS_CURRENT', 'CNT_DRAWINGS_OTHER_CURRENT', 'CNT_DRAWINGS_OTHER_CURRENT_na', 'CNT_DRAWINGS_POS_CURRENT', 'CNT_DRAWINGS_POS_CURRENT_na', 'CNT_INSTALMENT_MATURE_CUM', 'CNT_INSTALMENT_MATURE_CUM_na', 'MONTHS_BALANCE', 'NAME_CONTRACT_STATUS', 'SK_DPD', 'SK_DPD_DEF']


Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_STATUS_mean,NAME_CONTRACT_STATUS_min,NAME_CONTRACT_STATUS_max,NAME_CONTRACT_STATUS_median,NAME_CONTRACT_STATUS_sum,AMT_DRAWINGS_ATM_CURRENT_na_mean,AMT_DRAWINGS_ATM_CURRENT_na_min,AMT_DRAWINGS_ATM_CURRENT_na_max,AMT_DRAWINGS_ATM_CURRENT_na_median,AMT_DRAWINGS_ATM_CURRENT_na_sum,AMT_DRAWINGS_OTHER_CURRENT_na_mean,AMT_DRAWINGS_OTHER_CURRENT_na_min,AMT_DRAWINGS_OTHER_CURRENT_na_max,AMT_DRAWINGS_OTHER_CURRENT_na_median,AMT_DRAWINGS_OTHER_CURRENT_na_sum,AMT_DRAWINGS_POS_CURRENT_na_mean,AMT_DRAWINGS_POS_CURRENT_na_min,AMT_DRAWINGS_POS_CURRENT_na_max,AMT_DRAWINGS_POS_CURRENT_na_median,AMT_DRAWINGS_POS_CURRENT_na_sum,AMT_INST_MIN_REGULARITY_na_mean,AMT_INST_MIN_REGULARITY_na_min,AMT_INST_MIN_REGULARITY_na_max,AMT_INST_MIN_REGULARITY_na_median,AMT_INST_MIN_REGULARITY_na_sum,AMT_PAYMENT_CURRENT_na_mean,AMT_PAYMENT_CURRENT_na_min,AMT_PAYMENT_CURRENT_na_max,AMT_PAYMENT_CURRENT_na_median,AMT_PAYMENT_CURRENT_na_sum,CNT_DRAWINGS_ATM_CURRENT_na_mean,CNT_DRAWINGS_ATM_CURRENT_na_min,CNT_DRAWINGS_ATM_CURRENT_na_max,CNT_DRAWINGS_ATM_CURRENT_na_median,CNT_DRAWINGS_ATM_CURRENT_na_sum,CNT_DRAWINGS_OTHER_CURRENT_na_mean,CNT_DRAWINGS_OTHER_CURRENT_na_min,CNT_DRAWINGS_OTHER_CURRENT_na_max,CNT_DRAWINGS_OTHER_CURRENT_na_median,CNT_DRAWINGS_OTHER_CURRENT_na_sum,CNT_DRAWINGS_POS_CURRENT_na_mean,CNT_DRAWINGS_POS_CURRENT_na_min,CNT_DRAWINGS_POS_CURRENT_na_max,CNT_DRAWINGS_POS_CURRENT_na_median,CNT_DRAWINGS_POS_CURRENT_na_sum,CNT_INSTALMENT_MATURE_CUM_na_mean,CNT_INSTALMENT_MATURE_CUM_na_min,CNT_INSTALMENT_MATURE_CUM_na_max,CNT_INSTALMENT_MATURE_CUM_na_median,CNT_INSTALMENT_MATURE_CUM_na_sum,AMT_BALANCE_mean,AMT_BALANCE_min,AMT_BALANCE_max,AMT_BALANCE_median,AMT_BALANCE_sum,AMT_CREDIT_LIMIT_ACTUAL_mean,AMT_CREDIT_LIMIT_ACTUAL_min,AMT_CREDIT_LIMIT_ACTUAL_max,AMT_CREDIT_LIMIT_ACTUAL_median,AMT_CREDIT_LIMIT_ACTUAL_sum,AMT_DRAWINGS_ATM_CURRENT_mean,AMT_DRAWINGS_ATM_CURRENT_min,AMT_DRAWINGS_ATM_CURRENT_max,AMT_DRAWINGS_ATM_CURRENT_median,AMT_DRAWINGS_ATM_CURRENT_sum,AMT_DRAWINGS_CURRENT_mean,AMT_DRAWINGS_CURRENT_min,AMT_DRAWINGS_CURRENT_max,AMT_DRAWINGS_CURRENT_median,AMT_DRAWINGS_CURRENT_sum,AMT_DRAWINGS_OTHER_CURRENT_mean,AMT_DRAWINGS_OTHER_CURRENT_min,AMT_DRAWINGS_OTHER_CURRENT_max,AMT_DRAWINGS_OTHER_CURRENT_median,AMT_DRAWINGS_OTHER_CURRENT_sum,AMT_DRAWINGS_POS_CURRENT_mean,AMT_DRAWINGS_POS_CURRENT_min,AMT_DRAWINGS_POS_CURRENT_max,AMT_DRAWINGS_POS_CURRENT_median,AMT_DRAWINGS_POS_CURRENT_sum,AMT_INST_MIN_REGULARITY_mean,AMT_INST_MIN_REGULARITY_min,AMT_INST_MIN_REGULARITY_max,AMT_INST_MIN_REGULARITY_median,AMT_INST_MIN_REGULARITY_sum,AMT_PAYMENT_CURRENT_mean,AMT_PAYMENT_CURRENT_min,AMT_PAYMENT_CURRENT_max,AMT_PAYMENT_CURRENT_median,AMT_PAYMENT_CURRENT_sum,AMT_PAYMENT_TOTAL_CURRENT_mean,AMT_PAYMENT_TOTAL_CURRENT_min,AMT_PAYMENT_TOTAL_CURRENT_max,AMT_PAYMENT_TOTAL_CURRENT_median,AMT_PAYMENT_TOTAL_CURRENT_sum,AMT_RECEIVABLE_PRINCIPAL_mean,AMT_RECEIVABLE_PRINCIPAL_min,AMT_RECEIVABLE_PRINCIPAL_max,AMT_RECEIVABLE_PRINCIPAL_median,AMT_RECEIVABLE_PRINCIPAL_sum,AMT_RECIVABLE_mean,AMT_RECIVABLE_min,AMT_RECIVABLE_max,AMT_RECIVABLE_median,AMT_RECIVABLE_sum,AMT_TOTAL_RECEIVABLE_mean,AMT_TOTAL_RECEIVABLE_min,AMT_TOTAL_RECEIVABLE_max,AMT_TOTAL_RECEIVABLE_median,AMT_TOTAL_RECEIVABLE_sum,CNT_DRAWINGS_ATM_CURRENT_mean,CNT_DRAWINGS_ATM_CURRENT_min,CNT_DRAWINGS_ATM_CURRENT_max,CNT_DRAWINGS_ATM_CURRENT_median,CNT_DRAWINGS_ATM_CURRENT_sum,CNT_DRAWINGS_CURRENT_mean,CNT_DRAWINGS_CURRENT_min,CNT_DRAWINGS_CURRENT_max,CNT_DRAWINGS_CURRENT_median,CNT_DRAWINGS_CURRENT_sum,CNT_DRAWINGS_OTHER_CURRENT_mean,CNT_DRAWINGS_OTHER_CURRENT_min,CNT_DRAWINGS_OTHER_CURRENT_max,CNT_DRAWINGS_OTHER_CURRENT_median,CNT_DRAWINGS_OTHER_CURRENT_sum,CNT_DRAWINGS_POS_CURRENT_mean,CNT_DRAWINGS_POS_CURRENT_min,CNT_DRAWINGS_POS_CURRENT_max,CNT_DRAWINGS_POS_CURRENT_median,CNT_DRAWINGS_POS_CURRENT_sum,CNT_INSTALMENT_MATURE_CUM_mean,CNT_INSTALMENT_MATURE_CUM_min,CNT_INSTALMENT_MATURE_CUM_max,CNT_INSTALMENT_MATURE_CUM_median,CNT_INSTALMENT_MATURE_CUM_sum,MONTHS_BALANCE_mean,MONTHS_BALANCE_min,MONTHS_BALANCE_max,MONTHS_BALANCE_median,MONTHS_BALANCE_sum,SK_DPD_mean,SK_DPD_min,SK_DPD_max,SK_DPD_median,SK_DPD_sum,SK_DPD_DEF_mean,SK_DPD_DEF_min,SK_DPD_DEF_max,SK_DPD_DEF_median,SK_DPD_DEF_sum,credit_card_balance_records_count
0,100006,1.0,1,1,1.0,6,2.0,2,2,2.0,12,2.0,2,2,2.0,12,2.0,2,2,2.0,12,1.0,1,1,1.0,6,2.0,2,2,2.0,12,2.0,2,2,2.0,12,2.0,2,2,2.0,12,2.0,2,2,2.0,12,1.0,1,1,1.0,6,-0.548413,-0.548413,-0.548413,-0.548413,-3.290478,0.703573,0.703573,0.703573,0.703573,4.221438,-0.188645,-0.188645,-0.188645,-0.188645,-1.131872,-0.219623,-0.219623,-0.219623,-0.219623,-1.31774,-0.031514,-0.031514,-0.031514,-0.031514,-0.189086,-0.127806,-0.127806,-0.127806,-0.127806,-0.766837,-0.59711,-0.59711,-0.59711,-0.59711,-3.582661,-0.187044,-0.187044,-0.187044,-0.187044,-1.122267,-0.237107,-0.237107,-0.237107,-0.237107,-1.422645,-0.54583,-0.54583,-0.54583,-0.54583,-3.274978,-0.548187,-0.548187,-0.548187,-0.548187,-3.289121,-0.548243,-0.548243,-0.548243,-0.548243,-3.289458,-0.250347,-0.250347,-0.250347,-0.250347,-1.50208,-0.220397,-0.220397,-0.220397,-0.220397,-1.322384,-0.052224,-0.052224,-0.052224,-0.052224,-0.313347,-0.154427,-0.154427,-0.154427,-0.154427,-0.926561,-1.054892,-1.054892,-1.054892,-1.054892,-6.329351,1.163275,1.069529,1.257021,1.163275,6.979649,-0.095202,-0.095202,-0.095202,-0.095202,-0.571211,-0.015439,-0.015439,-0.015439,-0.015439,-0.092635,6
1,100011,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.013514,1,2,1.0,75,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.0,1,1,1.0,74,1.013514,1,2,1.0,75,-0.035915,-0.548413,1.229456,-0.548413,-2.65773,0.062861,-0.386374,0.1586,0.1586,4.651718,-0.092996,-0.188645,6.889413,-0.188645,-6.881699,-0.147756,-0.219623,5.098571,-0.219623,-10.933935,-0.031514,-0.031514,-0.031514,-0.031514,-2.332057,-0.127806,-0.127806,-0.127806,-0.127806,-9.457659,0.117988,-0.59711,1.05195,-0.59711,8.73112,-0.121008,-0.270431,1.441454,-0.25305,-8.954558,-0.095882,-0.237107,1.496475,-0.237107,-7.095249,-0.034757,-0.54583,1.209692,-0.54583,-2.572037,-0.034498,-0.553503,1.235415,-0.548187,-2.55288,-0.034586,-0.553559,1.235251,-0.548243,-2.55934,-0.196007,-0.250347,3.770809,-0.250347,-14.504493,-0.203454,-0.220397,1.033385,-0.220397,-15.055622,-0.052224,-0.052224,-0.052224,-0.052224,-3.86461,-0.154427,-0.154427,-0.154427,-0.154427,-11.427585,0.272479,-1.003085,0.654728,0.654728,20.163413,-0.149172,-1.517866,1.219523,-0.149172,-11.038723,-0.095202,-0.095202,-0.095202,-0.095202,-7.044932,-0.015439,-0.015439,-0.015439,-0.015439,-1.1425,74
2,100013,1.0,1,1,1.0,96,1.0625,1,2,1.0,102,1.0625,1,2,1.0,102,1.0625,1,2,1.0,102,1.072917,1,2,1.0,103,1.0,1,1,1.0,96,1.0625,1,2,1.0,102,1.0625,1,2,1.0,102,1.0625,1,2,1.0,102,1.072917,1,2,1.0,103,-0.377588,-0.548413,0.970021,-0.548413,-36.248433,-0.133756,-0.65886,0.022356,0.022356,-12.840566,0.045447,-0.188645,6.004656,-0.188645,4.36288,-0.043735,-0.219623,4.433797,-0.219623,-4.198576,-0.031514,-0.031514,-0.031514,-0.031514,-3.025372,-0.127806,-0.127806,-0.127806,-0.127806,-12.269395,-0.35003,-0.59711,0.845817,-0.59711,-33.602858,-0.049265,-0.270431,4.470921,-0.261968,-4.729474,-0.024111,-0.237107,4.564338,-0.237107,-2.314621,-0.377538,-0.54583,0.990252,-0.54583,-36.243633,-0.377366,-0.550776,0.975143,-0.548187,-36.227144,-0.377433,-0.550832,0.974995,-0.548243,-36.233527,-0.009496,-0.250347,6.786676,-0.250347,-0.911628,-0.145301,-0.220397,1.973722,-0.220397,-13.948897,-0.052224,-0.052224,-0.052224,-0.052224,-5.013548,-0.154427,-0.154427,-0.154427,-0.154427,-14.824975,-0.099167,-1.003085,0.084854,0.084854,-9.520038,-0.524157,-2.305335,1.257021,-0.524157,-50.319046,-0.095095,-0.095202,-0.084947,-0.095202,-9.129116,-0.014954,-0.015439,0.031117,-0.015439,-1.435606,96
3,100021,2.176471,1,3,3.0,37,2.0,2,2,2.0,34,2.0,2,2,2.0,34,2.0,2,2,2.0,34,1.0,1,1,1.0,17,2.0,2,2,2.0,34,2.0,2,2,2.0,34,2.0,2,2,2.0,34,2.0,2,2,2.0,34,1.0,1,1,1.0,17,-0.548413,-0.548413,-0.548413,-0.548413,-9.323022,3.155953,3.155953,3.155953,3.155953,53.651206,-0.188645,-0.188645,-0.188645,-0.188645,-3.206971,-0.219623,-0.219623,-0.219623,-0.219623,-3.733597,-0.031514,-0.031514,-0.031514,-0.031514,-0.535743,-0.127806,-0.127806,-0.127806,-0.127806,-2.172705,-0.59711,-0.59711,-0.59711,-0.59711,-10.150872,-0.187044,-0.187044,-0.187044,-0.187044,-3.179755,-0.237107,-0.237107,-0.237107,-0.237107,-4.030826,-0.54583,-0.54583,-0.54583,-0.54583,-9.279103,-0.548187,-0.548187,-0.548187,-0.548187,-9.319176,-0.548243,-0.548243,-0.548243,-0.548243,-9.32013,-0.250347,-0.250347,-0.250347,-0.250347,-4.255892,-0.220397,-0.220397,-0.220397,-0.220397,-3.746755,-0.052224,-0.052224,-0.052224,-0.052224,-0.887816,-0.154427,-0.154427,-0.154427,-0.154427,-2.625256,-1.054892,-1.054892,-1.054892,-1.054892,-17.93316,0.919535,0.619547,1.219523,0.919535,15.632091,-0.095202,-0.095202,-0.095202,-0.095202,-1.61843,-0.015439,-0.015439,-0.015439,-0.015439,-0.262466,17
4,100023,1.0,1,1,1.0,8,2.0,2,2,2.0,16,2.0,2,2,2.0,16,2.0,2,2,2.0,16,1.0,1,1,1.0,8,2.0,2,2,2.0,16,2.0,2,2,2.0,16,2.0,2,2,2.0,16,2.0,2,2,2.0,16,1.0,1,1,1.0,8,-0.548413,-0.548413,-0.548413,-0.548413,-4.387305,-0.113887,-0.65886,0.431086,-0.113887,-0.911097,-0.188645,-0.188645,-0.188645,-0.188645,-1.509163,-0.219623,-0.219623,-0.219623,-0.219623,-1.756987,-0.031514,-0.031514,-0.031514,-0.031514,-0.252114,-0.127806,-0.127806,-0.127806,-0.127806,-1.02245,-0.59711,-0.59711,-0.59711,-0.59711,-4.776881,-0.187044,-0.187044,-0.187044,-0.187044,-1.496355,-0.237107,-0.237107,-0.237107,-0.237107,-1.896859,-0.54583,-0.54583,-0.54583,-0.54583,-4.366637,-0.548187,-0.548187,-0.548187,-0.548187,-4.385494,-0.548243,-0.548243,-0.548243,-0.548243,-4.385944,-0.250347,-0.250347,-0.250347,-0.250347,-2.002773,-0.220397,-0.220397,-0.220397,-0.220397,-1.763179,-0.052224,-0.052224,-0.052224,-0.052224,-0.417796,-0.154427,-0.154427,-0.154427,-0.154427,-1.235415,-1.054892,-1.054892,-1.054892,-1.054892,-8.439134,1.013281,0.882036,1.144526,1.013281,8.106248,-0.095202,-0.095202,-0.095202,-0.095202,-0.761614,-0.015439,-0.015439,-0.015439,-0.015439,-0.123514,8


In [121]:
len(credit_card_balance_df_agg)

103558

In [122]:
credit_card_balance_df_agg.shape

(103558, 152)

In [123]:
df_info_summary(credit_card_balance_df_agg)

                                      Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                                    103558           0     0.0    int64
NAME_CONTRACT_STATUS_mean                     103558           0     0.0  float64
NAME_CONTRACT_STATUS_min                      103558           0     0.0     int8
NAME_CONTRACT_STATUS_max                      103558           0     0.0     int8
NAME_CONTRACT_STATUS_median                   103558           0     0.0  float64
NAME_CONTRACT_STATUS_sum                      103558           0     0.0    int64
AMT_DRAWINGS_ATM_CURRENT_na_mean              103558           0     0.0  float64
AMT_DRAWINGS_ATM_CURRENT_na_min               103558           0     0.0     int8
AMT_DRAWINGS_ATM_CURRENT_na_max               103558           0     0.0     int8
AMT_DRAWINGS_ATM_CURRENT_na_median            103558           0     0.0  float64
AMT_DRAWINGS_ATM_CURRENT_na_sum               103558           0     0.0    int64
AMT_DRAWINGS_OTH

In [124]:
del credit_card_balance_df
gc.collect()

353

## Join with current application data

In [125]:
train_df = pd.read_csv(os.path.join(PATH, "home-credit-default-risk/application_train.csv"))

# Detectar columnas con solo "y"/"n" y mapear a 1/0
bool_cols = [c for c in train_df.columns if set(train_df[c].dropna().unique()) <= {"y", "n"}]
if bool_cols:
    train_df[bool_cols] = train_df[bool_cols].apply(lambda s: s.map({"y": 1, "n": 0}).astype("int8"))

In [126]:
len(train_df)

307511

In [127]:
df_info_summary(train_df)

                              Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                            307511           0    0.00    int64
TARGET                                307511           0    0.00    int64
NAME_CONTRACT_TYPE                    307511           0    0.00   object
CODE_GENDER                           307511           0    0.00   object
FLAG_OWN_CAR                          307511           0    0.00   object
FLAG_OWN_REALTY                       307511           0    0.00   object
CNT_CHILDREN                          307511           0    0.00    int64
AMT_INCOME_TOTAL                      307511           0    0.00  float64
AMT_CREDIT                            307511           0    0.00  float64
AMT_ANNUITY                           307499          12    0.00  float64
AMT_GOODS_PRICE                       307233         278    0.09  float64
NAME_TYPE_SUITE                       306219        1292    0.42   object
NAME_INCOME_TYPE                      

In [128]:
print("Count distinct of SK_ID_CURR: ", train_df["SK_ID_CURR"].nunique())

Count distinct of SK_ID_CURR:  307511


In [129]:
# Antes de los joins, agregamos prefijos a las columnas de cada dataset para facilitar la identificación posterior
bureau_df_agg = bureau_df_agg.add_prefix("BB_")
previous_application_summary = previous_application_summary.add_prefix("PA_")
pos_cash_balance_df_agg = pos_cash_balance_df_agg.add_prefix("PCB_")    
installments_payments_df_agg = installments_payments_df_agg.add_prefix("IP_")
credit_card_balance_df_agg = credit_card_balance_df_agg.add_prefix("CCB_")

In [130]:
previous_application_summary.head()

Unnamed: 0,PA_SK_ID_CURR,PA_CHANNEL_TYPE_mean,PA_CHANNEL_TYPE_min,PA_CHANNEL_TYPE_max,PA_CHANNEL_TYPE_median,PA_CHANNEL_TYPE_sum,PA_CODE_REJECT_REASON_mean,PA_CODE_REJECT_REASON_min,PA_CODE_REJECT_REASON_max,PA_CODE_REJECT_REASON_median,PA_CODE_REJECT_REASON_sum,PA_FLAG_LAST_APPL_PER_CONTRACT_mean,PA_FLAG_LAST_APPL_PER_CONTRACT_min,PA_FLAG_LAST_APPL_PER_CONTRACT_max,PA_FLAG_LAST_APPL_PER_CONTRACT_median,PA_FLAG_LAST_APPL_PER_CONTRACT_sum,PA_NAME_CASH_LOAN_PURPOSE_mean,PA_NAME_CASH_LOAN_PURPOSE_min,PA_NAME_CASH_LOAN_PURPOSE_max,PA_NAME_CASH_LOAN_PURPOSE_median,PA_NAME_CASH_LOAN_PURPOSE_sum,PA_NAME_CLIENT_TYPE_mean,PA_NAME_CLIENT_TYPE_min,PA_NAME_CLIENT_TYPE_max,PA_NAME_CLIENT_TYPE_median,PA_NAME_CLIENT_TYPE_sum,PA_NAME_CONTRACT_STATUS_mean,PA_NAME_CONTRACT_STATUS_min,PA_NAME_CONTRACT_STATUS_max,PA_NAME_CONTRACT_STATUS_median,PA_NAME_CONTRACT_STATUS_sum,PA_NAME_CONTRACT_TYPE_mean,PA_NAME_CONTRACT_TYPE_min,PA_NAME_CONTRACT_TYPE_max,PA_NAME_CONTRACT_TYPE_median,PA_NAME_CONTRACT_TYPE_sum,PA_NAME_GOODS_CATEGORY_mean,PA_NAME_GOODS_CATEGORY_min,PA_NAME_GOODS_CATEGORY_max,PA_NAME_GOODS_CATEGORY_median,PA_NAME_GOODS_CATEGORY_sum,PA_NAME_PAYMENT_TYPE_mean,PA_NAME_PAYMENT_TYPE_min,PA_NAME_PAYMENT_TYPE_max,PA_NAME_PAYMENT_TYPE_median,PA_NAME_PAYMENT_TYPE_sum,PA_NAME_PORTFOLIO_mean,PA_NAME_PORTFOLIO_min,PA_NAME_PORTFOLIO_max,PA_NAME_PORTFOLIO_median,PA_NAME_PORTFOLIO_sum,PA_NAME_PRODUCT_TYPE_mean,PA_NAME_PRODUCT_TYPE_min,PA_NAME_PRODUCT_TYPE_max,PA_NAME_PRODUCT_TYPE_median,PA_NAME_PRODUCT_TYPE_sum,PA_NAME_SELLER_INDUSTRY_mean,PA_NAME_SELLER_INDUSTRY_min,PA_NAME_SELLER_INDUSTRY_max,PA_NAME_SELLER_INDUSTRY_median,PA_NAME_SELLER_INDUSTRY_sum,PA_NAME_TYPE_SUITE_mean,PA_NAME_TYPE_SUITE_min,PA_NAME_TYPE_SUITE_max,PA_NAME_TYPE_SUITE_median,PA_NAME_TYPE_SUITE_sum,PA_NAME_YIELD_GROUP_mean,PA_NAME_YIELD_GROUP_min,PA_NAME_YIELD_GROUP_max,PA_NAME_YIELD_GROUP_median,PA_NAME_YIELD_GROUP_sum,PA_PRODUCT_COMBINATION_mean,PA_PRODUCT_COMBINATION_min,PA_PRODUCT_COMBINATION_max,PA_PRODUCT_COMBINATION_median,PA_PRODUCT_COMBINATION_sum,PA_WEEKDAY_APPR_PROCESS_START_mean,PA_WEEKDAY_APPR_PROCESS_START_min,PA_WEEKDAY_APPR_PROCESS_START_max,PA_WEEKDAY_APPR_PROCESS_START_median,PA_WEEKDAY_APPR_PROCESS_START_sum,PA_AMT_ANNUITY_na_mean,PA_AMT_ANNUITY_na_min,PA_AMT_ANNUITY_na_max,PA_AMT_ANNUITY_na_median,PA_AMT_ANNUITY_na_sum,PA_AMT_CREDIT_na_mean,PA_AMT_CREDIT_na_min,PA_AMT_CREDIT_na_max,PA_AMT_CREDIT_na_median,PA_AMT_CREDIT_na_sum,PA_AMT_DOWN_PAYMENT_na_mean,PA_AMT_DOWN_PAYMENT_na_min,PA_AMT_DOWN_PAYMENT_na_max,PA_AMT_DOWN_PAYMENT_na_median,PA_AMT_DOWN_PAYMENT_na_sum,PA_AMT_GOODS_PRICE_na_mean,PA_AMT_GOODS_PRICE_na_min,PA_AMT_GOODS_PRICE_na_max,PA_AMT_GOODS_PRICE_na_median,PA_AMT_GOODS_PRICE_na_sum,PA_CNT_PAYMENT_na_mean,PA_CNT_PAYMENT_na_min,PA_CNT_PAYMENT_na_max,PA_CNT_PAYMENT_na_median,PA_CNT_PAYMENT_na_sum,PA_DAYS_FIRST_DRAWING_na_mean,PA_DAYS_FIRST_DRAWING_na_min,PA_DAYS_FIRST_DRAWING_na_max,PA_DAYS_FIRST_DRAWING_na_median,PA_DAYS_FIRST_DRAWING_na_sum,PA_DAYS_FIRST_DUE_na_mean,PA_DAYS_FIRST_DUE_na_min,PA_DAYS_FIRST_DUE_na_max,PA_DAYS_FIRST_DUE_na_median,PA_DAYS_FIRST_DUE_na_sum,PA_DAYS_LAST_DUE_na_mean,PA_DAYS_LAST_DUE_na_min,PA_DAYS_LAST_DUE_na_max,PA_DAYS_LAST_DUE_na_median,PA_DAYS_LAST_DUE_na_sum,PA_DAYS_LAST_DUE_1ST_VERSION_na_mean,PA_DAYS_LAST_DUE_1ST_VERSION_na_min,PA_DAYS_LAST_DUE_1ST_VERSION_na_max,PA_DAYS_LAST_DUE_1ST_VERSION_na_median,PA_DAYS_LAST_DUE_1ST_VERSION_na_sum,PA_DAYS_TERMINATION_na_mean,PA_DAYS_TERMINATION_na_min,PA_DAYS_TERMINATION_na_max,PA_DAYS_TERMINATION_na_median,PA_DAYS_TERMINATION_na_sum,PA_NFLAG_INSURED_ON_APPROVAL_na_mean,PA_NFLAG_INSURED_ON_APPROVAL_na_min,PA_NFLAG_INSURED_ON_APPROVAL_na_max,PA_NFLAG_INSURED_ON_APPROVAL_na_median,PA_NFLAG_INSURED_ON_APPROVAL_na_sum,PA_RATE_DOWN_PAYMENT_na_mean,PA_RATE_DOWN_PAYMENT_na_min,PA_RATE_DOWN_PAYMENT_na_max,PA_RATE_DOWN_PAYMENT_na_median,PA_RATE_DOWN_PAYMENT_na_sum,PA_RATE_INTEREST_PRIMARY_na_mean,PA_RATE_INTEREST_PRIMARY_na_min,PA_RATE_INTEREST_PRIMARY_na_max,PA_RATE_INTEREST_PRIMARY_na_median,PA_RATE_INTEREST_PRIMARY_na_sum,PA_RATE_INTEREST_PRIVILEGED_na_mean,PA_RATE_INTEREST_PRIVILEGED_na_min,PA_RATE_INTEREST_PRIVILEGED_na_max,PA_RATE_INTEREST_PRIVILEGED_na_median,PA_RATE_INTEREST_PRIVILEGED_na_sum,PA_AMT_ANNUITY_mean,PA_AMT_ANNUITY_min,PA_AMT_ANNUITY_max,PA_AMT_ANNUITY_median,PA_AMT_ANNUITY_sum,PA_AMT_APPLICATION_mean,PA_AMT_APPLICATION_min,PA_AMT_APPLICATION_max,PA_AMT_APPLICATION_median,PA_AMT_APPLICATION_sum,PA_AMT_CREDIT_mean,PA_AMT_CREDIT_min,PA_AMT_CREDIT_max,PA_AMT_CREDIT_median,PA_AMT_CREDIT_sum,PA_AMT_DOWN_PAYMENT_mean,PA_AMT_DOWN_PAYMENT_min,PA_AMT_DOWN_PAYMENT_max,PA_AMT_DOWN_PAYMENT_median,PA_AMT_DOWN_PAYMENT_sum,PA_AMT_GOODS_PRICE_mean,PA_AMT_GOODS_PRICE_min,PA_AMT_GOODS_PRICE_max,PA_AMT_GOODS_PRICE_median,PA_AMT_GOODS_PRICE_sum,PA_CNT_PAYMENT_mean,PA_CNT_PAYMENT_min,PA_CNT_PAYMENT_max,PA_CNT_PAYMENT_median,PA_CNT_PAYMENT_sum,PA_DAYS_DECISION_mean,PA_DAYS_DECISION_min,PA_DAYS_DECISION_max,PA_DAYS_DECISION_median,PA_DAYS_DECISION_sum,PA_DAYS_FIRST_DRAWING_mean,PA_DAYS_FIRST_DRAWING_min,PA_DAYS_FIRST_DRAWING_max,PA_DAYS_FIRST_DRAWING_median,PA_DAYS_FIRST_DRAWING_sum,PA_DAYS_FIRST_DUE_mean,PA_DAYS_FIRST_DUE_min,PA_DAYS_FIRST_DUE_max,PA_DAYS_FIRST_DUE_median,PA_DAYS_FIRST_DUE_sum,PA_DAYS_LAST_DUE_mean,PA_DAYS_LAST_DUE_min,PA_DAYS_LAST_DUE_max,PA_DAYS_LAST_DUE_median,PA_DAYS_LAST_DUE_sum,PA_DAYS_LAST_DUE_1ST_VERSION_mean,PA_DAYS_LAST_DUE_1ST_VERSION_min,PA_DAYS_LAST_DUE_1ST_VERSION_max,PA_DAYS_LAST_DUE_1ST_VERSION_median,PA_DAYS_LAST_DUE_1ST_VERSION_sum,PA_DAYS_TERMINATION_mean,PA_DAYS_TERMINATION_min,PA_DAYS_TERMINATION_max,PA_DAYS_TERMINATION_median,PA_DAYS_TERMINATION_sum,PA_HOUR_APPR_PROCESS_START_mean,PA_HOUR_APPR_PROCESS_START_min,PA_HOUR_APPR_PROCESS_START_max,PA_HOUR_APPR_PROCESS_START_median,PA_HOUR_APPR_PROCESS_START_sum,PA_NFLAG_INSURED_ON_APPROVAL_mean,PA_NFLAG_INSURED_ON_APPROVAL_min,PA_NFLAG_INSURED_ON_APPROVAL_max,PA_NFLAG_INSURED_ON_APPROVAL_median,PA_NFLAG_INSURED_ON_APPROVAL_sum,PA_NFLAG_LAST_APPL_IN_DAY_mean,PA_NFLAG_LAST_APPL_IN_DAY_min,PA_NFLAG_LAST_APPL_IN_DAY_max,PA_NFLAG_LAST_APPL_IN_DAY_median,PA_NFLAG_LAST_APPL_IN_DAY_sum,PA_RATE_DOWN_PAYMENT_mean,PA_RATE_DOWN_PAYMENT_min,PA_RATE_DOWN_PAYMENT_max,PA_RATE_DOWN_PAYMENT_median,PA_RATE_DOWN_PAYMENT_sum,PA_RATE_INTEREST_PRIMARY_mean,PA_RATE_INTEREST_PRIMARY_min,PA_RATE_INTEREST_PRIMARY_max,PA_RATE_INTEREST_PRIMARY_median,PA_RATE_INTEREST_PRIMARY_sum,PA_RATE_INTEREST_PRIVILEGED_mean,PA_RATE_INTEREST_PRIVILEGED_min,PA_RATE_INTEREST_PRIVILEGED_max,PA_RATE_INTEREST_PRIVILEGED_median,PA_RATE_INTEREST_PRIVILEGED_sum,PA_SELLERPLACE_AREA_mean,PA_SELLERPLACE_AREA_min,PA_SELLERPLACE_AREA_max,PA_SELLERPLACE_AREA_median,PA_SELLERPLACE_AREA_sum,PA_SK_ID_PREV_mean,PA_SK_ID_PREV_min,PA_SK_ID_PREV_max,PA_SK_ID_PREV_median,PA_SK_ID_PREV_sum,PA_previous_application_records_count
0,100001,5.0,5,5,5.0,5,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,2.0,2,2,2.0,2,1.0,1,1,1.0,1,2.0,2,2,2.0,2,20.0,20,20,20.0,20,1.0,1,1,1.0,1,4.0,4,4,4.0,4,1.0,1,1,1.0,1,3.0,3,3,3.0,3,2.0,2,2,2.0,2,2.0,2,2,2.0,2,14.0,14,14,14.0,14,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.831379,-0.831379,-0.831379,-0.831379,-0.831379,-0.513691,-0.513691,-0.513691,-0.513691,-0.513691,-0.540932,-0.540932,-0.540932,-0.540932,-0.540932,-0.101174,-0.101174,-0.101174,-0.101174,-0.101174,-0.627873,-0.627873,-0.627873,-0.627873,-0.627873,-0.552074,-0.552074,-0.552074,-0.552074,-0.552074,-1.102966,-1.102966,-1.102966,-1.102966,-1.102966,0.197503,0.197503,0.197503,0.197503,0.197503,-0.170613,-0.170613,-0.170613,-0.170613,-0.170613,-0.387346,-0.387346,-0.387346,-0.387346,-0.387346,-0.255368,-0.255368,-0.255368,-0.255368,-0.255368,-0.402338,-0.402338,-0.402338,-0.402338,-0.402338,0.154713,0.154713,0.154713,0.154713,0.154713,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,0.531527,0.531527,0.531527,0.531527,0.531527,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,-0.040821,-0.040821,-0.040821,-0.040821,-0.040821,1369693.0,1369693,1369693,1369693.0,1369693,1
1,100002,8.0,8,8,8.0,8,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,26.0,26,26,26.0,26,4.0,4,4,4.0,4,4.0,4,4,4.0,4,1.0,1,1,1.0,1,1.0,1,1,1.0,1,0.0,0,0,0.0,0,4.0,4,4,4.0,4,16.0,16,16,16.0,16,3.0,3,3,3.0,3,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.42912,-0.42912,-0.42912,-0.42912,-0.42912,0.013051,0.013051,0.013051,0.013051,0.013051,-0.053548,-0.053548,-0.053548,-0.053548,-0.053548,-0.27536,-0.27536,-0.27536,-0.27536,-0.27536,-0.078781,-0.078781,-0.078781,-0.078781,-0.078781,0.683238,0.683238,0.683238,0.683238,0.683238,0.35256,0.35256,0.35256,0.35256,0.35256,0.197503,0.197503,0.197503,0.197503,0.197503,-0.150342,-0.150342,-0.150342,-0.150342,-0.150342,-0.374243,-0.374243,-0.374243,-0.374243,-0.374243,-0.236091,-0.236091,-0.236091,-0.236091,-0.236091,-0.389596,-0.389596,-0.389596,-0.389596,-0.389596,-1.045037,-1.045037,-1.045037,-1.045037,-1.045037,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,-0.864389,-0.864389,-0.864389,-0.864389,-0.864389,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,0.026103,0.026103,0.026103,0.026103,0.026103,1038818.0,1038818,1038818,1038818.0,1038818,1
2,100003,6.333333,5,8,6.0,19,8.0,8,8,8.0,24,2.0,2,2,2.0,6,24.333333,24,25,24.0,73,2.333333,2,3,2.0,7,1.0,1,1,1.0,3,1.666667,1,2,2.0,5,16.0,8,28,12.0,48,2.0,1,4,1.0,6,3.666667,3,4,4.0,11,1.666667,1,3,1.0,5,7.333333,5,11,6.0,22,3.666667,2,7,2.0,11,4.666667,4,5,5.0,14,10.0,8,12,10.0,30,2.666667,1,4,3.0,8,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.333333,1,2,1.0,4,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.0,1,1,1.0,3,1.333333,1,2,1.0,4,2.0,2,2,2.0,6,2.0,2,2,2.0,6,3.160497,-0.619935,6.332796,3.76863,9.481492,0.888732,-0.363496,2.475466,0.554226,2.666196,0.904269,-0.40198,2.636018,0.478769,2.712808,-0.078986,-0.27536,0.200541,-0.162139,-0.236959,0.834055,-0.471305,2.488113,0.485356,2.502165,-0.39766,-0.706488,-0.243246,-0.243246,-1.192981,-0.544629,-1.87437,0.172866,0.067616,-1.633888,0.197503,0.197503,0.197503,0.197503,0.592508,-0.162911,-0.181262,-0.153018,-0.154453,-0.488733,-0.382704,-0.390313,-0.378444,-0.379356,-1.148112,-0.249496,-0.261078,-0.242157,-0.245255,-0.748489,-0.397827,-0.405246,-0.39367,-0.394565,-1.193482,0.654609,-0.145224,1.354464,0.754588,1.963828,1.173491,-0.497734,2.009104,2.009104,3.520473,0.05954,0.05954,0.05954,0.05954,0.17862,-0.187945,-0.864389,0.474452,-0.173897,-0.563834,0.000521,0.000521,0.000521,0.000521,0.001563,0.031098,0.031098,0.031098,0.031098,0.093294,0.030733,-0.044189,0.152376,-0.015988,0.0922,2281150.0,1810518,2636178,2396755.0,6843451,3
3,100004,7.0,7,7,7.0,7,8.0,8,8,8.0,8,2.0,2,2,2.0,2,24.0,24,24,24.0,24,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,20.0,20,20,20.0,20,1.0,1,1,1.0,1,4.0,4,4,4.0,4,1.0,1,1,1.0,1,3.0,3,3,3.0,3,7.0,7,7,7.0,7,5.0,5,5,5.0,5,15.0,15,15,15.0,15,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,1.0,1,1,1.0,1,2.0,2,2,2.0,2,2.0,2,2,2.0,2,-0.724663,-0.724663,-0.724663,-0.724663,-0.724663,-0.515582,-0.515582,-0.515582,-0.515582,-0.515582,-0.552486,-0.552486,-0.552486,-0.552486,-0.552486,0.06057,0.06057,0.06057,0.06057,0.06057,-0.629843,-0.629843,-0.629843,-0.629843,-0.629843,-0.860902,-0.860902,-0.860902,-0.860902,-0.860902,0.084302,0.084302,0.084302,0.084302,0.084302,0.197503,0.197503,0.197503,0.197503,0.197503,-0.154223,-0.154223,-0.154223,-0.154223,-0.154223,-0.379989,-0.379989,-0.379989,-0.379989,-0.379989,-0.245813,-0.245813,-0.245813,-0.245813,-0.245813,-0.395164,-0.395164,-0.395164,-0.395164,-0.395164,-2.244787,-2.244787,-2.244787,-2.244787,-2.244787,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,1.972338,1.972338,1.972338,1.972338,1.972338,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,-0.039839,-0.039839,-0.039839,-0.039839,-0.039839,1564014.0,1564014,1564014,1564014.0,1564014,1
4,100005,5.5,5,6,5.5,11,8.0,8,8,8.0,16,2.0,2,2,2.0,4,24.5,24,25,24.5,49,2.0,1,3,2.0,4,1.5,1,2,1.5,3,1.5,1,2,1.5,3,24.0,20,28,24.0,48,2.5,1,4,2.5,5,4.5,4,5,4.5,9,1.0,1,1,1.0,2,7.0,3,11,7.0,14,0.0,0,0,0.0,0,1.5,1,2,1.5,3,8.5,3,14,8.5,17,3.0,1,5,3.0,6,1.5,1,2,1.5,3,1.0,1,1,1.0,2,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,1.5,1,2,1.5,3,2.0,2,2,2.0,4,2.0,2,2,2.0,4,-0.521715,-0.765949,-0.277481,-0.521715,-1.04343,-0.522321,-0.598518,-0.446125,-0.522321,-1.044643,-0.552578,-0.615599,-0.489557,-0.552578,-1.105156,-0.064471,-0.162139,0.033198,-0.064471,-0.128941,-0.436914,-0.55744,-0.316388,-0.436914,-0.873827,-0.243246,-0.243246,-0.243246,-0.243246,-0.486493,0.442408,0.158747,0.726069,0.442408,0.884816,0.197503,0.197503,0.197503,0.197503,0.395005,-0.153948,-0.155055,-0.15284,-0.153948,-0.307896,-0.37816,-0.378452,-0.377868,-0.37816,-0.75632,-0.241949,-0.242038,-0.24186,-0.241949,-0.483898,-0.393291,-0.393447,-0.393135,-0.393291,-0.786582,-0.595131,-0.745099,-0.445162,-0.595131,-1.190261,-0.497734,-0.497734,-0.497734,-0.497734,-0.995468,0.05954,0.05954,0.05954,0.05954,0.11908,0.209843,-0.173897,0.593583,0.209843,0.419686,0.000521,0.000521,0.000521,0.000521,0.001042,0.031098,0.031098,0.031098,0.031098,0.062196,-0.041523,-0.044189,-0.038857,-0.041523,-0.083046,2176837.0,1857999,2495675,2176837.0,4353674,2


In [131]:
# Join train_df con bureau_df
train_df_join = train_df.merge(bureau_df_agg, how="left", left_on="SK_ID_CURR", right_on="BB_SK_ID_CURR")
print("rows after join:", len(train_df_join))

rows after join: 307511


In [132]:
# Join train_df con previous_application_df
train_df_join = train_df_join.merge(previous_application_summary, how="left", left_on="SK_ID_CURR", right_on="PA_SK_ID_CURR")
print("rows after join:", len(train_df_join))

rows after join: 307511


In [133]:
# Join train_df con pos_cash_balance_df
train_df_join = train_df_join.merge(pos_cash_balance_df_agg, how="left", left_on="SK_ID_CURR", right_on="PCB_SK_ID_CURR")
print("rows after join:", len(train_df_join))

rows after join: 307511


In [134]:
# Join train_df con installments_payments_df
train_df_join = train_df_join.merge(installments_payments_df_agg, how="left", left_on="SK_ID_CURR", right_on="IP_SK_ID_CURR")
print("rows after join:", len(train_df_join))

rows after join: 307511


In [135]:
# Join train_df con credit_card_balance_df
train_df_join = train_df_join.merge(credit_card_balance_df_agg, how="left", left_on="SK_ID_CURR", right_on="CCB_SK_ID_CURR")
print("rows after join:", len(train_df_join))

rows after join: 307511


In [136]:
train_df_join.head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,BB_SK_ID_CURR,BB_CREDIT_ACTIVE_mean,BB_CREDIT_ACTIVE_min,BB_CREDIT_ACTIVE_max,BB_CREDIT_ACTIVE_median,BB_CREDIT_ACTIVE_sum,BB_CREDIT_CURRENCY_mean,BB_CREDIT_CURRENCY_min,BB_CREDIT_CURRENCY_max,BB_CREDIT_CURRENCY_median,BB_CREDIT_CURRENCY_sum,BB_CREDIT_TYPE_mean,BB_CREDIT_TYPE_min,BB_CREDIT_TYPE_max,BB_CREDIT_TYPE_median,BB_CREDIT_TYPE_sum,BB_AMT_ANNUITY_na_mean,BB_AMT_ANNUITY_na_min,BB_AMT_ANNUITY_na_max,BB_AMT_ANNUITY_na_median,BB_AMT_ANNUITY_na_sum,BB_AMT_CREDIT_MAX_OVERDUE_na_mean,BB_AMT_CREDIT_MAX_OVERDUE_na_min,BB_AMT_CREDIT_MAX_OVERDUE_na_max,BB_AMT_CREDIT_MAX_OVERDUE_na_median,BB_AMT_CREDIT_MAX_OVERDUE_na_sum,BB_AMT_CREDIT_SUM_na_mean,BB_AMT_CREDIT_SUM_na_min,BB_AMT_CREDIT_SUM_na_max,BB_AMT_CREDIT_SUM_na_median,BB_AMT_CREDIT_SUM_na_sum,BB_AMT_CREDIT_SUM_DEBT_na_mean,BB_AMT_CREDIT_SUM_DEBT_na_min,BB_AMT_CREDIT_SUM_DEBT_na_max,BB_AMT_CREDIT_SUM_DEBT_na_median,BB_AMT_CREDIT_SUM_DEBT_na_sum,BB_AMT_CREDIT_SUM_LIMIT_na_mean,BB_AMT_CREDIT_SUM_LIMIT_na_min,BB_AMT_CREDIT_SUM_LIMIT_na_max,BB_AMT_CREDIT_SUM_LIMIT_na_median,BB_AMT_CREDIT_SUM_LIMIT_na_sum,BB_DAYS_CREDIT_ENDDATE_na_mean,BB_DAYS_CREDIT_ENDDATE_na_min,BB_DAYS_CREDIT_ENDDATE_na_max,BB_DAYS_CREDIT_ENDDATE_na_median,BB_DAYS_CREDIT_ENDDATE_na_sum,BB_DAYS_ENDDATE_FACT_na_mean,BB_DAYS_ENDDATE_FACT_na_min,BB_DAYS_ENDDATE_FACT_na_max,BB_DAYS_ENDDATE_FACT_na_median,BB_DAYS_ENDDATE_FACT_na_sum,BB_AMT_ANNUITY_mean,BB_AMT_ANNUITY_min,BB_AMT_ANNUITY_max,BB_AMT_ANNUITY_median,BB_AMT_ANNUITY_sum,BB_AMT_CREDIT_MAX_OVERDUE_mean,BB_AMT_CREDIT_MAX_OVERDUE_min,BB_AMT_CREDIT_MAX_OVERDUE_max,BB_AMT_CREDIT_MAX_OVERDUE_median,BB_AMT_CREDIT_MAX_OVERDUE_sum,BB_AMT_CREDIT_SUM_mean,BB_AMT_CREDIT_SUM_min,BB_AMT_CREDIT_SUM_max,BB_AMT_CREDIT_SUM_median,BB_AMT_CREDIT_SUM_sum,BB_AMT_CREDIT_SUM_DEBT_mean,BB_AMT_CREDIT_SUM_DEBT_min,BB_AMT_CREDIT_SUM_DEBT_max,BB_AMT_CREDIT_SUM_DEBT_median,BB_AMT_CREDIT_SUM_DEBT_sum,BB_AMT_CREDIT_SUM_LIMIT_mean,BB_AMT_CREDIT_SUM_LIMIT_min,BB_AMT_CREDIT_SUM_LIMIT_max,BB_AMT_CREDIT_SUM_LIMIT_median,BB_AMT_CREDIT_SUM_LIMIT_sum,BB_AMT_CREDIT_SUM_OVERDUE_mean,BB_AMT_CREDIT_SUM_OVERDUE_min,BB_AMT_CREDIT_SUM_OVERDUE_max,BB_AMT_CREDIT_SUM_OVERDUE_median,BB_AMT_CREDIT_SUM_OVERDUE_sum,BB_CNT_CREDIT_PROLONG_mean,BB_CNT_CREDIT_PROLONG_min,BB_CNT_CREDIT_PROLONG_max,BB_CNT_CREDIT_PROLONG_median,BB_CNT_CREDIT_PROLONG_sum,BB_CREDIT_DAY_OVERDUE_mean,BB_CREDIT_DAY_OVERDUE_min,BB_CREDIT_DAY_OVERDUE_max,BB_CREDIT_DAY_OVERDUE_median,BB_CREDIT_DAY_OVERDUE_sum,BB_DAYS_CREDIT_mean,BB_DAYS_CREDIT_min,BB_DAYS_CREDIT_max,BB_DAYS_CREDIT_median,BB_DAYS_CREDIT_sum,BB_DAYS_CREDIT_ENDDATE_mean,BB_DAYS_CREDIT_ENDDATE_min,BB_DAYS_CREDIT_ENDDATE_max,BB_DAYS_CREDIT_ENDDATE_median,BB_DAYS_CREDIT_ENDDATE_sum,BB_DAYS_CREDIT_UPDATE_mean,BB_DAYS_CREDIT_UPDATE_min,BB_DAYS_CREDIT_UPDATE_max,BB_DAYS_CREDIT_UPDATE_median,BB_DAYS_CREDIT_UPDATE_sum,BB_DAYS_ENDDATE_FACT_mean,BB_DAYS_ENDDATE_FACT_min,BB_DAYS_ENDDATE_FACT_max,BB_DAYS_ENDDATE_FACT_median,BB_DAYS_ENDDATE_FACT_sum,BB_STATUS_mean,BB_STATUS_min,BB_STATUS_max,BB_STATUS_median,BB_STATUS_sum,BB_MONTHS_BALANCE_mean,BB_MONTHS_BALANCE_min,BB_MONTHS_BALANCE_max,BB_MONTHS_BALANCE_median,BB_MONTHS_BALANCE_sum,BB_bureau_records_count,PA_SK_ID_CURR,PA_CHANNEL_TYPE_mean,PA_CHANNEL_TYPE_min,PA_CHANNEL_TYPE_max,PA_CHANNEL_TYPE_median,PA_CHANNEL_TYPE_sum,PA_CODE_REJECT_REASON_mean,PA_CODE_REJECT_REASON_min,PA_CODE_REJECT_REASON_max,PA_CODE_REJECT_REASON_median,PA_CODE_REJECT_REASON_sum,PA_FLAG_LAST_APPL_PER_CONTRACT_mean,PA_FLAG_LAST_APPL_PER_CONTRACT_min,PA_FLAG_LAST_APPL_PER_CONTRACT_max,PA_FLAG_LAST_APPL_PER_CONTRACT_median,PA_FLAG_LAST_APPL_PER_CONTRACT_sum,PA_NAME_CASH_LOAN_PURPOSE_mean,PA_NAME_CASH_LOAN_PURPOSE_min,PA_NAME_CASH_LOAN_PURPOSE_max,PA_NAME_CASH_LOAN_PURPOSE_median,PA_NAME_CASH_LOAN_PURPOSE_sum,PA_NAME_CLIENT_TYPE_mean,PA_NAME_CLIENT_TYPE_min,PA_NAME_CLIENT_TYPE_max,PA_NAME_CLIENT_TYPE_median,PA_NAME_CLIENT_TYPE_sum,PA_NAME_CONTRACT_STATUS_mean,PA_NAME_CONTRACT_STATUS_min,PA_NAME_CONTRACT_STATUS_max,PA_NAME_CONTRACT_STATUS_median,PA_NAME_CONTRACT_STATUS_sum,PA_NAME_CONTRACT_TYPE_mean,PA_NAME_CONTRACT_TYPE_min,PA_NAME_CONTRACT_TYPE_max,PA_NAME_CONTRACT_TYPE_median,PA_NAME_CONTRACT_TYPE_sum,PA_NAME_GOODS_CATEGORY_mean,PA_NAME_GOODS_CATEGORY_min,PA_NAME_GOODS_CATEGORY_max,PA_NAME_GOODS_CATEGORY_median,PA_NAME_GOODS_CATEGORY_sum,PA_NAME_PAYMENT_TYPE_mean,PA_NAME_PAYMENT_TYPE_min,PA_NAME_PAYMENT_TYPE_max,PA_NAME_PAYMENT_TYPE_median,PA_NAME_PAYMENT_TYPE_sum,PA_NAME_PORTFOLIO_mean,PA_NAME_PORTFOLIO_min,PA_NAME_PORTFOLIO_max,PA_NAME_PORTFOLIO_median,PA_NAME_PORTFOLIO_sum,PA_NAME_PRODUCT_TYPE_mean,PA_NAME_PRODUCT_TYPE_min,PA_NAME_PRODUCT_TYPE_max,PA_NAME_PRODUCT_TYPE_median,PA_NAME_PRODUCT_TYPE_sum,PA_NAME_SELLER_INDUSTRY_mean,PA_NAME_SELLER_INDUSTRY_min,PA_NAME_SELLER_INDUSTRY_max,PA_NAME_SELLER_INDUSTRY_median,PA_NAME_SELLER_INDUSTRY_sum,PA_NAME_TYPE_SUITE_mean,PA_NAME_TYPE_SUITE_min,PA_NAME_TYPE_SUITE_max,PA_NAME_TYPE_SUITE_median,PA_NAME_TYPE_SUITE_sum,PA_NAME_YIELD_GROUP_mean,PA_NAME_YIELD_GROUP_min,PA_NAME_YIELD_GROUP_max,PA_NAME_YIELD_GROUP_median,PA_NAME_YIELD_GROUP_sum,PA_PRODUCT_COMBINATION_mean,PA_PRODUCT_COMBINATION_min,PA_PRODUCT_COMBINATION_max,PA_PRODUCT_COMBINATION_median,PA_PRODUCT_COMBINATION_sum,PA_WEEKDAY_APPR_PROCESS_START_mean,PA_WEEKDAY_APPR_PROCESS_START_min,PA_WEEKDAY_APPR_PROCESS_START_max,PA_WEEKDAY_APPR_PROCESS_START_median,PA_WEEKDAY_APPR_PROCESS_START_sum,PA_AMT_ANNUITY_na_mean,PA_AMT_ANNUITY_na_min,PA_AMT_ANNUITY_na_max,PA_AMT_ANNUITY_na_median,PA_AMT_ANNUITY_na_sum,PA_AMT_CREDIT_na_mean,PA_AMT_CREDIT_na_min,PA_AMT_CREDIT_na_max,PA_AMT_CREDIT_na_median,PA_AMT_CREDIT_na_sum,PA_AMT_DOWN_PAYMENT_na_mean,PA_AMT_DOWN_PAYMENT_na_min,PA_AMT_DOWN_PAYMENT_na_max,PA_AMT_DOWN_PAYMENT_na_median,PA_AMT_DOWN_PAYMENT_na_sum,PA_AMT_GOODS_PRICE_na_mean,PA_AMT_GOODS_PRICE_na_min,PA_AMT_GOODS_PRICE_na_max,PA_AMT_GOODS_PRICE_na_median,PA_AMT_GOODS_PRICE_na_sum,PA_CNT_PAYMENT_na_mean,PA_CNT_PAYMENT_na_min,PA_CNT_PAYMENT_na_max,PA_CNT_PAYMENT_na_median,PA_CNT_PAYMENT_na_sum,PA_DAYS_FIRST_DRAWING_na_mean,PA_DAYS_FIRST_DRAWING_na_min,PA_DAYS_FIRST_DRAWING_na_max,PA_DAYS_FIRST_DRAWING_na_median,PA_DAYS_FIRST_DRAWING_na_sum,PA_DAYS_FIRST_DUE_na_mean,PA_DAYS_FIRST_DUE_na_min,PA_DAYS_FIRST_DUE_na_max,PA_DAYS_FIRST_DUE_na_median,PA_DAYS_FIRST_DUE_na_sum,PA_DAYS_LAST_DUE_na_mean,PA_DAYS_LAST_DUE_na_min,PA_DAYS_LAST_DUE_na_max,PA_DAYS_LAST_DUE_na_median,PA_DAYS_LAST_DUE_na_sum,PA_DAYS_LAST_DUE_1ST_VERSION_na_mean,PA_DAYS_LAST_DUE_1ST_VERSION_na_min,PA_DAYS_LAST_DUE_1ST_VERSION_na_max,PA_DAYS_LAST_DUE_1ST_VERSION_na_median,PA_DAYS_LAST_DUE_1ST_VERSION_na_sum,PA_DAYS_TERMINATION_na_mean,PA_DAYS_TERMINATION_na_min,PA_DAYS_TERMINATION_na_max,PA_DAYS_TERMINATION_na_median,PA_DAYS_TERMINATION_na_sum,PA_NFLAG_INSURED_ON_APPROVAL_na_mean,PA_NFLAG_INSURED_ON_APPROVAL_na_min,PA_NFLAG_INSURED_ON_APPROVAL_na_max,PA_NFLAG_INSURED_ON_APPROVAL_na_median,PA_NFLAG_INSURED_ON_APPROVAL_na_sum,PA_RATE_DOWN_PAYMENT_na_mean,PA_RATE_DOWN_PAYMENT_na_min,PA_RATE_DOWN_PAYMENT_na_max,PA_RATE_DOWN_PAYMENT_na_median,PA_RATE_DOWN_PAYMENT_na_sum,PA_RATE_INTEREST_PRIMARY_na_mean,PA_RATE_INTEREST_PRIMARY_na_min,PA_RATE_INTEREST_PRIMARY_na_max,PA_RATE_INTEREST_PRIMARY_na_median,PA_RATE_INTEREST_PRIMARY_na_sum,PA_RATE_INTEREST_PRIVILEGED_na_mean,PA_RATE_INTEREST_PRIVILEGED_na_min,PA_RATE_INTEREST_PRIVILEGED_na_max,PA_RATE_INTEREST_PRIVILEGED_na_median,PA_RATE_INTEREST_PRIVILEGED_na_sum,PA_AMT_ANNUITY_mean,PA_AMT_ANNUITY_min,PA_AMT_ANNUITY_max,PA_AMT_ANNUITY_median,PA_AMT_ANNUITY_sum,PA_AMT_APPLICATION_mean,PA_AMT_APPLICATION_min,PA_AMT_APPLICATION_max,PA_AMT_APPLICATION_median,PA_AMT_APPLICATION_sum,PA_AMT_CREDIT_mean,PA_AMT_CREDIT_min,PA_AMT_CREDIT_max,PA_AMT_CREDIT_median,PA_AMT_CREDIT_sum,PA_AMT_DOWN_PAYMENT_mean,PA_AMT_DOWN_PAYMENT_min,PA_AMT_DOWN_PAYMENT_max,PA_AMT_DOWN_PAYMENT_median,PA_AMT_DOWN_PAYMENT_sum,PA_AMT_GOODS_PRICE_mean,PA_AMT_GOODS_PRICE_min,PA_AMT_GOODS_PRICE_max,PA_AMT_GOODS_PRICE_median,PA_AMT_GOODS_PRICE_sum,PA_CNT_PAYMENT_mean,PA_CNT_PAYMENT_min,PA_CNT_PAYMENT_max,PA_CNT_PAYMENT_median,PA_CNT_PAYMENT_sum,PA_DAYS_DECISION_mean,PA_DAYS_DECISION_min,PA_DAYS_DECISION_max,PA_DAYS_DECISION_median,PA_DAYS_DECISION_sum,PA_DAYS_FIRST_DRAWING_mean,PA_DAYS_FIRST_DRAWING_min,PA_DAYS_FIRST_DRAWING_max,PA_DAYS_FIRST_DRAWING_median,PA_DAYS_FIRST_DRAWING_sum,PA_DAYS_FIRST_DUE_mean,PA_DAYS_FIRST_DUE_min,PA_DAYS_FIRST_DUE_max,PA_DAYS_FIRST_DUE_median,PA_DAYS_FIRST_DUE_sum,PA_DAYS_LAST_DUE_mean,PA_DAYS_LAST_DUE_min,PA_DAYS_LAST_DUE_max,PA_DAYS_LAST_DUE_median,PA_DAYS_LAST_DUE_sum,PA_DAYS_LAST_DUE_1ST_VERSION_mean,PA_DAYS_LAST_DUE_1ST_VERSION_min,PA_DAYS_LAST_DUE_1ST_VERSION_max,PA_DAYS_LAST_DUE_1ST_VERSION_median,PA_DAYS_LAST_DUE_1ST_VERSION_sum,PA_DAYS_TERMINATION_mean,PA_DAYS_TERMINATION_min,PA_DAYS_TERMINATION_max,PA_DAYS_TERMINATION_median,PA_DAYS_TERMINATION_sum,PA_HOUR_APPR_PROCESS_START_mean,PA_HOUR_APPR_PROCESS_START_min,PA_HOUR_APPR_PROCESS_START_max,PA_HOUR_APPR_PROCESS_START_median,PA_HOUR_APPR_PROCESS_START_sum,PA_NFLAG_INSURED_ON_APPROVAL_mean,PA_NFLAG_INSURED_ON_APPROVAL_min,PA_NFLAG_INSURED_ON_APPROVAL_max,PA_NFLAG_INSURED_ON_APPROVAL_median,PA_NFLAG_INSURED_ON_APPROVAL_sum,PA_NFLAG_LAST_APPL_IN_DAY_mean,PA_NFLAG_LAST_APPL_IN_DAY_min,PA_NFLAG_LAST_APPL_IN_DAY_max,PA_NFLAG_LAST_APPL_IN_DAY_median,PA_NFLAG_LAST_APPL_IN_DAY_sum,PA_RATE_DOWN_PAYMENT_mean,PA_RATE_DOWN_PAYMENT_min,PA_RATE_DOWN_PAYMENT_max,PA_RATE_DOWN_PAYMENT_median,PA_RATE_DOWN_PAYMENT_sum,PA_RATE_INTEREST_PRIMARY_mean,PA_RATE_INTEREST_PRIMARY_min,PA_RATE_INTEREST_PRIMARY_max,PA_RATE_INTEREST_PRIMARY_median,PA_RATE_INTEREST_PRIMARY_sum,PA_RATE_INTEREST_PRIVILEGED_mean,PA_RATE_INTEREST_PRIVILEGED_min,PA_RATE_INTEREST_PRIVILEGED_max,PA_RATE_INTEREST_PRIVILEGED_median,PA_RATE_INTEREST_PRIVILEGED_sum,PA_SELLERPLACE_AREA_mean,PA_SELLERPLACE_AREA_min,PA_SELLERPLACE_AREA_max,PA_SELLERPLACE_AREA_median,PA_SELLERPLACE_AREA_sum,PA_SK_ID_PREV_mean,PA_SK_ID_PREV_min,PA_SK_ID_PREV_max,PA_SK_ID_PREV_median,PA_SK_ID_PREV_sum,PA_previous_application_records_count,PCB_SK_ID_CURR,PCB_NAME_CONTRACT_STATUS_mean,PCB_NAME_CONTRACT_STATUS_min,PCB_NAME_CONTRACT_STATUS_max,PCB_NAME_CONTRACT_STATUS_median,PCB_NAME_CONTRACT_STATUS_sum,PCB_CNT_INSTALMENT_na_mean,PCB_CNT_INSTALMENT_na_min,PCB_CNT_INSTALMENT_na_max,PCB_CNT_INSTALMENT_na_median,PCB_CNT_INSTALMENT_na_sum,PCB_CNT_INSTALMENT_FUTURE_na_mean,PCB_CNT_INSTALMENT_FUTURE_na_min,PCB_CNT_INSTALMENT_FUTURE_na_max,PCB_CNT_INSTALMENT_FUTURE_na_median,PCB_CNT_INSTALMENT_FUTURE_na_sum,PCB_CNT_INSTALMENT_mean,PCB_CNT_INSTALMENT_min,PCB_CNT_INSTALMENT_max,PCB_CNT_INSTALMENT_median,PCB_CNT_INSTALMENT_sum,PCB_CNT_INSTALMENT_FUTURE_mean,PCB_CNT_INSTALMENT_FUTURE_min,PCB_CNT_INSTALMENT_FUTURE_max,PCB_CNT_INSTALMENT_FUTURE_median,PCB_CNT_INSTALMENT_FUTURE_sum,PCB_MONTHS_BALANCE_mean,PCB_MONTHS_BALANCE_min,PCB_MONTHS_BALANCE_max,PCB_MONTHS_BALANCE_median,PCB_MONTHS_BALANCE_sum,PCB_SK_DPD_mean,PCB_SK_DPD_min,PCB_SK_DPD_max,PCB_SK_DPD_median,PCB_SK_DPD_sum,PCB_SK_DPD_DEF_mean,PCB_SK_DPD_DEF_min,PCB_SK_DPD_DEF_max,PCB_SK_DPD_DEF_median,PCB_SK_DPD_DEF_sum,PCB_pos_cash_balance_records_count,IP_SK_ID_CURR,IP_AMT_PAYMENT_na_mean,IP_AMT_PAYMENT_na_min,IP_AMT_PAYMENT_na_max,IP_AMT_PAYMENT_na_median,IP_AMT_PAYMENT_na_sum,IP_DAYS_ENTRY_PAYMENT_na_mean,IP_DAYS_ENTRY_PAYMENT_na_min,IP_DAYS_ENTRY_PAYMENT_na_max,IP_DAYS_ENTRY_PAYMENT_na_median,IP_DAYS_ENTRY_PAYMENT_na_sum,IP_AMT_INSTALMENT_mean,IP_AMT_INSTALMENT_min,IP_AMT_INSTALMENT_max,IP_AMT_INSTALMENT_median,IP_AMT_INSTALMENT_sum,IP_AMT_PAYMENT_mean,IP_AMT_PAYMENT_min,IP_AMT_PAYMENT_max,IP_AMT_PAYMENT_median,IP_AMT_PAYMENT_sum,IP_DAYS_ENTRY_PAYMENT_mean,IP_DAYS_ENTRY_PAYMENT_min,IP_DAYS_ENTRY_PAYMENT_max,IP_DAYS_ENTRY_PAYMENT_median,IP_DAYS_ENTRY_PAYMENT_sum,IP_DAYS_INSTALMENT_mean,IP_DAYS_INSTALMENT_min,IP_DAYS_INSTALMENT_max,IP_DAYS_INSTALMENT_median,IP_DAYS_INSTALMENT_sum,IP_NUM_INSTALMENT_NUMBER_mean,IP_NUM_INSTALMENT_NUMBER_min,IP_NUM_INSTALMENT_NUMBER_max,IP_NUM_INSTALMENT_NUMBER_median,IP_NUM_INSTALMENT_NUMBER_sum,IP_NUM_INSTALMENT_VERSION_mean,IP_NUM_INSTALMENT_VERSION_min,IP_NUM_INSTALMENT_VERSION_max,IP_NUM_INSTALMENT_VERSION_median,IP_NUM_INSTALMENT_VERSION_sum,IP_instalments_unpaid_mean,IP_instalments_unpaid_min,IP_instalments_unpaid_max,IP_instalments_unpaid_median,IP_instalments_unpaid_sum,IP_instalments_partially_paid_mean,IP_instalments_partially_paid_min,IP_instalments_partially_paid_max,IP_instalments_partially_paid_median,IP_instalments_partially_paid_sum,IP_instalments_overdue_mean,IP_instalments_overdue_min,IP_instalments_overdue_max,IP_instalments_overdue_median,IP_instalments_overdue_sum,IP_amount_debt_mean,IP_amount_debt_min,IP_amount_debt_max,IP_amount_debt_median,IP_amount_debt_sum,IP_dpd_mean,IP_dpd_min,IP_dpd_max,IP_dpd_median,IP_dpd_sum,IP_installments_payments_records_count,CCB_SK_ID_CURR,CCB_NAME_CONTRACT_STATUS_mean,CCB_NAME_CONTRACT_STATUS_min,CCB_NAME_CONTRACT_STATUS_max,CCB_NAME_CONTRACT_STATUS_median,CCB_NAME_CONTRACT_STATUS_sum,CCB_AMT_DRAWINGS_ATM_CURRENT_na_mean,CCB_AMT_DRAWINGS_ATM_CURRENT_na_min,CCB_AMT_DRAWINGS_ATM_CURRENT_na_max,CCB_AMT_DRAWINGS_ATM_CURRENT_na_median,CCB_AMT_DRAWINGS_ATM_CURRENT_na_sum,CCB_AMT_DRAWINGS_OTHER_CURRENT_na_mean,CCB_AMT_DRAWINGS_OTHER_CURRENT_na_min,CCB_AMT_DRAWINGS_OTHER_CURRENT_na_max,CCB_AMT_DRAWINGS_OTHER_CURRENT_na_median,CCB_AMT_DRAWINGS_OTHER_CURRENT_na_sum,CCB_AMT_DRAWINGS_POS_CURRENT_na_mean,CCB_AMT_DRAWINGS_POS_CURRENT_na_min,CCB_AMT_DRAWINGS_POS_CURRENT_na_max,CCB_AMT_DRAWINGS_POS_CURRENT_na_median,CCB_AMT_DRAWINGS_POS_CURRENT_na_sum,CCB_AMT_INST_MIN_REGULARITY_na_mean,CCB_AMT_INST_MIN_REGULARITY_na_min,CCB_AMT_INST_MIN_REGULARITY_na_max,CCB_AMT_INST_MIN_REGULARITY_na_median,CCB_AMT_INST_MIN_REGULARITY_na_sum,CCB_AMT_PAYMENT_CURRENT_na_mean,CCB_AMT_PAYMENT_CURRENT_na_min,CCB_AMT_PAYMENT_CURRENT_na_max,CCB_AMT_PAYMENT_CURRENT_na_median,CCB_AMT_PAYMENT_CURRENT_na_sum,CCB_CNT_DRAWINGS_ATM_CURRENT_na_mean,CCB_CNT_DRAWINGS_ATM_CURRENT_na_min,CCB_CNT_DRAWINGS_ATM_CURRENT_na_max,CCB_CNT_DRAWINGS_ATM_CURRENT_na_median,CCB_CNT_DRAWINGS_ATM_CURRENT_na_sum,CCB_CNT_DRAWINGS_OTHER_CURRENT_na_mean,CCB_CNT_DRAWINGS_OTHER_CURRENT_na_min,CCB_CNT_DRAWINGS_OTHER_CURRENT_na_max,CCB_CNT_DRAWINGS_OTHER_CURRENT_na_median,CCB_CNT_DRAWINGS_OTHER_CURRENT_na_sum,CCB_CNT_DRAWINGS_POS_CURRENT_na_mean,CCB_CNT_DRAWINGS_POS_CURRENT_na_min,CCB_CNT_DRAWINGS_POS_CURRENT_na_max,CCB_CNT_DRAWINGS_POS_CURRENT_na_median,CCB_CNT_DRAWINGS_POS_CURRENT_na_sum,CCB_CNT_INSTALMENT_MATURE_CUM_na_mean,CCB_CNT_INSTALMENT_MATURE_CUM_na_min,CCB_CNT_INSTALMENT_MATURE_CUM_na_max,CCB_CNT_INSTALMENT_MATURE_CUM_na_median,CCB_CNT_INSTALMENT_MATURE_CUM_na_sum,CCB_AMT_BALANCE_mean,CCB_AMT_BALANCE_min,CCB_AMT_BALANCE_max,CCB_AMT_BALANCE_median,CCB_AMT_BALANCE_sum,CCB_AMT_CREDIT_LIMIT_ACTUAL_mean,CCB_AMT_CREDIT_LIMIT_ACTUAL_min,CCB_AMT_CREDIT_LIMIT_ACTUAL_max,CCB_AMT_CREDIT_LIMIT_ACTUAL_median,CCB_AMT_CREDIT_LIMIT_ACTUAL_sum,CCB_AMT_DRAWINGS_ATM_CURRENT_mean,CCB_AMT_DRAWINGS_ATM_CURRENT_min,CCB_AMT_DRAWINGS_ATM_CURRENT_max,CCB_AMT_DRAWINGS_ATM_CURRENT_median,CCB_AMT_DRAWINGS_ATM_CURRENT_sum,CCB_AMT_DRAWINGS_CURRENT_mean,CCB_AMT_DRAWINGS_CURRENT_min,CCB_AMT_DRAWINGS_CURRENT_max,CCB_AMT_DRAWINGS_CURRENT_median,CCB_AMT_DRAWINGS_CURRENT_sum,CCB_AMT_DRAWINGS_OTHER_CURRENT_mean,CCB_AMT_DRAWINGS_OTHER_CURRENT_min,CCB_AMT_DRAWINGS_OTHER_CURRENT_max,CCB_AMT_DRAWINGS_OTHER_CURRENT_median,CCB_AMT_DRAWINGS_OTHER_CURRENT_sum,CCB_AMT_DRAWINGS_POS_CURRENT_mean,CCB_AMT_DRAWINGS_POS_CURRENT_min,CCB_AMT_DRAWINGS_POS_CURRENT_max,CCB_AMT_DRAWINGS_POS_CURRENT_median,CCB_AMT_DRAWINGS_POS_CURRENT_sum,CCB_AMT_INST_MIN_REGULARITY_mean,CCB_AMT_INST_MIN_REGULARITY_min,CCB_AMT_INST_MIN_REGULARITY_max,CCB_AMT_INST_MIN_REGULARITY_median,CCB_AMT_INST_MIN_REGULARITY_sum,CCB_AMT_PAYMENT_CURRENT_mean,CCB_AMT_PAYMENT_CURRENT_min,CCB_AMT_PAYMENT_CURRENT_max,CCB_AMT_PAYMENT_CURRENT_median,CCB_AMT_PAYMENT_CURRENT_sum,CCB_AMT_PAYMENT_TOTAL_CURRENT_mean,CCB_AMT_PAYMENT_TOTAL_CURRENT_min,CCB_AMT_PAYMENT_TOTAL_CURRENT_max,CCB_AMT_PAYMENT_TOTAL_CURRENT_median,CCB_AMT_PAYMENT_TOTAL_CURRENT_sum,CCB_AMT_RECEIVABLE_PRINCIPAL_mean,CCB_AMT_RECEIVABLE_PRINCIPAL_min,CCB_AMT_RECEIVABLE_PRINCIPAL_max,CCB_AMT_RECEIVABLE_PRINCIPAL_median,CCB_AMT_RECEIVABLE_PRINCIPAL_sum,CCB_AMT_RECIVABLE_mean,CCB_AMT_RECIVABLE_min,CCB_AMT_RECIVABLE_max,CCB_AMT_RECIVABLE_median,CCB_AMT_RECIVABLE_sum,CCB_AMT_TOTAL_RECEIVABLE_mean,CCB_AMT_TOTAL_RECEIVABLE_min,CCB_AMT_TOTAL_RECEIVABLE_max,CCB_AMT_TOTAL_RECEIVABLE_median,CCB_AMT_TOTAL_RECEIVABLE_sum,CCB_CNT_DRAWINGS_ATM_CURRENT_mean,CCB_CNT_DRAWINGS_ATM_CURRENT_min,CCB_CNT_DRAWINGS_ATM_CURRENT_max,CCB_CNT_DRAWINGS_ATM_CURRENT_median,CCB_CNT_DRAWINGS_ATM_CURRENT_sum,CCB_CNT_DRAWINGS_CURRENT_mean,CCB_CNT_DRAWINGS_CURRENT_min,CCB_CNT_DRAWINGS_CURRENT_max,CCB_CNT_DRAWINGS_CURRENT_median,CCB_CNT_DRAWINGS_CURRENT_sum,CCB_CNT_DRAWINGS_OTHER_CURRENT_mean,CCB_CNT_DRAWINGS_OTHER_CURRENT_min,CCB_CNT_DRAWINGS_OTHER_CURRENT_max,CCB_CNT_DRAWINGS_OTHER_CURRENT_median,CCB_CNT_DRAWINGS_OTHER_CURRENT_sum,CCB_CNT_DRAWINGS_POS_CURRENT_mean,CCB_CNT_DRAWINGS_POS_CURRENT_min,CCB_CNT_DRAWINGS_POS_CURRENT_max,CCB_CNT_DRAWINGS_POS_CURRENT_median,CCB_CNT_DRAWINGS_POS_CURRENT_sum,CCB_CNT_INSTALMENT_MATURE_CUM_mean,CCB_CNT_INSTALMENT_MATURE_CUM_min,CCB_CNT_INSTALMENT_MATURE_CUM_max,CCB_CNT_INSTALMENT_MATURE_CUM_median,CCB_CNT_INSTALMENT_MATURE_CUM_sum,CCB_MONTHS_BALANCE_mean,CCB_MONTHS_BALANCE_min,CCB_MONTHS_BALANCE_max,CCB_MONTHS_BALANCE_median,CCB_MONTHS_BALANCE_sum,CCB_SK_DPD_mean,CCB_SK_DPD_min,CCB_SK_DPD_max,CCB_SK_DPD_median,CCB_SK_DPD_sum,CCB_SK_DPD_DEF_mean,CCB_SK_DPD_DEF_min,CCB_SK_DPD_DEF_max,CCB_SK_DPD_DEF_median,CCB_SK_DPD_DEF_sum,CCB_credit_card_balance_records_count
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,100002.0,2.5,1.0,3.0,3.0,20.0,1.0,1.0,1.0,1.0,8.0,4.5,4.0,5.0,4.5,36.0,1.125,1.0,2.0,1.0,9.0,1.375,1.0,2.0,1.0,11.0,1.0,1.0,1.0,1.0,8.0,1.375,1.0,2.0,1.0,11.0,1.5,1.0,2.0,1.5,12.0,1.25,1.0,2.0,1.0,10.0,1.25,1.0,2.0,1.0,10.0,-0.025735,-0.025735,-0.025735,-0.025735,-0.205883,-0.00222,-0.010902,0.030778,-0.010902,-0.017759,-0.214698,-0.308741,0.082629,-0.261663,-1.717582,-0.136945,-0.185991,0.206376,-0.185991,-1.095563,-0.002274,-0.111609,0.763072,-0.111609,-0.018193,-0.006385,-0.006385,-0.006385,-0.006385,-0.051081,-0.06662,-0.06662,-0.06662,-0.06662,-0.532957,-0.022388,-0.022388,-0.022388,-0.022388,-0.179106,0.337173,-0.370857,1.306783,0.125267,2.69738,-0.165842,-0.316128,0.066324,-0.162899,-1.326737,0.130244,-0.820332,0.814083,0.265347,1.041956,0.39574,-0.371926,1.643609,0.120994,3.165921,47.5,6.0,94.0,55.0,380.0,3.564842,-5.872026,15.582428,2.843846,28.518732,8.0,100002.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,2.0,2.0,2.0,2.0,2.0,24.0,24.0,24.0,24.0,24.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,26.0,26.0,26.0,26.0,26.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,4.0,4.0,4.0,16.0,16.0,16.0,16.0,16.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,-0.42912,-0.42912,-0.42912,-0.42912,-0.42912,0.013051,0.013051,0.013051,0.013051,0.013051,-0.053548,-0.053548,-0.053548,-0.053548,-0.053548,-0.27536,-0.27536,-0.27536,-0.27536,-0.27536,-0.078781,-0.078781,-0.078781,-0.078781,-0.078781,0.683238,0.683238,0.683238,0.683238,0.683238,0.35256,0.35256,0.35256,0.35256,0.35256,0.197503,0.197503,0.197503,0.197503,0.197503,-0.150342,-0.150342,-0.150342,-0.150342,-0.150342,-0.374243,-0.374243,-0.374243,-0.374243,-0.374243,-0.236091,-0.236091,-0.236091,-0.236091,-0.236091,-0.389596,-0.389596,-0.389596,-0.389596,-0.389596,-1.045037,-1.045037,-1.045037,-1.045037,-1.045037,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,-0.864389,-0.864389,-0.864389,-0.864389,-0.864389,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,0.026103,0.026103,0.026103,0.026103,0.026103,1038818.0,1038818.0,1038818.0,1038818.0,1038818.0,1.0,100002.0,1.0,1.0,1.0,1.0,19.0,1.0,1.0,1.0,1.0,19.0,1.0,1.0,1.0,1.0,19.0,0.577824,0.577824,0.577824,0.577824,10.978658,0.407827,-0.403277,1.218932,0.407827,7.748722,0.959566,0.614296,1.304836,0.959566,18.23175,-0.087458,-0.087458,-0.087458,-0.087458,-1.661705,-0.019976,-0.019976,-0.019976,-0.019976,-0.379547,19.0,100002.0,1.0,1.0,1.0,1.0,19.0,1.0,1.0,1.0,1.0,19.0,-0.108595,-0.154224,0.712728,-0.154224,-2.063298,-0.103728,-0.145889,0.655169,-0.145889,-1.970828,0.918973,0.579715,1.251789,0.923247,17.460495,0.932984,0.595883,1.270085,0.932984,17.726696,-0.332691,-0.670224,0.004842,-0.332691,-6.32113,0.189327,0.138486,1.104468,0.138486,3.597213,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.003158,0.0,0.06,0.0,0.06,0.0,0.0,0.0,0.0,0.0,19.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,100003.0,2.5,1.0,3.0,3.0,10.0,1.0,1.0,1.0,1.0,4.0,4.5,4.0,5.0,4.5,18.0,2.0,2.0,2.0,2.0,8.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,1.25,1.0,2.0,1.0,5.0,-0.025735,-0.025735,-0.025735,-0.025735,-0.102941,-0.010902,-0.010902,-0.010902,-0.010902,-0.04361,-0.08753,-0.289392,0.395725,-0.228227,-0.35012,-0.185991,-0.185991,-0.185991,-0.185991,-0.743965,5.425463,-0.111609,22.036678,-0.111609,21.701851,-0.006385,-0.006385,-0.006385,-0.006385,-0.025541,-0.06662,-0.06662,-0.06662,-0.06662,-0.266479,-0.022388,-0.022388,-0.022388,-0.022388,-0.089553,-0.325269,-1.815841,0.67421,-0.079722,-1.301075,-0.207195,-0.597392,0.156362,-0.193876,-0.828781,-0.308363,-2.132859,0.764135,0.067636,-1.233452,-0.13029,-2.031365,0.75951,0.375348,-0.521159,,,,,0.0,,,,,0.0,4.0,100003.0,6.333333,5.0,8.0,6.0,19.0,8.0,8.0,8.0,8.0,24.0,2.0,2.0,2.0,2.0,6.0,24.333333,24.0,25.0,24.0,73.0,2.333333,2.0,3.0,2.0,7.0,1.0,1.0,1.0,1.0,3.0,1.666667,1.0,2.0,2.0,5.0,16.0,8.0,28.0,12.0,48.0,2.0,1.0,4.0,1.0,6.0,3.666667,3.0,4.0,4.0,11.0,1.666667,1.0,3.0,1.0,5.0,7.333333,5.0,11.0,6.0,22.0,3.666667,2.0,7.0,2.0,11.0,4.666667,4.0,5.0,5.0,14.0,10.0,8.0,12.0,10.0,30.0,2.666667,1.0,4.0,3.0,8.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.333333,1.0,2.0,1.0,4.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.333333,1.0,2.0,1.0,4.0,2.0,2.0,2.0,2.0,6.0,2.0,2.0,2.0,2.0,6.0,3.160497,-0.619935,6.332796,3.76863,9.481492,0.888732,-0.363496,2.475466,0.554226,2.666196,0.904269,-0.40198,2.636018,0.478769,2.712808,-0.078986,-0.27536,0.200541,-0.162139,-0.236959,0.834055,-0.471305,2.488113,0.485356,2.502165,-0.39766,-0.706488,-0.243246,-0.243246,-1.192981,-0.544629,-1.87437,0.172866,0.067616,-1.633888,0.197503,0.197503,0.197503,0.197503,0.592508,-0.162911,-0.181262,-0.153018,-0.154453,-0.488733,-0.382704,-0.390313,-0.378444,-0.379356,-1.148112,-0.249496,-0.261078,-0.242157,-0.245255,-0.748489,-0.397827,-0.405246,-0.39367,-0.394565,-1.193482,0.654609,-0.145224,1.354464,0.754588,1.963828,1.173491,-0.497734,2.009104,2.009104,3.520473,0.05954,0.05954,0.05954,0.05954,0.17862,-0.187945,-0.864389,0.474452,-0.173897,-0.563834,0.000521,0.000521,0.000521,0.000521,0.001563,0.031098,0.031098,0.031098,0.031098,0.093294,0.030733,-0.044189,0.152376,-0.015988,0.0922,2281150.0,1810518.0,2636178.0,2396755.0,6843451.0,3.0,100003.0,1.285714,1.0,5.0,1.0,36.0,1.0,1.0,1.0,1.0,28.0,1.0,1.0,1.0,1.0,28.0,-0.581632,-0.924401,-0.42366,-0.42366,-16.285689,-0.422589,-0.944013,0.137459,-0.403277,-11.832489,-0.336566,-1.610776,0.652659,0.326571,-9.423854,-0.087458,-0.087458,-0.087458,-0.087458,-2.448829,-0.019976,-0.019976,-0.019976,-0.019976,-0.559332,28.0,100003.0,1.0,1.0,1.0,1.0,25.0,1.0,1.0,1.0,1.0,25.0,0.943315,-0.205416,10.75305,0.933844,23.582875,0.86823,-0.19319,9.93236,0.859478,21.705746,-0.417553,-1.59016,0.633431,0.306138,-10.438827,-0.419366,-1.58279,0.63209,0.306225,-10.484161,-0.517209,-0.670224,-0.257684,-0.520209,-12.930226,0.177125,0.138486,1.104468,0.138486,4.428128,0.0,0.0,0.0,0.0,0.0,0.52,0.0,1.0,1.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0808,0.0,0.82,0.07,2.02,0.0,0.0,0.0,0.0,0.0,25.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,100004.0,3.0,3.0,3.0,3.0,6.0,1.0,1.0,1.0,1.0,2.0,4.0,4.0,4.0,4.0,8.0,2.0,2.0,2.0,2.0,4.0,1.5,1.0,2.0,1.5,3.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,-0.025735,-0.025735,-0.025735,-0.025735,-0.051471,-0.010902,-0.010902,-0.010902,-0.010902,-0.021805,-0.226537,-0.226554,-0.226521,-0.226537,-0.453074,-0.185991,-0.185991,-0.185991,-0.185991,-0.371983,-0.111609,-0.111609,-0.111609,-0.111609,-0.223219,-0.006385,-0.006385,-0.006385,-0.006385,-0.01277,-0.06662,-0.06662,-0.06662,-0.06662,-0.133239,-0.022388,-0.022388,-0.022388,-0.022388,-0.044777,0.345976,-0.231263,0.923215,0.345976,0.691951,-0.195631,-0.217624,-0.173638,-0.195631,-0.391262,0.085673,-0.122445,0.29379,0.085673,0.171345,0.772666,0.508665,1.036668,0.772666,1.545332,,,,,0.0,,,,,0.0,2.0,100004.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,8.0,2.0,2.0,2.0,2.0,2.0,24.0,24.0,24.0,24.0,24.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,20.0,20.0,20.0,20.0,20.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,7.0,7.0,5.0,5.0,5.0,5.0,5.0,15.0,15.0,15.0,15.0,15.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,-0.724663,-0.724663,-0.724663,-0.724663,-0.724663,-0.515582,-0.515582,-0.515582,-0.515582,-0.515582,-0.552486,-0.552486,-0.552486,-0.552486,-0.552486,0.06057,0.06057,0.06057,0.06057,0.06057,-0.629843,-0.629843,-0.629843,-0.629843,-0.629843,-0.860902,-0.860902,-0.860902,-0.860902,-0.860902,0.084302,0.084302,0.084302,0.084302,0.084302,0.197503,0.197503,0.197503,0.197503,0.197503,-0.154223,-0.154223,-0.154223,-0.154223,-0.154223,-0.379989,-0.379989,-0.379989,-0.379989,-0.379989,-0.245813,-0.245813,-0.245813,-0.245813,-0.245813,-0.395164,-0.395164,-0.395164,-0.395164,-0.395164,-2.244787,-2.244787,-2.244787,-2.244787,-2.244787,-0.497734,-0.497734,-0.497734,-0.497734,-0.497734,0.05954,0.05954,0.05954,0.05954,0.05954,1.972338,1.972338,1.972338,1.972338,1.972338,0.000521,0.000521,0.000521,0.000521,0.000521,0.031098,0.031098,0.031098,0.031098,0.031098,-0.039839,-0.039839,-0.039839,-0.039839,-0.039839,1564014.0,1564014.0,1564014.0,1564014.0,1564014.0,1.0,100004.0,2.0,1.0,5.0,1.0,8.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,-1.11218,-1.174772,-1.091315,-1.091315,-4.448719,-0.741237,-0.944013,-0.583522,-0.718706,-2.964948,0.364934,0.307389,0.422479,0.364934,1.459738,-0.087458,-0.087458,-0.087458,-0.087458,-0.349833,-0.019976,-0.019976,-0.019976,-0.019976,-0.079905,4.0,100004.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,-0.19685,-0.231236,-0.128078,-0.231236,-0.59055,-0.185275,-0.217047,-0.12173,-0.217047,-0.555825,0.36152,0.319879,0.404826,0.359854,1.084559,0.359912,0.322456,0.397368,0.359912,1.079735,-0.63272,-0.670224,-0.595217,-0.63272,-1.898161,0.46048,0.138486,1.104468,0.138486,1.38144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,100006.0,6.111111,5.0,8.0,6.0,55.0,7.444444,3.0,8.0,8.0,67.0,2.0,2.0,2.0,2.0,18.0,24.555556,24.0,25.0,25.0,221.0,2.777778,1.0,3.0,3.0,25.0,1.555556,1.0,3.0,1.0,14.0,1.666667,1.0,3.0,1.0,15.0,22.888889,3.0,28.0,28.0,206.0,2.666667,1.0,4.0,4.0,24.0,3.666667,1.0,5.0,4.0,33.0,1.888889,1.0,3.0,1.0,17.0,9.555556,4.0,11.0,11.0,86.0,2.555556,0.0,7.0,0.0,23.0,2.333333,1.0,5.0,2.0,21.0,6.0,1.0,12.0,7.0,54.0,4.777778,3.0,6.0,5.0,43.0,1.333333,1.0,2.0,1.0,12.0,1.0,1.0,1.0,1.0,9.0,1.777778,1.0,2.0,2.0,16.0,1.333333,1.0,2.0,1.0,12.0,1.333333,1.0,2.0,1.0,12.0,1.555556,1.0,2.0,2.0,14.0,1.555556,1.0,2.0,2.0,14.0,1.555556,1.0,2.0,2.0,14.0,1.555556,1.0,2.0,2.0,14.0,1.555556,1.0,2.0,2.0,14.0,1.555556,1.0,2.0,2.0,14.0,1.777778,1.0,2.0,2.0,16.0,2.0,2.0,2.0,2.0,18.0,2.0,2.0,2.0,2.0,18.0,0.34991,-0.942787,1.900815,-0.106735,3.149191,0.331203,-0.598518,1.75308,0.323677,2.980824,0.300029,-0.615599,2.230251,0.225429,2.70026,0.347857,-0.162139,4.354877,-0.162139,3.130709,0.386173,-0.620478,1.735077,0.245025,3.47556,0.322938,-1.16973,2.536205,-0.243246,2.906445,0.78069,0.338442,0.898062,0.898062,7.026211,0.197503,0.197503,0.197503,0.197503,1.777524,0.568659,-0.155055,6.331508,-0.155055,5.117928,0.290141,-0.378452,2.62817,-0.378452,2.611272,0.243206,-0.24186,4.097906,-0.24186,2.188856,0.256253,-0.393447,2.528448,-0.393447,2.306275,0.654609,-0.145224,0.754588,0.754588,5.891483,-0.497734,-0.497734,-0.497734,-0.497734,-4.479607,0.05954,0.05954,0.05954,0.05954,0.535859,0.15855,-0.173897,2.050242,-0.173897,1.42695,0.000521,0.000521,0.000521,0.000521,0.00469,0.031098,0.031098,0.031098,0.031098,0.279881,0.081414,-0.044189,1.081882,-0.044189,0.732723,1932462.0,1020698.0,2827850.0,2078043.0,17392159.0,9.0,100006.0,1.666667,1.0,7.0,1.0,35.0,1.047619,1.0,2.0,1.0,22.0,1.047619,1.0,2.0,1.0,22.0,-0.42366,-1.341686,2.580792,-0.42366,-8.896852,-0.171533,-0.944013,3.381877,-0.223031,-3.602188,0.97418,0.575933,1.304836,1.036292,20.457788,-0.087458,-0.087458,-0.087458,-0.087458,-1.836622,-0.019976,-0.019976,-0.019976,-0.019976,-0.419499,21.0,100006.0,1.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,16.0,0.907573,-0.288074,13.342547,0.236831,14.521163,0.835204,-0.269566,12.325038,0.215443,13.363266,0.973684,0.594705,1.29801,1.049417,15.578941,0.986358,0.620853,1.287565,1.044103,15.781734,-0.541305,-0.670224,-0.332691,-0.557713,-8.660882,0.259234,0.138486,1.104468,0.138486,4.147738,0.0,0.0,0.0,0.0,0.0,0.6875,0.0,1.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,0.07625,0.0,1.02,0.02,1.22,0.0,0.0,0.0,0.0,0.0,16.0,100006.0,1.0,1.0,1.0,1.0,6.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,1.0,1.0,1.0,1.0,6.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,1.0,1.0,1.0,1.0,6.0,-0.548413,-0.548413,-0.548413,-0.548413,-3.290478,0.703573,0.703573,0.703573,0.703573,4.221438,-0.188645,-0.188645,-0.188645,-0.188645,-1.131872,-0.219623,-0.219623,-0.219623,-0.219623,-1.31774,-0.031514,-0.031514,-0.031514,-0.031514,-0.189086,-0.127806,-0.127806,-0.127806,-0.127806,-0.766837,-0.59711,-0.59711,-0.59711,-0.59711,-3.582661,-0.187044,-0.187044,-0.187044,-0.187044,-1.122267,-0.237107,-0.237107,-0.237107,-0.237107,-1.422645,-0.54583,-0.54583,-0.54583,-0.54583,-3.274978,-0.548187,-0.548187,-0.548187,-0.548187,-3.289121,-0.548243,-0.548243,-0.548243,-0.548243,-3.289458,-0.250347,-0.250347,-0.250347,-0.250347,-1.50208,-0.220397,-0.220397,-0.220397,-0.220397,-1.322384,-0.052224,-0.052224,-0.052224,-0.052224,-0.313347,-0.154427,-0.154427,-0.154427,-0.154427,-0.926561,-1.054892,-1.054892,-1.054892,-1.054892,-6.329351,1.163275,1.069529,1.257021,1.163275,6.979649,-0.095202,-0.095202,-0.095202,-0.095202,-0.571211,-0.015439,-0.015439,-0.015439,-0.015439,-0.092635,6.0
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,100007.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-0.025735,-0.025735,-0.025735,-0.025735,-0.025735,-0.010902,-0.010902,-0.010902,-0.010902,-0.010902,-0.181546,-0.181546,-0.181546,-0.181546,-0.181546,-0.185991,-0.185991,-0.185991,-0.185991,-0.185991,-0.111609,-0.111609,-0.111609,-0.111609,-0.111609,-0.006385,-0.006385,-0.006385,-0.006385,-0.006385,-0.06662,-0.06662,-0.06662,-0.06662,-0.06662,-0.022388,-0.022388,-0.022388,-0.022388,-0.022388,-0.008668,-0.008668,-0.008668,-0.008668,-0.008668,-0.256447,-0.256447,-0.256447,-0.256447,-0.256447,-0.262577,-0.262577,-0.262577,-0.262577,-0.262577,0.333248,0.333248,0.333248,0.333248,0.333248,,,,,0.0,,,,,0.0,1.0,100007.0,4.833333,1.0,7.0,5.0,29.0,8.0,8.0,8.0,8.0,48.0,2.0,2.0,2.0,2.0,12.0,24.666667,24.0,25.0,25.0,148.0,2.666667,1.0,3.0,3.0,16.0,1.0,1.0,1.0,1.0,6.0,1.333333,1.0,2.0,1.0,8.0,19.666667,3.0,28.0,28.0,118.0,1.5,1.0,4.0,1.0,9.0,3.333333,3.0,4.0,3.0,20.0,2.166667,1.0,3.0,2.5,13.0,6.666667,3.0,11.0,5.0,40.0,3.0,0.0,7.0,2.0,18.0,3.5,2.0,5.0,3.5,21.0,9.166667,4.0,14.0,9.0,55.0,3.166667,1.0,5.0,3.5,19.0,1.0,1.0,1.0,1.0,6.0,1.0,1.0,1.0,1.0,6.0,1.666667,1.0,2.0,2.0,10.0,1.0,1.0,1.0,1.0,6.0,1.0,1.0,1.0,1.0,6.0,1.166667,1.0,2.0,1.0,7.0,1.166667,1.0,2.0,1.0,7.0,1.166667,1.0,2.0,1.0,7.0,1.166667,1.0,2.0,1.0,7.0,1.166667,1.0,2.0,1.0,7.0,1.166667,1.0,2.0,1.0,7.0,1.666667,1.0,2.0,2.0,10.0,2.0,2.0,2.0,2.0,12.0,2.0,2.0,2.0,2.0,12.0,-0.199408,-0.99201,0.589814,-0.029003,-1.196448,-0.084376,-0.539851,0.246828,0.054704,-0.506257,-0.092522,-0.569719,0.277128,0.005708,-0.555133,-0.121755,-0.162139,-0.021235,-0.162139,-0.730531,-0.180342,-0.655142,0.164915,-0.035361,-1.08205,0.425881,-0.39766,2.536205,-0.011625,2.555286,-0.439166,-1.894906,0.65034,-0.135824,-2.634994,0.197503,0.197503,0.197503,0.197503,1.185016,-0.161437,-0.181546,-0.146426,-0.157253,-0.968624,0.119345,-0.390938,2.62817,-0.379311,0.716069,-0.24657,-0.26198,-0.233468,-0.244145,-1.479422,0.09017,-0.405766,2.528448,-0.394405,0.541018,-0.045245,-1.344975,0.754588,0.154713,-0.271471,0.755685,-0.497734,2.009104,0.755685,4.534108,0.05954,0.05954,0.05954,0.05954,0.357239,0.307398,-0.173897,2.064424,-0.173897,1.84439,0.000521,0.000521,0.000521,0.000521,0.003127,0.031098,0.031098,0.031098,0.031098,0.186587,0.013359,-0.044189,0.124315,-0.04005,0.080154,2157812.0,1692033.0,2730157.0,2060607.5,12946871.0,6.0,100007.0,1.287879,1.0,8.0,1.0,85.0,1.0,1.0,1.0,1.0,66.0,1.0,1.0,1.0,1.0,66.0,-0.14547,-0.590574,0.577824,-0.42366,-9.601001,-0.13564,-0.944013,1.218932,-0.223031,-8.952224,0.052797,-1.610776,1.304836,0.288208,3.484571,-0.087458,-0.087458,-0.087458,-0.087458,-5.77224,-0.019976,-0.019976,-0.019976,-0.019976,-1.318426,66.0,100007.0,1.0,1.0,1.0,1.0,66.0,1.0,1.0,1.0,1.0,66.0,-0.0867,-0.301148,0.111288,-0.020037,-5.722227,-0.091763,-0.314929,0.099443,-0.021901,-6.056383,0.023514,-1.582665,1.295511,0.24805,1.551944,0.01706,-1.602767,1.283819,0.238805,1.125945,-0.443497,-0.670224,-0.070165,-0.445202,-29.270822,0.299483,0.138486,1.104468,0.138486,19.765869,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,1.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.009848,0.0,0.43,0.0,0.65,0.0,0.0,0.0,0.0,0.0,66.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [137]:
df_info_summary(train_df_join)

                                          Non-Null Count  Null Count  % Null    Dtype
SK_ID_CURR                                        307511           0    0.00    int64
TARGET                                            307511           0    0.00    int64
NAME_CONTRACT_TYPE                                307511           0    0.00   object
CODE_GENDER                                       307511           0    0.00   object
FLAG_OWN_CAR                                      307511           0    0.00   object
FLAG_OWN_REALTY                                   307511           0    0.00   object
CNT_CHILDREN                                      307511           0    0.00    int64
AMT_INCOME_TOTAL                                  307511           0    0.00  float64
AMT_CREDIT                                        307511           0    0.00  float64
AMT_ANNUITY                                       307499          12    0.00  float64
AMT_GOODS_PRICE                                   3072

In [138]:
train_df_join.shape

(307511, 757)

In [139]:
# Guardar df final en formato parquet
train_df_join.to_parquet(os.path.join(PATH, "train.parquet"))