#### 🚦 **`Módulos Necesarios`**

In [None]:
# Project-local module that holds the database settings
# (config.DATABASE, config.USER and config.PASS are read when connecting)
import config

import pandas as pd 
import psycopg2
import warnings

# Silence every Python warning raised from this point on.
# NOTE(review): a blanket 'ignore' also hides deprecation and dtype warnings —
# consider filtering only the specific categories that are known to be noise.
warnings.filterwarnings('ignore')


In [2]:
# Show every column when a DataFrame is rendered (None removes the column cap)
pd.options.display.max_columns = None

#### 🗳️ **`Load credit_card_balance.csv`**

In [3]:
# Load the credit card balance CSV into a DataFrame
# (the original note labels it as the training data set)
df = pd.read_csv("../data_raw/credit_card_balance.csv")

In [4]:
# Dataset dimensions as (rows, columns)
df.shape 

(3840312, 23)

In [5]:
# Column labels of the loaded frame
df.columns

Index(['SK_ID_PREV', 'SK_ID_CURR', 'MONTHS_BALANCE', 'AMT_BALANCE',
       'AMT_CREDIT_LIMIT_ACTUAL', 'AMT_DRAWINGS_ATM_CURRENT',
       'AMT_DRAWINGS_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT',
       'AMT_DRAWINGS_POS_CURRENT', 'AMT_INST_MIN_REGULARITY',
       'AMT_PAYMENT_CURRENT', 'AMT_PAYMENT_TOTAL_CURRENT',
       'AMT_RECEIVABLE_PRINCIPAL', 'AMT_RECIVABLE', 'AMT_TOTAL_RECEIVABLE',
       'CNT_DRAWINGS_ATM_CURRENT', 'CNT_DRAWINGS_CURRENT',
       'CNT_DRAWINGS_OTHER_CURRENT', 'CNT_DRAWINGS_POS_CURRENT',
       'CNT_INSTALMENT_MATURE_CUM', 'NAME_CONTRACT_STATUS', 'SK_DPD',
       'SK_DPD_DEF'],
      dtype='object')

In [6]:
# Preview the first 5 rows
df.head(5)

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


In [7]:
# Print the index range plus each column's name and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3840312 entries, 0 to 3840311
Data columns (total 23 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   SK_ID_PREV                  int64  
 1   SK_ID_CURR                  int64  
 2   MONTHS_BALANCE              int64  
 3   AMT_BALANCE                 float64
 4   AMT_CREDIT_LIMIT_ACTUAL     int64  
 5   AMT_DRAWINGS_ATM_CURRENT    float64
 6   AMT_DRAWINGS_CURRENT        float64
 7   AMT_DRAWINGS_OTHER_CURRENT  float64
 8   AMT_DRAWINGS_POS_CURRENT    float64
 9   AMT_INST_MIN_REGULARITY     float64
 10  AMT_PAYMENT_CURRENT         float64
 11  AMT_PAYMENT_TOTAL_CURRENT   float64
 12  AMT_RECEIVABLE_PRINCIPAL    float64
 13  AMT_RECIVABLE               float64
 14  AMT_TOTAL_RECEIVABLE        float64
 15  CNT_DRAWINGS_ATM_CURRENT    float64
 16  CNT_DRAWINGS_CURRENT        int64  
 17  CNT_DRAWINGS_OTHER_CURRENT  float64
 18  CNT_DRAWINGS_POS_CURRENT    float64
 19  CNT_INSTALMENT_MATURE

⚠️ **`Warning`**: Algunas columnas `CNT_*` vienen como `float64` en el archivo csv; sin embargo, según las descripciones del archivo `Homecredit_columns_description.csv`, deben ser enteros. Por lo tanto, se deben convertir al tipo entero anulable `Int64` de pandas, que admite valores faltantes (NaN).

In [16]:
# The CNT_* columns hold counts, so they are cast to pandas' nullable
# integer dtype 'Int64', which can represent missing values (NaN).
cnt_columns = [
    'CNT_DRAWINGS_ATM_CURRENT',
    'CNT_DRAWINGS_CURRENT',
    'CNT_DRAWINGS_OTHER_CURRENT',
    'CNT_DRAWINGS_POS_CURRENT',
    'CNT_INSTALMENT_MATURE_CUM',
]

# One assignment per column, in the same order as the list above
for cnt_column in cnt_columns:
    df[cnt_column] = df[cnt_column].astype('Int64')


In [17]:
# Print the column dtypes again to inspect the result of the Int64 casts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3840312 entries, 0 to 3840311
Data columns (total 23 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   SK_ID_PREV                  int64  
 1   SK_ID_CURR                  int64  
 2   MONTHS_BALANCE              int64  
 3   AMT_BALANCE                 float64
 4   AMT_CREDIT_LIMIT_ACTUAL     int64  
 5   AMT_DRAWINGS_ATM_CURRENT    float64
 6   AMT_DRAWINGS_CURRENT        float64
 7   AMT_DRAWINGS_OTHER_CURRENT  float64
 8   AMT_DRAWINGS_POS_CURRENT    float64
 9   AMT_INST_MIN_REGULARITY     float64
 10  AMT_PAYMENT_CURRENT         float64
 11  AMT_PAYMENT_TOTAL_CURRENT   float64
 12  AMT_RECEIVABLE_PRINCIPAL    float64
 13  AMT_RECIVABLE               float64
 14  AMT_TOTAL_RECEIVABLE        float64
 15  CNT_DRAWINGS_ATM_CURRENT    Int64  
 16  CNT_DRAWINGS_CURRENT        Int64  
 17  CNT_DRAWINGS_OTHER_CURRENT  Int64  
 18  CNT_DRAWINGS_POS_CURRENT    Int64  
 19  CNT_INSTALMENT_MATURE

In [18]:
# Export the converted frame to a temporary CSV, without writing the index
# NOTE(review): the absolute /tmp path is machine-specific — consider a configurable directory.
df.to_csv("/tmp/credit_card_balance_bkp.csv", index = False)

#### 🚀 **`Conexión a BD Credit Data`**

In [19]:
# Database name and credentials come from the local config module;
# host and port are hard-coded in this notebook.
connection_settings = {
    "dbname": config.DATABASE,
    "user": config.USER,
    "password": config.PASS,
    "host": "localhost",
    "port": "5432",
}
conn = psycopg2.connect(**connection_settings)

# Cursor bound to the connection above, used to run the queries that follow
cursor = conn.cursor()

In [20]:
# Run a query that counts the rows stored in the credit_card_balance table
# (the previous comment said it fetched column names, which it does not)
cursor.execute("""
    SELECT COUNT(*)
    FROM credit_card_balance
""")

#### ✅ **`Check Upload`**

In [21]:
# Fetch the single row produced by the last executed query;
# its first field is printed as the table's record count
resultado = cursor.fetchone()
print(f"El número de registros en la tabla credit_card_balance es: {resultado[0]}")

El número de registros en la tabla credit_card_balance es: 3840312


In [29]:
# Read a 10-row sample of the table through a psycopg2 cursor.
# pd.read_sql_query(..., conn) with a raw psycopg2 connection makes pandas emit
# a UserWarning (only SQLAlchemy connectables, URI strings and sqlite3
# connections are supported), so the frame is built from the cursor instead.
# NOTE(review): without ORDER BY the database does not guarantee which 10 rows
# come back, nor their order — do not rely on them matching df.head(10).
sample_query = """
    SELECT *
    FROM credit_card_balance
    LIMIT 10
"""

# A dedicated cursor, closed on exit, so the shared `cursor` is left untouched
with conn.cursor() as sample_cursor:
    sample_cursor.execute(sample_query)
    # cursor.description holds one entry per result column; field 0 is its name
    sample_columns = [column[0] for column in sample_cursor.description]
    sample_rows = sample_cursor.fetchall()

# coerce_float=True converts decimal.Decimal values to floats, matching the
# default behaviour of pd.read_sql_query
df_resultados = pd.DataFrame.from_records(
    sample_rows, columns=sample_columns, coerce_float=True
)

df_resultados

  df_resultados = pd.read_sql_query("""


Unnamed: 0,sk_id_prev,sk_id_curr,months_balance,amt_balance,amt_credit_limit_actual,amt_drawings_atm_current,amt_drawings_current,amt_drawings_other_current,amt_drawings_pos_current,amt_inst_min_regularity,amt_payment_current,amt_payment_total_current,amt_receivable_principal,amt_recivable,amt_total_receivable,cnt_drawings_atm_current,cnt_drawings_current,cnt_drawings_other_current,cnt_drawings_pos_current,cnt_instalment_mature_cum,name_contract_status,sk_dpd,sk_dpd_def
0,2562384,378907,-6,56.97,135000.0,0.0,877.5,0.0,877.5,1700.33,1800.0,1800.0,0.0,0.0,0.0,0,1,0,1,35,Active,0,0
1,2582071,363914,-1,63975.56,45000.0,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.56,64875.56,1,1,0,0,69,Active,0,0
2,1740877,371185,-7,31815.23,450000.0,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.43,31460.09,31460.09,0,0,0,0,30,Active,0,0
3,1389973,337855,-4,236572.11,225000.0,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.29,233048.97,233048.97,1,1,0,0,10,Active,0,0
4,1891521,126868,-1,453919.46,450000.0,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.4,453919.46,453919.46,0,1,0,1,101,Active,0,0
5,2646502,380010,-7,82903.82,270000.0,0.0,0.0,0.0,0.0,4449.11,3825.0,3825.0,80519.04,82773.32,82773.32,0,0,0,0,2,Active,7,0
6,1079071,171320,-6,353451.65,585000.0,67500.0,67500.0,0.0,0.0,14684.18,15750.0,15750.0,345433.86,351881.15,351881.15,1,1,0,0,6,Active,0,0
7,2095912,118650,-7,47962.13,45000.0,45000.0,45000.0,0.0,0.0,0.0,264.69,0.0,44735.31,47962.13,47962.13,1,1,0,0,51,Active,0,0
8,2181852,367360,-4,291543.08,292500.0,90000.0,289339.43,0.0,199339.43,130.5,4093.52,4093.52,285376.41,286831.58,286831.58,3,8,0,5,3,Active,0,0
9,1235299,203885,-5,201261.2,225000.0,76500.0,111026.7,0.0,34526.7,6338.34,45000.0,45000.0,192793.28,197224.7,197224.7,3,9,0,6,38,Active,0,0


In [30]:
# First 10 rows of the CSV-loaded frame
# (presumably shown for comparison with the database sample — confirm)
df.head(10)

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0,1,0,1,35,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1,1,0,0,69,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0,0,0,0,30,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1,1,0,0,10,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0,1,0,1,101,Active,0,0
5,2646502,380010,-7,82903.815,270000,0.0,0.0,0.0,0.0,4449.105,3825.0,3825.0,80519.04,82773.315,82773.315,0,0,0,0,2,Active,7,0
6,1079071,171320,-6,353451.645,585000,67500.0,67500.0,0.0,0.0,14684.175,15750.0,15750.0,345433.86,351881.145,351881.145,1,1,0,0,6,Active,0,0
7,2095912,118650,-7,47962.125,45000,45000.0,45000.0,0.0,0.0,0.0,264.69,0.0,44735.31,47962.125,47962.125,1,1,0,0,51,Active,0,0
8,2181852,367360,-4,291543.075,292500,90000.0,289339.425,0.0,199339.425,130.5,4093.515,4093.515,285376.41,286831.575,286831.575,3,8,0,5,3,Active,0,0
9,1235299,203885,-5,201261.195,225000,76500.0,111026.7,0.0,34526.7,6338.34,45000.0,45000.0,192793.275,197224.695,197224.695,3,9,0,6,38,Active,0,0
