# Vassoura quickstart

In [1]:
import pandas as pd
from vassoura import Vassoura
# Show every column when a wide DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)

# Relative path to the LendingClub accepted-loans CSV used in this quickstart.
FILE_PATH_1 = '../../datasets/lending_club/accepted_2007_to_2018Q4.csv'

# Carregar dataset de exemplo
def read_and_clean_csv_mixed_types(path, nrows=None, verbose=True):
    """
    Read a CSV file, detect columns that hold more than one Python type and
    normalise each of them to a single type.

    A mixed-type column is converted to numeric only when every non-null
    value parses as a number. Otherwise its non-null values are converted to
    strings (missing values stay missing), so no data is silently discarded.

    Parameters
    ----------
    path : str, path-like or file-like
        Location of the CSV file (anything accepted by ``pd.read_csv``).
    nrows : int, optional
        Number of rows to read (None reads the whole file).
    verbose : bool, default True
        When True, print every mixed-type column and the conversion applied.

    Returns
    -------
    df : pandas.DataFrame
        The loaded frame with mixed-type columns normalised.
    mixed_type_columns : dict
        Maps each mixed-type column name to the value counts of the Python
        types found in it before conversion.
    """
    # Initial read; low_memory=False parses each column in a single pass.
    df = pd.read_csv(path, low_memory=False, nrows=nrows)

    # Detect mixed-type columns. Columns with a concrete (non-object) dtype
    # hold a single value type, so only object columns need the per-value scan.
    mixed_type_columns = {}
    for col in df.columns:
        if df[col].dtype != object:
            continue
        types_in_col = df[col].dropna().apply(type).value_counts()
        if len(types_in_col) > 1:
            mixed_type_columns[col] = types_in_col
            if verbose:
                print(f"\n[!] Coluna '{col}' tem múltiplos tipos:")
                print(types_in_col)

    # Normalise each mixed column.
    for col in mixed_type_columns:
        original = df[col]
        try:
            # errors='coerce' turns unparseable values into NaN instead of
            # raising, so the conversion is accepted only when it does not
            # lose any non-null value.
            as_numeric = pd.to_numeric(original, errors='coerce')
            lossless = as_numeric.notna().sum() == original.notna().sum()
        except (TypeError, ValueError):
            lossless = False

        if lossless:
            df[col] = as_numeric
            if verbose:
                print(f"[✓] Coluna '{col}' convertida para float.")
        else:
            # Stringify only the non-null values so that missing entries do
            # not become the literal text "nan".
            df[col] = original.where(original.isna(), original.astype(str))
            if verbose:
                print(f"[✓] Coluna '{col}' convertida para string.")

    return df, mixed_type_columns

# Load only the first 500,000 rows of the file; the second return value holds
# whichever columns were detected with mixed types.
df, problemas_1 = read_and_clean_csv_mixed_types(FILE_PATH_1, nrows=500_000)

# Quick look at the size of the frame and its first rows.
print(df.shape)
display(df.head(3))

(500000, 151)


Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,fico_range_low,fico_range_high,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,last_fico_range_high,last_fico_range_low,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_fico_range_low,sec_app_fico_range_high,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,h
ardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,68407277,,3600.0,3600.0,3600.0,36 months,13.99,123.03,C,C4,leadman,10+ years,MORTGAGE,55000.0,Not Verified,Dec-2015,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,debt_consolidation,Debt consolidation,190xx,PA,5.91,0.0,Aug-2003,675.0,679.0,1.0,30.0,,7.0,0.0,2765.0,29.7,13.0,w,0.0,0.0,4421.723917,4421.72,3600.0,821.72,0.0,0.0,0.0,Jan-2019,122.67,,Mar-2019,564.0,560.0,0.0,30.0,1.0,Individual,,,,0.0,722.0,144904.0,2.0,2.0,0.0,1.0,21.0,4981.0,36.0,3.0,3.0,722.0,34.0,9300.0,3.0,1.0,4.0,4.0,20701.0,1506.0,37.2,0.0,0.0,148.0,128.0,3.0,3.0,1.0,4.0,69.0,4.0,69.0,2.0,2.0,4.0,2.0,5.0,3.0,4.0,9.0,4.0,7.0,0.0,0.0,0.0,3.0,76.9,0.0,0.0,0.0,178050.0,7746.0,2400.0,13734.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
1,68355089,,24700.0,24700.0,24700.0,36 months,11.99,820.28,C,C1,Engineer,10+ years,MORTGAGE,65000.0,Not Verified,Dec-2015,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,small_business,Business,577xx,SD,16.06,1.0,Dec-1999,715.0,719.0,4.0,6.0,,22.0,0.0,21470.0,19.2,38.0,w,0.0,0.0,25679.66,25679.66,24700.0,979.66,0.0,0.0,0.0,Jun-2016,926.35,,Mar-2019,699.0,695.0,0.0,,1.0,Individual,,,,0.0,0.0,204396.0,1.0,1.0,0.0,1.0,19.0,18005.0,73.0,2.0,3.0,6472.0,29.0,111800.0,0.0,0.0,6.0,4.0,9733.0,57830.0,27.1,0.0,0.0,113.0,192.0,2.0,2.0,4.0,2.0,,0.0,6.0,0.0,5.0,5.0,13.0,17.0,6.0,20.0,27.0,5.0,22.0,0.0,0.0,0.0,2.0,97.4,7.7,0.0,0.0,314017.0,39475.0,79300.0,24667.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
2,68341763,,20000.0,20000.0,20000.0,60 months,10.78,432.66,B,B4,truck driver,10+ years,MORTGAGE,63000.0,Not Verified,Dec-2015,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,home_improvement,,605xx,IL,10.78,0.0,Aug-2000,695.0,699.0,0.0,,,6.0,0.0,7869.0,56.2,18.0,w,0.0,0.0,22705.924294,22705.92,20000.0,2705.92,0.0,0.0,0.0,Jun-2017,15813.3,,Mar-2019,704.0,700.0,0.0,,1.0,Joint App,71000.0,13.85,Not Verified,0.0,0.0,189699.0,0.0,1.0,0.0,4.0,19.0,10827.0,73.0,0.0,2.0,2081.0,65.0,14000.0,2.0,5.0,1.0,6.0,31617.0,2737.0,55.9,0.0,0.0,125.0,184.0,14.0,14.0,5.0,101.0,,10.0,,0.0,2.0,3.0,2.0,4.0,6.0,4.0,7.0,3.0,6.0,0.0,0.0,0.0,0.0,100.0,50.0,0.0,0.0,218418.0,18696.0,6200.0,14877.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


### Análise de Target

In [2]:
# Name of the binary credit-risk label column created in this cell.
TARGET = 'target_risco_credito'

# Loan statuses labelled as bad (1); every other status is labelled 0.
# "Late (16-30 days)" is currently left out of the bad set.
status_de_risco = ["Charged Off", "Default", "Late (31-120 days)"]

df[TARGET] = (
    df["loan_status"]
    .isin(status_de_risco)
    .astype(int)
)

# Class balance of the label, in percent.
df[TARGET].value_counts(dropna=False, normalize=True).mul(100)

target_risco_credito
0    83.639
1    16.361
Name: proportion, dtype: float64

### Safras

In [3]:
# Columns that hold month-level dates written as text such as "Dec-2015".
# "last_pymnt_amnt" is left out of this list (it is an amount tied to the
# payment date, not a date).
temporal_columns = [
    "issue_d",                      # loan issue date
    "earliest_cr_line",             # borrower's first credit line
    "last_pymnt_d",                 # last payment made
    "last_credit_pull_d",           # last credit pull
    "next_pymnt_d",                 # next scheduled payment (when applicable)
    "debt_settlement_flag_date",    # date a debt settlement took place
    "settlement_date",              # date the settlement was closed
]

# Parse every date column with the same "%b-%Y" format.
for col in temporal_columns:
    df[col] = df[col].pipe(pd.to_datetime, format="%b-%Y")

# Vintage ("safra") of each loan: its issue month as a monthly Period.
df["safra"] = df["issue_d"].dt.to_period("M")

# Earliest and latest vintage present in the loaded rows.
print(df["safra"].min(), df["safra"].max(), sep="\n")

2015-01
2018-03


In [4]:
# Sanity check: ignoring missing values, the label must contain exactly the two classes 0 and 1.
assert set(df[TARGET].dropna().unique()) == {0, 1}, "TARGET não é binário!"

In [5]:
# Preview the first rows again, now with the parsed dates and the added label and vintage columns.
df.head(3)

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,fico_range_low,fico_range_high,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,last_fico_range_high,last_fico_range_low,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_fico_range_low,sec_app_fico_range_high,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,h
ardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term,target_risco_credito,safra
0,68407277,,3600.0,3600.0,3600.0,36 months,13.99,123.03,C,C4,leadman,10+ years,MORTGAGE,55000.0,Not Verified,2015-12-01,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,debt_consolidation,Debt consolidation,190xx,PA,5.91,0.0,2003-08-01,675.0,679.0,1.0,30.0,,7.0,0.0,2765.0,29.7,13.0,w,0.0,0.0,4421.723917,4421.72,3600.0,821.72,0.0,0.0,0.0,2019-01-01,122.67,NaT,2019-03-01,564.0,560.0,0.0,30.0,1.0,Individual,,,,0.0,722.0,144904.0,2.0,2.0,0.0,1.0,21.0,4981.0,36.0,3.0,3.0,722.0,34.0,9300.0,3.0,1.0,4.0,4.0,20701.0,1506.0,37.2,0.0,0.0,148.0,128.0,3.0,3.0,1.0,4.0,69.0,4.0,69.0,2.0,2.0,4.0,2.0,5.0,3.0,4.0,9.0,4.0,7.0,0.0,0.0,0.0,3.0,76.9,0.0,0.0,0.0,178050.0,7746.0,2400.0,13734.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,NaT,,NaT,,,,0,2015-12
1,68355089,,24700.0,24700.0,24700.0,36 months,11.99,820.28,C,C1,Engineer,10+ years,MORTGAGE,65000.0,Not Verified,2015-12-01,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,small_business,Business,577xx,SD,16.06,1.0,1999-12-01,715.0,719.0,4.0,6.0,,22.0,0.0,21470.0,19.2,38.0,w,0.0,0.0,25679.66,25679.66,24700.0,979.66,0.0,0.0,0.0,2016-06-01,926.35,NaT,2019-03-01,699.0,695.0,0.0,,1.0,Individual,,,,0.0,0.0,204396.0,1.0,1.0,0.0,1.0,19.0,18005.0,73.0,2.0,3.0,6472.0,29.0,111800.0,0.0,0.0,6.0,4.0,9733.0,57830.0,27.1,0.0,0.0,113.0,192.0,2.0,2.0,4.0,2.0,,0.0,6.0,0.0,5.0,5.0,13.0,17.0,6.0,20.0,27.0,5.0,22.0,0.0,0.0,0.0,2.0,97.4,7.7,0.0,0.0,314017.0,39475.0,79300.0,24667.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,NaT,,NaT,,,,0,2015-12
2,68341763,,20000.0,20000.0,20000.0,60 months,10.78,432.66,B,B4,truck driver,10+ years,MORTGAGE,63000.0,Not Verified,2015-12-01,Fully Paid,n,https://lendingclub.com/browse/loanDetail.acti...,,home_improvement,,605xx,IL,10.78,0.0,2000-08-01,695.0,699.0,0.0,,,6.0,0.0,7869.0,56.2,18.0,w,0.0,0.0,22705.924294,22705.92,20000.0,2705.92,0.0,0.0,0.0,2017-06-01,15813.3,NaT,2019-03-01,704.0,700.0,0.0,,1.0,Joint App,71000.0,13.85,Not Verified,0.0,0.0,189699.0,0.0,1.0,0.0,4.0,19.0,10827.0,73.0,0.0,2.0,2081.0,65.0,14000.0,2.0,5.0,1.0,6.0,31617.0,2737.0,55.9,0.0,0.0,125.0,184.0,14.0,14.0,5.0,101.0,,10.0,,0.0,2.0,3.0,2.0,4.0,6.0,4.0,7.0,3.0,6.0,0.0,0.0,0.0,0.0,100.0,50.0,0.0,0.0,218418.0,18696.0,6200.0,14877.0,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,NaT,,NaT,,,,0,2015-12


In [6]:
# Identifiers, the URL, the vintage column and the parsed date columns are
# removed before modelling.
drop_cols = ['id', 'member_id', 'url', 'safra'] + temporal_columns

print(df.shape)
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a strict drop would raise KeyError.
df = df.drop(columns=drop_cols, errors="ignore")

# Keep numeric columns only (the integer label column is numeric and stays).
df = df.select_dtypes(include=['number'])

print(df.shape)
# Missing values are replaced by the sentinel -1 rather than dropping rows.
# NOTE(review): columns such as total_rec_prncp, out_prncp and recoveries look
# like post-origination outcomes and may leak the label — confirm before
# trusting the feature ranking.
df = df.fillna(-1)
print(df.shape)

(500000, 153)
(500000, 113)
(500000, 113)


In [20]:
# Fit Vassoura on the numeric frame. The label column is passed through the
# TARGET constant so it cannot drift from the column created earlier.
v = Vassoura(
    target_col=TARGET,
    model_name='logistic_balanced',
    report=True
)
v.fit(df)

# Show the feature ranking returned by the fitted object.
print(v.get_feature_ranking())

[2025-06-12 00:11:48,090] INFO – === Vassoura Fit Started ===
[2025-06-12 00:11:50,376] INFO – [SampleManager] strategy='auto', triggered=True, original_rows=500000, sampled_rows=100000, frac=0.2
[2025-06-12 00:11:50,581] INFO – [CV] Using StratifiedKFold(n_splits=5, shuffle=True)
[2025-06-12 00:11:52,389] INFO – [DynamicScaler] strategy='auto' | examined=112 cols | scaled=104 (Std:15, QT:89, MinMax:0) | skipped=8
[2025-06-12 00:12:05,409] INFO – [DynamicScaler] strategy='auto' | examined=112 cols | scaled=104 (Std:15, QT:89, MinMax:0) | skipped=8
[2025-06-12 00:12:16,269] INFO – [DynamicScaler] strategy='auto' | examined=112 cols | scaled=104 (Std:15, QT:89, MinMax:0) | skipped=8
[2025-06-12 00:12:27,227] INFO – [DynamicScaler] strategy='auto' | examined=112 cols | scaled=104 (Std:15, QT:89, MinMax:0) | skipped=8
[2025-06-12 00:12:37,942] INFO – [DynamicScaler] strategy='auto' | examined=112 cols | scaled=104 (Std:15, QT:89, MinMax:0) | skipped=8
[2025-06-12 00:12:49,300] INFO – [Dyna

loan_amnt                        -3.151246
funded_amnt                      -3.151246
funded_amnt_inv                  -3.199636
int_rate                         -0.009745
installment                      -0.057517
                                    ...   
hardship_payoff_balance_amount    0.368952
hardship_last_payment_amount     -0.029248
settlement_amount                -0.075283
settlement_percentage             0.000465
settlement_term                  -0.004044
Name: mean, Length: 112, dtype: float64

loan_amnt                        -3.151246
funded_amnt                      -3.151246
funded_amnt_inv                  -3.199636
int_rate                         -0.009745
installment                      -0.057517
                                    ...   
hardship_payoff_balance_amount    0.368952
hardship_last_payment_amount     -0.029248
settlement_amount                -0.075283
settlement_percentage             0.000465
settlement_term                  -0.004044
Name: mean, Length: 112, dtype: float64

[2025-06-12 00:13:07,168] INFO – === Vassoura Fit Completed ===
total_rec_prncp             4.378149e-03
out_prncp_inv               3.069883e-03
out_prncp                   3.069493e-03
total_rec_int               2.130599e-03
last_pymnt_amnt             2.125474e-03
                                ...     
num_tl_90g_dpd_24m          2.452086e-08
mort_acc                    2.133621e-08
acc_now_delinq              5.049092e-09
chargeoff_within_12_mths    4.093999e-09
num_tl_30dpd                2.706636e-09
Length: 113, dtype: float64


In [28]:
# Describe the audit snapshot named 'raw' (presumably the frame as passed to fit — TODO confirm).
v.audit_.describe_snapshot('raw')


📄 Descrição do snapshot 'raw':

▶️ Shape: (500000, 113)
▶️ Chaves de duplicação: None

🧱 Tipos de dados:


loan_amnt                       float64
funded_amnt                     float64
funded_amnt_inv                 float64
int_rate                        float64
installment                     float64
                                 ...   
hardship_last_payment_amount    float64
settlement_amount               float64
settlement_percentage           float64
settlement_term                 float64
target_risco_credito              int32
Length: 113, dtype: object


🔎 Colunas detectadas automaticamente:
   • Numéricas (112): ['loan_amnt', 'funded_amnt', 'funded_amnt_inv', 'int_rate', 'installment', 'annual_inc', 'dti', 'delinq_2yrs', 'fico_range_low', 'fico_range_high', 'inq_last_6mths', 'mths_since_last_delinq', 'mths_since_last_record', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'out_prncp', 'out_prncp_inv', 'total_pymnt', 'total_pymnt_inv', 'total_rec_prncp', 'total_rec_int', 'total_rec_late_fee', 'recoveries', 'collection_recovery_fee', 'last_pymnt_amnt', 'last_fico_range_high', 'last_fico_range_low', 'collections_12_mths_ex_med', 'mths_since_last_major_derog', 'policy_code', 'annual_inc_joint', 'dti_joint', 'acc_now_delinq', 'tot_coll_amt', 'tot_cur_bal', 'open_acc_6m', 'open_act_il', 'open_il_12m', 'open_il_24m', 'mths_since_rcnt_il', 'total_bal_il', 'il_util', 'open_rv_12m', 'open_rv_24m', 'max_bal_bc', 'all_util', 'total_rev_hi_lim', 'inq_fi', 'total_cu_tl', 'inq_last_12m', 'acc_open_past_24mths', 'avg_cur_bal', 'bc_op

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
loan_amnt,500000.0,15388.898996,8852.306965,-1.0,8500.00,14000.00,20000.00,40000.00
funded_amnt,500000.0,15388.898996,8852.306965,-1.0,8500.00,14000.00,20000.00,40000.00
funded_amnt_inv,500000.0,15383.159196,8849.391839,-1.0,8500.00,14000.00,20000.00,40000.00
int_rate,500000.0,12.574153,4.438302,-1.0,9.17,12.29,15.59,30.99
installment,500000.0,446.387041,252.570107,-1.0,262.75,385.91,589.22,1607.80
...,...,...,...,...,...,...,...,...
hardship_last_payment_amount,500000.0,0.026961,19.307820,-1.0,-1.00,-1.00,-1.00,1247.48
settlement_amount,500000.0,113.914446,917.939990,-1.0,-1.00,-1.00,-1.00,30000.00
settlement_percentage,500000.0,0.134873,7.379943,-1.0,-1.00,-1.00,-1.00,166.67
settlement_term,500000.0,-0.674046,2.417355,-1.0,-1.00,-1.00,-1.00,112.00



🏷️ Estatísticas categóricas:


In [29]:
# Describe the audit snapshot named 'processed' (presumably the frame after sampling — TODO confirm).
v.audit_.describe_snapshot('processed')


📄 Descrição do snapshot 'processed':

▶️ Shape: (100000, 113)
▶️ Chaves de duplicação: None

🧱 Tipos de dados:


loan_amnt                       float64
funded_amnt                     float64
funded_amnt_inv                 float64
int_rate                        float64
installment                     float64
                                 ...   
hardship_last_payment_amount    float64
settlement_amount               float64
settlement_percentage           float64
settlement_term                 float64
target_risco_credito              int32
Length: 113, dtype: object


🔎 Colunas detectadas automaticamente:
   • Numéricas (112): ['loan_amnt', 'funded_amnt', 'funded_amnt_inv', 'int_rate', 'installment', 'annual_inc', 'dti', 'delinq_2yrs', 'fico_range_low', 'fico_range_high', 'inq_last_6mths', 'mths_since_last_delinq', 'mths_since_last_record', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'out_prncp', 'out_prncp_inv', 'total_pymnt', 'total_pymnt_inv', 'total_rec_prncp', 'total_rec_int', 'total_rec_late_fee', 'recoveries', 'collection_recovery_fee', 'last_pymnt_amnt', 'last_fico_range_high', 'last_fico_range_low', 'collections_12_mths_ex_med', 'mths_since_last_major_derog', 'policy_code', 'annual_inc_joint', 'dti_joint', 'acc_now_delinq', 'tot_coll_amt', 'tot_cur_bal', 'open_acc_6m', 'open_act_il', 'open_il_12m', 'open_il_24m', 'mths_since_rcnt_il', 'total_bal_il', 'il_util', 'open_rv_12m', 'open_rv_24m', 'max_bal_bc', 'all_util', 'total_rev_hi_lim', 'inq_fi', 'total_cu_tl', 'inq_last_12m', 'acc_open_past_24mths', 'avg_cur_bal', 'bc_op

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
loan_amnt,100000.0,15385.747750,8840.686311,1000.00,8500.00,14000.00,20000.00,40000.00
funded_amnt,100000.0,15385.747750,8840.686311,1000.00,8500.00,14000.00,20000.00,40000.00
funded_amnt_inv,100000.0,15379.959560,8837.795686,900.00,8500.00,14000.00,20000.00,40000.00
int_rate,100000.0,12.564408,4.445839,5.31,9.17,12.29,15.59,30.94
installment,100000.0,446.329524,252.461029,14.01,262.41,387.15,589.22,1587.23
...,...,...,...,...,...,...,...,...
hardship_last_payment_amount,100000.0,-0.002287,18.773232,-1.00,-1.00,-1.00,-1.00,1072.99
settlement_amount,100000.0,113.839164,920.038570,-1.00,-1.00,-1.00,-1.00,27000.00
settlement_percentage,100000.0,0.135338,7.408845,-1.00,-1.00,-1.00,-1.00,166.67
settlement_term,100000.0,-0.678090,2.383972,-1.00,-1.00,-1.00,-1.00,36.00



🏷️ Estatísticas categóricas:


In [27]:
# Display the pipeline_ attribute of the fitted object (presumably the fitted pipeline — TODO confirm).
v.pipeline_

In [30]:
# Write the HTML report to disk.
# NOTE(review): this path is relative to the current working directory — confirm it resolves from where the notebook runs.
v.export_report(path='./src/vassoura/reports/report.html')

loan_amnt                        -3.151246
funded_amnt                      -3.151246
funded_amnt_inv                  -3.199636
int_rate                         -0.009745
installment                      -0.057517
                                    ...   
hardship_payoff_balance_amount    0.368952
hardship_last_payment_amount     -0.029248
settlement_amount                -0.075283
settlement_percentage             0.000465
settlement_term                  -0.004044
Name: mean, Length: 112, dtype: float64

In [None]:
# NOTE(review): the lines below look like printed output pasted into a code
# cell; as plain text they are not valid Python and break "Run All", so they
# are kept only as comments.
# loan_amnt                        -3.151246
# funded_amnt                      -3.151246
# funded_amnt_inv                  -3.199636
# int_rate                         -0.009745
# installment                      -0.057517
# hardship_payoff_balance_amount
# hardship_last_payment_amount
# settlement_amount
# settlement_percentage
# settlement_term