# Binary Performance Evaluator – Demo Completo

Neste exemplo vamos treinar um classificador simples e explorar várias funcionalidades do `BinaryPerformanceEvaluator`. Usaremos dados sintéticos.

In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from riskpilot.evaluation import BinaryPerformanceEvaluator
from riskpilot.synthetic import SyntheticVintageGenerator

from sklearn.utils.class_weight import compute_sample_weight

In [27]:
# --------------------------------------------------
# 1. CONFIGURAÇÕES
# --------------------------------------------------
FILE_PATH = "../../datasets/lending_club/accepted_2007_to_2018Q4.csv"
NROWS     = 10_000
TARGET_RAW = "loan_status"          # coluna original
TARGET     = "target"               # nome final binário

# --------------------------------------------------
# 2. LEITURA E CONVERSÃO DE TIPOS MISTOS
# --------------------------------------------------
def read_and_clean_csv_mixed_types(path, nrows=None, verbose=True):
    df = pd.read_csv(path, low_memory=False, nrows=nrows)

    # identificar colunas com tipos mistos
    for col in df.columns:
        types = df[col].dropna().map(type).value_counts()
        if len(types) > 1 and verbose:
            print(f"[!] '{col}' com múltiplos tipos: {dict(types)}")

        # tentativa de conversão automática p/ numérico
        if len(types) > 1:
            try:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            except Exception:
                df[col] = df[col].astype(str)

    return df

df = read_and_clean_csv_mixed_types(FILE_PATH, nrows=NROWS)

# drop de colunas não usadas (caso existam)
df.drop(columns=[c for c in ["member_id"] if c in df.columns], inplace=True)

# --------------------------------------------------
# 3. TARGET BINÁRIO
# --------------------------------------------------
bad_status = ["Charged Off", "Default", "Late (31-120 days)"]
df[TARGET] = df[TARGET_RAW].isin(bad_status).astype(int)

# --------------------------------------------------
# 4. DATA E ID
# --------------------------------------------------
#df["date"] = pd.to_datetime(df["issue_d"], format="%b-%Y", errors="coerce")

df["date"] = pd.to_datetime('202201', format="%Y%M", errors="coerce")

df.drop(columns=["issue_d", TARGET_RAW], inplace=True)

df.reset_index(drop=False, inplace=True)   # index ➜ nova coluna

# --------------------------------------------------
# 5. FEATURES NUMÉRICAS + IMPUTAÇÃO
# --------------------------------------------------
numeric_cols = df.select_dtypes(include=[np.number]).columns.drop(["id", TARGET])
na_threshold = 0.30
valid_cols = numeric_cols[df[numeric_cols].isna().mean() < na_threshold].tolist()
valid_cols.remove('index')

df[valid_cols] = df[valid_cols].fillna(df[valid_cols].median())

# --------------------------------------------------
# 6. DATAFRAME FINAL
# --------------------------------------------------

FEATURES = [
    "funded_amnt_inv",
    "annual_inc",
    "dti",
    "fico_range_high",
    "out_prncp_inv",
    "total_rec_prncp",
    "last_pymnt_amnt",
    "last_fico_range_low",
    "acc_open_past_24mths",
    "avg_cur_bal",
    "bc_open_to_buy",
    "mo_sin_old_rev_tl_op",
    "mo_sin_rcnt_rev_tl_op",
    "mo_sin_rcnt_tl",
    "mort_acc",
    "mths_since_recent_bc",
    "mths_since_recent_inq",
    "num_tl_op_past_12m",
    "percent_bc_gt_75",
    "total_bc_limit",
    #"sub_grade"
]

ID_COLS = ['id']

IGNORE_COLS = ['member_id']

DATE_COLS = ['date']

# --------------------------------------------------
# Preparar dataset base
# --------------------------------------------------
df_model = df[ID_COLS+DATE_COLS+[TARGET] + FEATURES].dropna().copy()

In [28]:
ID_COLS

['id']

In [29]:
import numpy as np
from riskpilot.synthetic import SyntheticVintageGenerator

gen = SyntheticVintageGenerator(
    id_cols=ID_COLS,
    date_cols=DATE_COLS
).fit(df_model)

In [30]:
synthetic = gen.generate(
    n_periods=36,
    freq='ME',
    scenario='stress')
synthetic.head()

Unnamed: 0,id,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,...,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
0,2324c0858cf74899ac1183a9eb510575,2025-06-30,0,14000.0,85000.0,20.879445,719.0,0.0,6000.0,153.384186,...,55108.782525,113.0,9.0,3.0,0,13.0,10.0,0.0,20.0,66535.584554
1,cd6a2d22605744a2a148a672a73dde33,2025-06-30,0,3600.0,20000.0,33.714352,749.0,0.0,3600.0,349.960981,...,39525.028911,182.0,11.0,7.0,4,48.560176,6.0,2.0,40.0,56732.646926
2,88830529b21b4d319c273bd3eea09b4d,2025-06-30,0,22000.0,93729.007573,7.507042,779.0,0.0,25928.546227,1022.25191,...,62975.413678,124.0,7.0,3.0,2,13.0,9.0,4.0,0.0,81510.172189
3,640fd3ce09bb42749ad7acbf3f070bd7,2025-06-30,0,12000.0,62000.0,11.164088,664.0,0.0,11845.130528,10152.251155,...,3127.573686,136.288064,0.0,0.0,0,0.0,2.0,8.0,66.7,18700.0
4,fd23945b4fe24d3b86484ca6def88f27,2025-06-30,0,4800.0,45000.0,10.931047,704.0,5912.087373,494.051448,0.623031,...,0.0,254.186744,25.0,28.760823,6,30.0,5.0,0.0,100.0,11500.0


In [31]:
# --------------------------------------------------
# 2. Split treino / teste
# --------------------------------------------------
train_idx, test_idx = train_test_split(
    df_model.index,
    test_size=0.30,
    stratify=df_model[TARGET],
    random_state=42,
)

# Criar df_train e df_test primeiro
df_train = df_model.loc[train_idx].copy()
df_test  = df_model.loc[test_idx].copy()

# Depois extrair X e y a partir dos DataFrames
X_train, y_train = df_train[FEATURES], df_train[TARGET]
X_test , y_test  = df_test[FEATURES] , df_test[TARGET]

In [32]:
# --------------------------------------------------
# Garantir cópias independentes dos conjuntos de treino e teste
# --------------------------------------------------
X_train = df_train[FEATURES].copy()
X_test = df_test[FEATURES].copy()

# --------------------------------------------------
# Converter colunas categóricas (tipo object) para 'category' em X e df
# --------------------------------------------------
def convert_object_to_category(df):
    obj_cols = df.select_dtypes(include='object').columns
    df[obj_cols] = df[obj_cols].astype('category')
    return df

# Aplicar nos conjuntos usados no modelo
X_train = convert_object_to_category(X_train)
X_test = convert_object_to_category(X_test)

# Garantir que df_train e df_test também estejam com tipos consistentes
df_train = convert_object_to_category(df_train)
df_test = convert_object_to_category(df_test)

# --------------------------------------------------
# Verificações finais
# --------------------------------------------------
print("Colunas 'object' restantes em X_train:", X_train.dtypes[X_train.dtypes == 'object'])
print("Colunas 'object' restantes em df_test:", df_test.dtypes[df_test.dtypes == 'object'])


Colunas 'object' restantes em X_train: Series([], dtype: object)
Colunas 'object' restantes em df_test: Series([], dtype: object)


In [33]:
num_cols = df_train.select_dtypes(include=[float, int]).columns.tolist()

for col in ['index', 'id', TARGET]:
    try:
        num_cols.remove(col)
    except:
        pass   

In [34]:
# Aplicar o DynamicScaler no treino
from riskpilot.utils.scaler import DynamicScaler
scaler = DynamicScaler(
    strategy="auto",
    shapiro_p_val = 0.01, # se aumentar fica mais restritiva a escolha de StandardScaler()
    #serialize=True,
    #save_path="scalers.pkl", # arquivo com informações de escalonamento salvas 
    random_state=42,
    ignore_cols=ID_COLS+IGNORE_COLS+DATE_COLS+[TARGET]
)

scaler.fit(df_train)

# Relatório das decisões
print("\nResumo das decisões por feature:")
display(scaler.report_as_df())

INFO: Ignoring columns (no scaling): ['target', 'id']



Resumo das decisões por feature:


Unnamed: 0,chosen_scaler,validation_stats,ignored,candidates_tried,reason
funded_amnt_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
annual_inc,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.25859163507898...",[],[PowerTransformer],stats|skew|kurt
dti,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
fico_range_high,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
out_prncp_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
total_rec_prncp,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
last_pymnt_amnt,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.57111067419720...",[],[PowerTransformer],stats|skew|kurt
last_fico_range_low,PowerTransformer,"{'post_std': 0.9999999999999998, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt
acc_open_past_24mths,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
avg_cur_bal,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.50721730183757...",[],[PowerTransformer],stats|skew|kurt


In [35]:
scaler.report_as_df()['candidates_tried']

funded_amnt_inv          [PowerTransformer, QuantileTransformer, Robust...
annual_inc                                              [PowerTransformer]
dti                      [PowerTransformer, QuantileTransformer, Robust...
fico_range_high          [PowerTransformer, QuantileTransformer, Robust...
out_prncp_inv            [PowerTransformer, QuantileTransformer, Robust...
total_rec_prncp          [PowerTransformer, QuantileTransformer, Robust...
last_pymnt_amnt                                         [PowerTransformer]
last_fico_range_low                                     [PowerTransformer]
acc_open_past_24mths     [PowerTransformer, QuantileTransformer, Robust...
avg_cur_bal                                             [PowerTransformer]
bc_open_to_buy                                          [PowerTransformer]
mo_sin_old_rev_tl_op               [PowerTransformer, QuantileTransformer]
mo_sin_rcnt_rev_tl_op                                   [PowerTransformer]
mo_sin_rcnt_tl           

In [36]:
display(X_train.head())

# 5. Transformar o conjunto de treino
df_train_scaled = scaler.transform(df_train, return_df=True)
X_train_scaled = scaler.transform(X_train, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_train_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
6440,15550.0,53000.0,32.72,704.0,0.0,1924.93,395.89,0.0,7.0,2571.0,1297.0,172.0,8.0,4.0,2.0,8.0,8.0,3.0,100.0,7300.0
582,5000.0,85000.0,8.28,674.0,0.0,5000.0,2742.11,650.0,2.0,50544.0,1773.0,301.0,2.0,2.0,6.0,2.0,0.0,2.0,0.0,2100.0
4660,6000.0,80000.0,6.78,664.0,0.0,6000.0,5967.67,690.0,2.0,4044.0,21.0,363.0,3.0,3.0,2.0,3.0,3.0,2.0,100.0,2000.0
8753,8000.0,44000.0,12.25,689.0,0.0,8000.0,265.52,670.0,5.0,1092.0,423.0,102.0,2.0,2.0,0.0,48.0,4.0,4.0,100.0,1800.0
8045,2000.0,38000.0,20.56,739.0,0.0,2000.0,62.23,640.0,1.0,2072.0,13487.0,110.0,26.0,13.0,0.0,26.0,19.0,0.0,20.0,19900.0


INFO: Untouched columns preserved: ['date', 'target', 'id']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
6440,15550.0,-0.457281,32.72,704.0,0.0,1924.93,-0.632941,-3.672808,7.0,-0.966288,-0.79771,0.04518,0.084247,-0.165189,0.591133,-0.392183,8.0,3.0,1.24478,-0.774825
582,5000.0,0.444742,8.28,674.0,0.0,5000.0,0.379891,-0.309933,2.0,1.792006,-0.642909,1.159515,-1.028981,-0.813399,1.607872,-1.428347,0.0,2.0,-1.490288,-1.596509
4660,6000.0,0.330593,6.78,664.0,0.0,6000.0,0.844349,0.139826,2.0,-0.59745,-2.095093,1.63085,-0.733502,-0.445587,0.591133,-1.155398,3.0,2.0,1.24478,-1.622986
8753,8000.0,-0.820729,12.25,689.0,0.0,8000.0,-0.818827,-0.088757,5.0,-1.619834,-1.273633,-1.004131,-1.028981,-0.813399,-1.136794,1.17364,4.0,4.0,1.24478,-1.67889
8045,2000.0,-1.110307,20.56,739.0,0.0,2000.0,-1.433181,-0.417751,1.0,-1.136258,0.662181,-0.864365,1.157264,1.075069,-1.136794,0.627386,19.0,0.0,-0.303982,0.142283


In [37]:
display(X_test.head())

# 5. Transformar o conjunto de treino
df_test_scaled = scaler.transform(df_test, return_df=True)
X_test_scaled = scaler.transform(X_test, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_test_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
5787,15000.0,55000.0,11.63,714.0,0.0,15000.0,3456.96,650.0,5.0,1987.0,7094.0,131.0,12.0,8.0,0.0,12.0,12.0,3.0,16.7,15900.0
8238,12000.0,50000.0,19.25,719.0,0.0,12000.0,7745.48,735.0,1.0,16492.0,11752.0,189.0,29.0,16.0,3.0,123.0,3.0,0.0,50.0,20700.0
5574,1500.0,93000.0,33.4,709.0,0.0,1500.0,1.24,655.0,2.0,22156.0,5546.0,307.0,19.0,4.0,3.0,19.0,5.0,1.0,85.7,73300.0
5644,20000.0,51450.0,27.83,699.0,0.0,20000.0,1302.57,740.0,3.0,7616.0,0.0,240.0,2.0,2.0,0.0,16.0,2.0,1.0,100.0,22100.0
790,4500.0,39600.0,22.82,709.0,0.0,4500.0,4437.16,605.0,1.0,18737.0,115.0,113.0,45.0,17.0,1.0,45.0,5.0,0.0,100.0,9100.0


INFO: Untouched columns preserved: ['date', 'target', 'id']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
5787,15000.0,-0.38549,11.63,714.0,0.0,15000.0,0.514573,-0.309933,5.0,-1.168833,0.185454,-0.49639,0.447809,0.553431,-1.136794,-0.049081,12.0,3.0,-0.418408,-0.08689
8238,12000.0,-0.570581,19.25,719.0,0.0,12000.0,1.008182,0.681335,1.0,0.658471,0.554349,0.253606,1.258111,1.299399,0.963359,2.015154,3.0,0.0,0.443548,0.184111
5574,1500.0,0.613235,33.4,709.0,0.0,1500.0,-2.566437,-0.255332,2.0,0.9452,0.019657,1.204935,0.868059,-0.165189,0.963359,0.350505,5.0,1.0,1.045108,1.817012
5644,20000.0,-0.514938,27.83,699.0,0.0,20000.0,-0.032711,0.743836,3.0,-0.053431,-2.541752,0.694311,-1.028981,-0.813399,-1.136794,0.200053,2.0,1.0,1.24478,0.254637
790,4500.0,-1.028548,22.82,709.0,0.0,4500.0,0.663164,-0.780621,1.0,0.781393,-1.698542,-0.81091,1.663611,1.36488,0.013599,1.115959,5.0,0.0,1.24478,-0.595669


In [49]:
# # Visualizar
# scaler.plot_histograms(
#     df_train,
#     X_train_scaled,
#     features=num_cols)

In [39]:
# encoder = WOEGuard(
#     categorical_cols=categorical_cols,
#     drop_original=True,       # mantém ou remove colunas originais
#     suffix='_woe',            # sufixo para as novas colunas
#     alpha=0.5,                # suavização Laplace
#     default_woe=0.0,          # WoE default para categorias novas
#     include_nan=True          # trata NaN como categoria separada
# )

In [40]:
# # 2. Ajustar e transformar
# df_transformed = encoder.fit_transform(
#     X=df[categorical_cols], # a lista de colunas não precisa ser igual à categorical_cols, mas somente as que forem instanciadas no WOEGuard que serão codificadas.
#     y=df[TARGET]
# )
# display(df_transformed)

In [41]:
sample_weights_train = compute_sample_weight(class_weight='balanced', y=y_train)

# # Modelo 1: Regressão Logística
# model1 = LogisticRegression(
#     max_iter=2000,
#     solver='lbfgs',
#     class_weight=None,      # Removido porque vamos usar sample_weight manualmente
#     C=1.0
# )
# model1.fit(X_train, y_train, sample_weight=sample_weights_train)


# Modelo 2: XGBoost


model2 = XGBClassifier(
    n_estimators=150,
    #class_weights='balanced',
    learning_rate=0.05,
    max_depth=5,
    subsample=0.85,
    colsample_bytree=0.80,
    #scale_pos_weight=1.0,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1,
    #enable_categorical=True,
    #tree_method='hist'  # necessário com enable_categorical=True
)
model2.fit(
    X_train_scaled,
    y_train,
    sample_weight=sample_weights_train
    );

In [42]:
# gambiarra rapida
#df_train = pd.concat([X_train_scaled, y_train], axis=1)
#df_test = pd.concat([X_test_scaled, y_train], axis=1)

In [50]:
df_train.shape

(7000, 23)

In [43]:
evaluator = BinaryPerformanceEvaluator(
    model=model2,
    df_train=df_train,
    df_test=df_test,
    target_col='target',
    id_cols=['id'],
    date_col='date',
    homogeneous_group='auto'
    #group_col='group'
)

In [44]:
metrics = evaluator.compute_metrics()
metrics

Unnamed: 0_level_0,MCC,AUC_ROC,AUC_PR,Precision,Recall,Brier
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Train,-0.697713,0.014128,0.094472,0.090909,0.464516,0.879471
Test,-0.665637,0.019514,0.094615,0.098576,0.508475,0.874006


In [45]:
evaluator.plot_confusion(evaluator.df_test['target'], evaluator.df_test['y_pred_proba'], threshold='ks')

In [46]:
evaluator.plot_calibration()

In [47]:
evaluator.plot_event_rate()

(Figure({
     'data': [{'line': {'color': 'rgb(217,58,70)'},
               'mode': 'lines+markers',
               'name': '(-inf, 977.70)',
               'type': 'scatter',
               'x': array(['2022-01-01T00:01:00.000000000'], dtype='datetime64[ns]'),
               'y': {'bdata': 'A9ycoSL17z8=', 'dtype': 'f8'}},
              {'line': {'color': 'rgb(234,180,184)'},
               'mode': 'lines+markers',
               'name': '[977.70, 994.32)',
               'type': 'scatter',
               'x': array(['2022-01-01T00:01:00.000000000'], dtype='datetime64[ns]'),
               'y': {'bdata': 'Hug+rSUQ0D8=', 'dtype': 'f8'}},
              {'line': {'color': 'rgb(182,202,217)'},
               'mode': 'lines+markers',
               'name': '[994.32, 996.02)',
               'type': 'scatter',
               'x': array(['2022-01-01T00:01:00.000000000'], dtype='datetime64[ns]'),
               'y': {'bdata': 'nrzD8aOboD8=', 'dtype': 'f8'}},
              {'line': {'color': '

In [48]:
evaluator.plot_psi(reference_last_period=True)


PSI could not be computed (insufficient data).



(Figure({
     'data': [], 'layout': {'template': '...'}
 }),
 Empty DataFrame
 Columns: []
 Index: [])

In [None]:
evaluator.plot_ks()

In [None]:
evaluator.plot_group_radar()

In [None]:
evaluator.plot_decile_ks()

### Teste de Stress

In [None]:
gen = SyntheticVintageGenerator(id_cols=['id'], date_cols=['date']).fit(train)
evaluator.synthetic_gen = gen
stress = evaluator.run_stress_test()
stress['metrics']