# Binary Performance Evaluator – Demo Completo

Neste exemplo vamos treinar um classificador simples e explorar várias funcionalidades do `BinaryPerformanceEvaluator`. Usaremos dados sintéticos.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from riskpilot.evaluation import BinaryPerformanceEvaluator
from riskpilot.synthetic import SyntheticVintageGenerator

from sklearn.utils.class_weight import compute_sample_weight

In [2]:
# --------------------------------------------------
# 1. CONFIGURAÇÕES
# --------------------------------------------------
FILE_PATH = "../../datasets/lending_club/accepted_2007_to_2018Q4.csv"
NROWS     = 1_000
TARGET_RAW = "loan_status"          # coluna original
TARGET     = "target"               # nome final binário

# --------------------------------------------------
# 2. LEITURA E CONVERSÃO DE TIPOS MISTOS
# --------------------------------------------------
def read_and_clean_csv_mixed_types(path, nrows=None, verbose=True):
    df = pd.read_csv(path, low_memory=False, nrows=nrows)

    # identificar colunas com tipos mistos
    for col in df.columns:
        types = df[col].dropna().map(type).value_counts()
        if len(types) > 1 and verbose:
            print(f"[!] '{col}' com múltiplos tipos: {dict(types)}")

        # tentativa de conversão automática p/ numérico
        if len(types) > 1:
            try:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            except Exception:
                df[col] = df[col].astype(str)

    return df

df = read_and_clean_csv_mixed_types(FILE_PATH, nrows=NROWS)

# drop de colunas não usadas (caso existam)
df.drop(columns=[c for c in ["member_id"] if c in df.columns], inplace=True)

# --------------------------------------------------
# 3. TARGET BINÁRIO
# --------------------------------------------------
bad_status = ["Charged Off", "Default", "Late (31-120 days)"]
df[TARGET] = df[TARGET_RAW].isin(bad_status).astype(int)

# --------------------------------------------------
# 4. DATA E ID
# --------------------------------------------------
#df["date"] = pd.to_datetime(df["issue_d"], format="%b-%Y", errors="coerce")

df["date"] = pd.to_datetime('202201', format="%Y%M", errors="coerce")

df.drop(columns=["issue_d", TARGET_RAW], inplace=True)

df.reset_index(drop=False, inplace=True)   # index ➜ nova coluna

# --------------------------------------------------
# 5. FEATURES NUMÉRICAS + IMPUTAÇÃO
# --------------------------------------------------
numeric_cols = df.select_dtypes(include=[np.number]).columns.drop(["id", TARGET])
na_threshold = 0.30
valid_cols = numeric_cols[df[numeric_cols].isna().mean() < na_threshold].tolist()
valid_cols.remove('index')

df[valid_cols] = df[valid_cols].fillna(df[valid_cols].median())

# --------------------------------------------------
# 6. DATAFRAME FINAL
# --------------------------------------------------

FEATURES = [
    "funded_amnt_inv",
    "annual_inc",
    "dti",
    "fico_range_high",
    "out_prncp_inv",
    "total_rec_prncp",
    "last_pymnt_amnt",
    "last_fico_range_low",
    "acc_open_past_24mths",
    "avg_cur_bal",
    "bc_open_to_buy",
    "mo_sin_old_rev_tl_op",
    "mo_sin_rcnt_rev_tl_op",
    "mo_sin_rcnt_tl",
    "mort_acc",
    "mths_since_recent_bc",
    "mths_since_recent_inq",
    "num_tl_op_past_12m",
    "percent_bc_gt_75",
    "total_bc_limit",
    #"sub_grade"
]

ID_COLS = ['id']

IGNORE_COLS = ['member_id']

DATE_COLS = ['date']

# --------------------------------------------------
# Preparar dataset base
# --------------------------------------------------
df_model = df[ID_COLS+DATE_COLS+[TARGET] + FEATURES].dropna().copy()

In [3]:
ID_COLS

['id']

In [4]:
import numpy as np
from riskpilot.synthetic import SyntheticVintageGenerator

gen = SyntheticVintageGenerator(
    id_cols=ID_COLS,
    date_cols=DATE_COLS
).fit(df_model)

In [6]:
synthetic = gen.generate(
    n_periods=36,
    freq='ME',
    scenario='stress')
synthetic.head()

Unnamed: 0,id,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,...,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
0,369f29d5ec804d15813f26e4eed83fd8,2025-06-30,0,30000.0,132814.366241,17.949771,664.0,0.0,31980.889321,748.814641,...,8216.884826,174.0,4.0,5.0,0,25.0,5.0,2,33.3,18104.970951
1,7b6c8bb560ec48a3924476ac22f87635,2025-06-30,1,15000.0,94761.277294,17.748759,699.0,0.0,9848.376908,4422.016546,...,7957.926767,152.0,2.0,5.0,0,1.0,0.0,2,0.0,13945.425458
2,70133566f291461d8a1e27de12e152aa,2025-06-30,0,28000.0,123000.0,14.444879,699.0,0.0,28000.0,675.157374,...,23055.059392,107.0,25.0,18.0,0,5.0,2.0,1,0.0,24065.135589
3,b2288eafdac14c73b098da28f04a21b5,2025-06-30,0,14000.0,35982.984766,15.189473,729.0,0.0,12000.0,49.612043,...,3511.356711,142.0,1.0,2.0,1,2.0,5.0,2,57.1,7600.0
4,6665239b26ff499397355b1a9636ac7d,2025-06-30,0,7500.0,45000.0,25.819285,689.0,0.0,6021.230848,72.635623,...,15152.010332,169.0,1.0,3.0,1,1.0,5.0,0,28.6,18992.84467


In [None]:
# --------------------------------------------------
# 2. Split treino / teste
# --------------------------------------------------
train_idx, test_idx = train_test_split(
    df_model.index,
    test_size=0.30,
    stratify=df_model[TARGET],
    random_state=42,
)

# Criar df_train e df_test primeiro
df_train = df_model.loc[train_idx].copy()
df_test  = df_model.loc[test_idx].copy()

# Depois extrair X e y a partir dos DataFrames
X_train, y_train = df_train[FEATURES], df_train[TARGET]
X_test , y_test  = df_test[FEATURES] , df_test[TARGET]

In [57]:
# --------------------------------------------------
# Garantir cópias independentes dos conjuntos de treino e teste
# --------------------------------------------------
X_train = df_train[FEATURES].copy()
X_test = df_test[FEATURES].copy()

# --------------------------------------------------
# Converter colunas categóricas (tipo object) para 'category' em X e df
# --------------------------------------------------
def convert_object_to_category(df):
    obj_cols = df.select_dtypes(include='object').columns
    df[obj_cols] = df[obj_cols].astype('category')
    return df

# Aplicar nos conjuntos usados no modelo
X_train = convert_object_to_category(X_train)
X_test = convert_object_to_category(X_test)

# Garantir que df_train e df_test também estejam com tipos consistentes
df_train = convert_object_to_category(df_train)
df_test = convert_object_to_category(df_test)

# --------------------------------------------------
# Verificações finais
# --------------------------------------------------
print("Colunas 'object' restantes em X_train:", X_train.dtypes[X_train.dtypes == 'object'])
print("Colunas 'object' restantes em df_test:", df_test.dtypes[df_test.dtypes == 'object'])


Colunas 'object' restantes em X_train: Series([], dtype: object)
Colunas 'object' restantes em df_test: Series([], dtype: object)


In [58]:
num_cols = df_train.select_dtypes(include=[float, int]).columns.tolist()

for col in ['index', 'id', TARGET]:
    try:
        num_cols.remove(col)
    except:
        pass   

In [59]:
# Aplicar o DynamicScaler no treino
from riskpilot.utils.scaler import DynamicScaler
scaler = DynamicScaler(
    strategy="auto",
    shapiro_p_val = 0.01, # se aumentar fica mais restritiva a escolha de StandardScaler()
    #serialize=True,
    #save_path="scalers.pkl", # arquivo com informações de escalonamento salvas 
    random_state=42,
    ignore_cols=ID_COLS+IGNORE_COLS+DATE_COLS+[TARGET]
)

scaler.fit(df_train)

# Relatório das decisões
print("\nResumo das decisões por feature:")
display(scaler.report_as_df())

INFO: Ignoring columns (no scaling): ['id', 'target']

Precision loss occurred in moment calculation due to catastrophic cancellation. This occurs when the data are nearly identical. Results may be unreliable.


Precision loss occurred in moment calculation due to catastrophic cancellation. This occurs when the data are nearly identical. Results may be unreliable.


n_quantiles (1000) is greater than the total number of samples (700). n_quantiles is set to n_samples.


n_quantiles (1000) is greater than the total number of samples (700). n_quantiles is set to n_samples.


n_quantiles (1000) is greater than the total number of samples (700). n_quantiles is set to n_samples.

INFO: Coluna 'funded_amnt_inv' → RobustScaler (p=0.000, skew=0.51, kurt=-0.4) | motivo: skew moderado/outliers
INFO: Coluna 'funded_amnt_inv' → RobustScaler (p=0.000, skew=0.51, kurt=-0.4) | motivo: skew moderado/outliers
INFO: Coluna 'annual_inc' → RobustScaler (p=0.000, skew=2.11, kurt=11.0) | motivo: skew moderad


Resumo das decisões por feature:


Unnamed: 0,p_value,skew,kurtosis,reason,scaler,post_skew,post_kurtosis
funded_amnt_inv,1.664211e-12,0.5085,-0.3959,skew moderado/outliers,RobustScaler,,
annual_inc,1.0796590000000001e-23,2.106841,10.974685,skew moderado/outliers,RobustScaler,,
dti,3.934199e-05,0.271101,-0.415908,default,MinMaxScaler,,
fico_range_high,2.394015e-20,1.134391,1.118171,box-cox (high skew),PowerTransformer,,
out_prncp_inv,5.757863e-43,2.988878,8.468441,yeo-johnson (high skew),PowerTransformer,2.22093,2.935013
total_rec_prncp,2.924658e-16,0.779351,0.007893,skew moderado/outliers,RobustScaler,,
last_pymnt_amnt,2.6505960000000003e-32,1.798974,2.98632,box-cox (high skew),PowerTransformer,-0.027852,-0.542435
last_fico_range_low,5.991563e-35,-3.457413,15.468496,assimetria/kurtosis extrema,QuantileTransformer,,
acc_open_past_24mths,4.447859e-18,1.141839,2.281581,yeo-johnson (high skew),PowerTransformer,-0.010969,-0.10287
avg_cur_bal,8.915346e-30,2.522677,11.61179,skew moderado/outliers,RobustScaler,,


In [60]:
display(X_train.head())

# 5. Transformar o conjunto de treino
df_train_scaled = scaler.transform(df_train, return_df=True)
X_train_scaled = scaler.transform(X_train, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_train_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
967,12000.0,50000.0,7.54,774.0,0.0,12000.0,361.28,805.0,2.0,1288.0,50612.0,266.0,24.0,24.0,0.0,24.0,5.0,0.0,0.0,61700.0
993,14000.0,132700.0,25.38,664.0,0.0,14000.0,93.38,705.0,10.0,9813.0,94.0,124.0,5.0,1.0,5.0,19.0,1.0,3.0,100.0,1300.0
78,20200.0,60000.0,34.84,724.0,0.0,4258.61,518.35,560.0,6.0,4322.0,9516.0,74.0,7.0,7.0,0.0,16.0,9.0,4.0,25.0,17800.0
305,12700.0,32760.0,34.32,679.0,0.0,12700.0,458.68,640.0,19.0,1300.0,12822.0,129.0,1.0,1.0,0.0,2.0,1.0,11.0,33.3,22000.0
577,9600.0,110000.0,13.83,669.0,0.0,9600.0,5548.36,720.0,4.0,4003.0,2456.0,335.0,4.0,4.0,1.0,4.0,5.0,2.0,33.3,10000.0


INFO: Untouched columns preserved: ['id', 'date', 'target']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
967,-0.272109,-0.44,0.149957,1.387779e-17,-0.384032,0.079348,-0.739481,2.053158,-0.924677,-0.522282,1.784195,0.84709,1.125512,1.810554,-1.217353,0.571429,0.03821,-1.709949,0.0,1.46476
993,-0.090703,1.214,0.537109,1.387779e-17,-0.384032,0.244882,-1.309063,0.109593,1.39294,-0.03832,-1.733675,-0.677463,-0.277865,-1.317889,1.347085,0.333333,-1.14672,0.550157,1.0,-1.852264
78,0.471655,-0.24,0.742405,1.387779e-17,-0.384032,-0.561382,-0.571379,-1.256679,0.47366,-0.350043,0.281594,-1.502001,0.008965,0.451281,-1.217353,0.190476,0.710408,0.953208,0.25,-0.06411
305,-0.208617,-0.7848,0.73112,1.387779e-17,-0.384032,0.137285,-0.629138,-0.693749,2.836107,-0.521601,0.531408,-0.608021,-1.371423,-1.317889,-1.217353,-0.47619,-1.14672,2.651423,0.333,0.155909
577,-0.489796,0.76,0.286458,1.387779e-17,-0.384032,-0.119292,0.731299,0.416996,-0.128114,-0.368152,-0.535543,1.393063,-0.476438,-0.133928,-0.154114,-0.380952,0.03821,0.047358,0.333,-0.593499


In [61]:
display(X_test.head())

# 5. Transformar o conjunto de treino
df_test_scaled = scaler.transform(df_test, return_df=True)
X_test_scaled = scaler.transform(X_test, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_test_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
14,28000.0,92000.0,21.6,724.0,0.0,28000.0,17093.51,760.0,1.0,13819.0,16623.0,379.0,19.0,19.0,2.0,19.0,5.0,0.0,22.2,41500.0
272,7200.0,60000.0,20.68,729.0,0.0,7200.0,3196.17,725.0,4.0,13026.0,2273.0,159.0,16.0,8.0,1.0,55.0,8.0,1.0,0.0,7000.0
928,28000.0,82000.0,12.67,704.0,0.0,28000.0,857.84,800.0,6.0,25361.0,6508.0,187.0,9.0,9.0,5.0,9.0,8.0,2.0,66.7,39050.0
723,10000.0,100000.0,16.18,679.0,0.0,10000.0,9437.82,555.0,9.0,3273.0,5982.0,349.0,14.0,2.0,0.0,14.0,14.0,3.0,80.0,15100.0
244,13875.0,32500.0,23.53,714.0,0.0,13875.0,11101.71,705.0,3.0,17421.0,2076.0,177.0,1.0,1.0,1.0,15.0,15.0,1.0,75.0,12500.0


INFO: Untouched columns preserved: ['id', 'date', 'target']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
14,1.179138,0.4,0.455078,1.387779e-17,-0.384032,1.403617,1.490442,1.035513,-1.469213,0.1891,0.720768,1.70365,0.925333,1.550825,0.424968,0.333333,0.03821,-1.709949,0.222,0.91044
272,-0.707483,-0.24,0.435113,1.387779e-17,-0.384032,-0.317933,0.394668,0.508828,-0.128114,0.144082,-0.568958,-0.224443,0.700608,0.595649,-0.154114,2.047619,0.564644,-0.630364,0.0,-0.875845
928,1.179138,0.2,0.261285,1.387779e-17,-0.384032,1.403617,-0.324404,1.912641,0.47366,0.844337,0.093467,0.092898,0.218685,0.724024,1.347085,-0.142857,0.564644,0.047358,0.667,0.831365
723,-0.453515,0.56,0.337457,1.387779e-17,-0.384032,-0.086186,1.077071,-1.314015,1.187695,-0.409594,0.034369,1.494633,0.596581,-0.786831,-1.217353,0.095238,1.308619,0.550157,0.8,-0.225093
244,-0.102041,-0.79,0.496962,1.387779e-17,-0.384032,0.234536,1.187191,0.109593,-0.492397,0.393585,-0.610349,-0.016632,-1.371423,-1.317889,-0.154114,0.142857,1.409561,-0.630364,0.75,-0.399981


In [39]:
# # Visualizar
# scaler.plot_histograms(
#     df_train,
#     X_train_scaled,
#     features=num_cols)

In [9]:
# encoder = WOEGuard(
#     categorical_cols=categorical_cols,
#     drop_original=True,       # mantém ou remove colunas originais
#     suffix='_woe',            # sufixo para as novas colunas
#     alpha=0.5,                # suavização Laplace
#     default_woe=0.0,          # WoE default para categorias novas
#     include_nan=True          # trata NaN como categoria separada
# )

In [10]:
# # 2. Ajustar e transformar
# df_transformed = encoder.fit_transform(
#     X=df[categorical_cols], # a lista de colunas não precisa ser igual à categorical_cols, mas somente as que forem instanciadas no WOEGuard que serão codificadas.
#     y=df[TARGET]
# )
# display(df_transformed)

In [64]:
sample_weights_train = compute_sample_weight(class_weight='balanced', y=y_train)

# # Modelo 1: Regressão Logística
# model1 = LogisticRegression(
#     max_iter=2000,
#     solver='lbfgs',
#     class_weight=None,      # Removido porque vamos usar sample_weight manualmente
#     C=1.0
# )
# model1.fit(X_train, y_train, sample_weight=sample_weights_train)


# Modelo 2: XGBoost


model2 = XGBClassifier(
    n_estimators=150,
    #class_weights='balanced',
    learning_rate=0.05,
    max_depth=5,
    subsample=0.85,
    colsample_bytree=0.80,
    #scale_pos_weight=1.0,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1,
    #enable_categorical=True,
    #tree_method='hist'  # necessário com enable_categorical=True
)
model2.fit(
    X_train_scaled,
    y_train,
    sample_weight=sample_weights_train
    );

In [None]:
# gambiarra rapida
#df_train = pd.concat([X_train_scaled, y_train], axis=1)
#df_test = pd.concat([X_test_scaled, y_train], axis=1)

In [65]:
evaluator = BinaryPerformanceEvaluator(
    model=model2,
    df_train=df_train,
    df_test=df_test,
    target_col='target',
    id_cols=['id'],
    date_col='date',
    homogeneous_group='auto'
    #group_col='group'
)

In [66]:
metrics = evaluator.compute_metrics()
metrics

Unnamed: 0_level_0,MCC,AUC_ROC,AUC_PR,Precision,Recall,Brier
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Train,0.0,0.596519,0.193577,0.15,1.0,0.841397
Test,0.0,0.60183,0.206996,0.15,1.0,0.841403


In [67]:
evaluator.plot_confusion(evaluator.df_test['target'], evaluator.df_test['y_pred_proba'], threshold='ks')

In [68]:
evaluator.plot_calibration()

In [69]:
evaluator.plot_event_rate()

(Figure({
     'data': [{'line': {'color': 'rgb(65,124,167)'},
               'mode': 'lines+markers',
               'name': '(-inf, inf)',
               'type': 'scatter',
               'x': array(['2015-12-01T00:00:00.000000000'], dtype='datetime64[ns]'),
               'y': {'bdata': 'MzMzMzMzwz8=', 'dtype': 'f8'}}],
     'layout': {'template': '...',
                'title': {'text': 'Event Rate by Group over Time'},
                'xaxis': {'title': {'text': 'date'}},
                'yaxis': {'title': {'text': 'Event rate'}}}
 }),
 Figure({
     'data': [{'marker': {'color': 'rgb(65,124,167)'},
               'name': '(-inf, inf)',
               'type': 'bar',
               'x': array(['2015-12-01T00:00:00.000000000'], dtype='datetime64[ns]'),
               'y': {'bdata': 'AAAAAAAA8D8=', 'dtype': 'f8'}}],
     'layout': {'barmode': 'stack',
                'template': '...',
                'title': {'text': 'Group Share over Time'},
                'xaxis': {'title': {'te

In [70]:
evaluator.plot_psi()

In [None]:
evaluator.plot_ks()

In [None]:
evaluator.plot_group_radar()

In [None]:
evaluator.plot_decile_ks()

### Teste de Stress

In [None]:
gen = SyntheticVintageGenerator(id_cols=['id'], date_cols=['date']).fit(train)
evaluator.synthetic_gen = gen
stress = evaluator.run_stress_test()
stress['metrics']