# Binary Performance Evaluator – Demo Completo

Neste exemplo vamos treinar um classificador simples e explorar várias funcionalidades do `BinaryPerformanceEvaluator`. Usaremos dados sintéticos.

In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from riskpilot.evaluation import BinaryPerformanceEvaluator
from riskpilot.synthetic import LookAhead

from sklearn.utils.class_weight import compute_sample_weight

In [41]:
# --------------------------------------------------
# 1. CONFIGURAÇÕES
# --------------------------------------------------
FILE_PATH = "../../datasets/lending_club/accepted_2007_to_2018Q4.csv"
NROWS     = 10_000
TARGET_RAW = "loan_status"          # coluna original
TARGET     = "target"               # nome final binário

# --------------------------------------------------
# 2. LEITURA E CONVERSÃO DE TIPOS MISTOS
# --------------------------------------------------
def read_and_clean_csv_mixed_types(path, nrows=None, verbose=True):
    df = pd.read_csv(path, low_memory=False, nrows=nrows)

    # identificar colunas com tipos mistos
    for col in df.columns:
        types = df[col].dropna().map(type).value_counts()
        if len(types) > 1 and verbose:
            print(f"[!] '{col}' com múltiplos tipos: {dict(types)}")

        # tentativa de conversão automática p/ numérico
        if len(types) > 1:
            try:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            except Exception:
                df[col] = df[col].astype(str)

    return df

df = read_and_clean_csv_mixed_types(FILE_PATH, nrows=NROWS)

# drop de colunas não usadas (caso existam)
df.drop(columns=[c for c in ["member_id"] if c in df.columns], inplace=True)

# --------------------------------------------------
# 3. TARGET BINÁRIO
# --------------------------------------------------
bad_status = ["Charged Off", "Default", "Late (31-120 days)"]
df[TARGET] = df[TARGET_RAW].isin(bad_status).astype(int)

# --------------------------------------------------
# 4. DATA E ID
# --------------------------------------------------
#df["date"] = pd.to_datetime(df["issue_d"], format="%b-%Y", errors="coerce")

df["date"] = pd.to_datetime('202201', format="%Y%M", errors="coerce")

df.drop(columns=["issue_d", TARGET_RAW], inplace=True)

df.reset_index(drop=False, inplace=True)   # index ➜ nova coluna

# --------------------------------------------------
# 5. FEATURES NUMÉRICAS + IMPUTAÇÃO
# --------------------------------------------------
numeric_cols = df.select_dtypes(include=[np.number]).columns.drop(["id", TARGET])
na_threshold = 0.30
valid_cols = numeric_cols[df[numeric_cols].isna().mean() < na_threshold].tolist()
valid_cols.remove('index')

df[valid_cols] = df[valid_cols].fillna(df[valid_cols].median())

# --------------------------------------------------
# 6. DATAFRAME FINAL
# --------------------------------------------------

FEATURES = [
    "funded_amnt_inv",
    "annual_inc",
    "dti",
    "fico_range_high",
    "out_prncp_inv",
    "total_rec_prncp",
    "last_pymnt_amnt",
    "last_fico_range_low",
    "acc_open_past_24mths",
    "avg_cur_bal",
    "bc_open_to_buy",
    "mo_sin_old_rev_tl_op",
    "mo_sin_rcnt_rev_tl_op",
    "mo_sin_rcnt_tl",
    "mort_acc",
    "mths_since_recent_bc",
    "mths_since_recent_inq",
    "num_tl_op_past_12m",
    "percent_bc_gt_75",
    "total_bc_limit",
    #"sub_grade"
]

ID_COLS = ['id']

IGNORE_COLS = ['member_id']

DATE_COLS = ['date']

# --------------------------------------------------
# Preparar dataset base
# --------------------------------------------------
df_model = df[ID_COLS+DATE_COLS+[TARGET] + FEATURES].dropna().copy()

In [42]:
ID_COLS

['id']

In [None]:
# df_model['date'] = pd.to_datetime(df_model['date'], format='%yyyy%mm')
# Cria a coluna yyyymm como inteiro
#df_model['yyyymm_int'] = df_model['date'].dt.year * 100 + df_model['date'].dt.month


df_model['yyyymm_datetime'] = df_model['date'].dt.to_period('M').dt.to_timestamp()
DATE_COLS = ['yyyymm_datetime']
DATE_COLS

['yyyymm_datetime']

In [44]:
import numpy as np
from riskpilot.synthetic import LookAhead

gen = LookAhead(
    id_cols=ID_COLS,
    date_cols=DATE_COLS
).fit(df_model)

In [45]:
print(df_model['yyyymm_datetime'].min())
print(df_model['yyyymm_datetime'].max())

2022-01-01 00:00:00
2022-01-01 00:00:00


In [46]:
synthetic = gen.generate(
    n_periods=36,
    freq='ME',
    scenario='stress')
print(synthetic['yyyymm_datetime'].min())
print(synthetic['yyyymm_datetime'].max())

display(synthetic.head())

2022-01-01 00:00:00
2024-12-01 00:00:00


Unnamed: 0,id,yyyymm_datetime,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,...,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit,yyyymm_int
0,f126cf0c708e43e8a48f4a9685a594bd,2022-01-01,2022-01-01 00:01:00,0,20000.0,85000.0,25.427224,734.0,0.0,8873.222445,...,146.0,4.0,6.0,1,3.0,8.0,3.0,60.0,43416.25273,1
1,9e7a6effda354d7cb5a2d0ac62db83d4,2022-01-01,2022-01-01 00:01:00,0,19200.0,38000.0,18.75778,709.0,6991.533308,15386.617673,...,154.737669,6.0,5.0,0,7.0,1.0,2.0,0.0,12868.865004,1
2,a3aadc6f1f654fd6af8abd0e945b9862,2022-01-01,2022-01-01 00:01:00,0,28000.0,57000.0,10.840848,729.0,0.0,24000.0,...,31.553253,10.0,3.0,1,22.0,2.0,2.0,58.460914,8197.341187,1
3,3723e8cf6458470292bf84492f70341e,2022-01-01,2022-01-01 00:01:00,1,24000.0,80000.0,34.92076,719.0,0.0,7200.0,...,108.0,9.0,1.0,1,7.0,0.0,5.0,80.0,17800.0,1
4,da3eff1030fa42ee9db0be2fe131423d,2022-01-01,2022-01-01 00:01:00,0,6000.0,70000.0,8.363058,699.0,0.0,7677.174808,...,7.890073,7.0,2.0,0,20.0,3.0,5.0,0.0,28.131561,1


In [33]:
custom_noise = {
    'utilifico_range_highzation': {'func': np.random.exponential, 'kwargs': {'scale':50}},
}
custom_gen = LookAhead(
    id_cols=['id'],
    date_cols=['date'],
    custom_noise=custom_noise).fit(df_model)

custom = custom_gen.generate(
    n_periods=2,
    freq='ME',
    scenario='stress'
    )

custom.head()
print(custom['date'].min())
print(custom['date'].max())

display(custom.head())




2025-01-31 00:01:00
2025-02-28 00:01:00


Unnamed: 0,id,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,...,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
0,58d57219719a4e46a4aaec71d95488c2,2025-01-31 00:01:00,0,3305.474703,42849.51959,34.999199,664.0,0.0,5000.0,354.909263,...,0.0,156.0,15.982875,2.0,0,62.197912,0.0,2.0,100.0,1496.200098
1,2731efc54db94070a1a149822735f142,2025-01-31 00:01:00,0,15000.0,30011.484177,34.735067,679.0,0.0,15000.0,151.888972,...,503.008925,194.082392,20.0,6.0,0,25.0,2.0,1.0,100.0,1000.0
2,8a6868d7576749bd86bcae2524daed34,2025-01-31 00:01:00,1,6000.0,36000.0,12.067352,664.0,0.0,1622.26629,153.767846,...,234.187713,123.0,0.0,1.0,0,13.0,12.0,2.0,100.0,7700.0
3,28d9a69ba81d4483b7f5bc2c49ef4427,2025-01-31 00:01:00,0,15927.811215,120000.0,14.563062,784.0,0.0,12000.0,6967.254844,...,14758.713874,230.856003,2.0,3.0,4,20.0,3.0,2.0,33.3,19365.95836
4,26fbaac3e6284e2599ab16b9e05f099b,2025-01-31 00:01:00,0,16802.990713,67299.200279,12.360282,669.0,7290.280399,9000.0,526.815823,...,743.632365,104.0,5.0,5.0,0,5.0,20.397743,1.0,33.3,10092.397199


In [34]:
df_model = pd.concat([df_model, synthetic], axis=0)

In [35]:
# --------------------------------------------------
# 2. Split treino / teste
# --------------------------------------------------
train_idx, test_idx = train_test_split(
    df_model.index,
    test_size=0.30,
    stratify=df_model[TARGET],
    random_state=42,
)

# Criar df_train e df_test primeiro
df_train = df_model.loc[train_idx].copy()
df_test  = df_model.loc[test_idx].copy()

# Depois extrair X e y a partir dos DataFrames
X_train, y_train = df_train[FEATURES], df_train[TARGET]
X_test , y_test  = df_test[FEATURES] , df_test[TARGET]

In [36]:
# --------------------------------------------------
# Garantir cópias independentes dos conjuntos de treino e teste
# --------------------------------------------------
X_train = df_train[FEATURES].copy()
X_test = df_test[FEATURES].copy()

# --------------------------------------------------
# Converter colunas categóricas (tipo object) para 'category' em X e df
# --------------------------------------------------
def convert_object_to_category(df):
    obj_cols = df.select_dtypes(include='object').columns
    df[obj_cols] = df[obj_cols].astype('category')
    return df

# Aplicar nos conjuntos usados no modelo
X_train = convert_object_to_category(X_train)
X_test = convert_object_to_category(X_test)

# Garantir que df_train e df_test também estejam com tipos consistentes
df_train = convert_object_to_category(df_train)
df_test = convert_object_to_category(df_test)

# --------------------------------------------------
# Verificações finais
# --------------------------------------------------
print("Colunas 'object' restantes em X_train:", X_train.dtypes[X_train.dtypes == 'object'])
print("Colunas 'object' restantes em df_test:", df_test.dtypes[df_test.dtypes == 'object'])


Colunas 'object' restantes em X_train: Series([], dtype: object)
Colunas 'object' restantes em df_test: Series([], dtype: object)


In [37]:
num_cols = df_train.select_dtypes(include=[float, int]).columns.tolist()

for col in ['index', 'id', TARGET]:
    try:
        num_cols.remove(col)
    except:
        pass   

In [38]:
# Aplicar o DynamicScaler no treino
from riskpilot.utils.scaler import DynamicScaler
scaler = DynamicScaler(
    strategy="auto",
    shapiro_p_val = 0.01, # se aumentar fica mais restritiva a escolha de StandardScaler()
    #serialize=True,
    #save_path="scalers.pkl", # arquivo com informações de escalonamento salvas 
    random_state=42,
    ignore_cols=ID_COLS+IGNORE_COLS+DATE_COLS+[TARGET]
)

scaler.fit(df_train)

# Relatório das decisões
print("\nResumo das decisões por feature:")
display(scaler.report_as_df())

INFO: Ignoring columns (no scaling): ['target']



Resumo das decisões por feature:


Unnamed: 0,chosen_scaler,validation_stats,ignored,candidates_tried,reason
funded_amnt_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
annual_inc,PowerTransformer,"{'post_std': 0.9999999999999999, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt
dti,QuantileTransformer,"{'post_std': 0.9976154944313745, 'post_iqr': 1...",[],"[PowerTransformer, QuantileTransformer]",stats|skew|kurt
fico_range_high,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
out_prncp_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
total_rec_prncp,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
last_pymnt_amnt,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.57396239380647...",[],[PowerTransformer],stats|skew|kurt
last_fico_range_low,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.14477131370102...",[],[PowerTransformer],stats|skew|kurt
acc_open_past_24mths,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.22882412681832...",[],[PowerTransformer],stats|skew|kurt
avg_cur_bal,PowerTransformer,"{'post_std': 1.0000000000000002, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt


In [39]:
scaler.report_as_df()['candidates_tried']

funded_amnt_inv          [PowerTransformer, QuantileTransformer, Robust...
annual_inc                                              [PowerTransformer]
dti                                [PowerTransformer, QuantileTransformer]
fico_range_high          [PowerTransformer, QuantileTransformer, Robust...
out_prncp_inv            [PowerTransformer, QuantileTransformer, Robust...
total_rec_prncp          [PowerTransformer, QuantileTransformer, Robust...
last_pymnt_amnt                                         [PowerTransformer]
last_fico_range_low                                     [PowerTransformer]
acc_open_past_24mths                                    [PowerTransformer]
avg_cur_bal                                             [PowerTransformer]
bc_open_to_buy                                          [PowerTransformer]
mo_sin_old_rev_tl_op     [PowerTransformer, QuantileTransformer, Robust...
mo_sin_rcnt_rev_tl_op                                   [PowerTransformer]
mo_sin_rcnt_tl           

In [40]:
display(X_train.head())

# 5. Transformar o conjunto de treino
df_train_scaled = scaler.transform(df_train, return_df=True)
X_train_scaled = scaler.transform(X_train, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_train_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
25381,11837.372011,84000.0,0.088439,764.0,0.0,10922.171341,472.328201,685.0,1.0,2499.497568,23942.80363,94.921757,14.0,5.0,0.0,76.969682,0.0,0.0,0.0,36084.616674
25381,11837.372011,84000.0,0.088439,764.0,0.0,10922.171341,472.328201,685.0,1.0,2499.497568,23942.80363,94.921757,14.0,5.0,0.0,76.969682,0.0,0.0,0.0,36084.616674
3048,12000.0,45000.0,15.52,744.0,0.0,12000.0,10242.7,805.0,1.0,10963.0,11391.0,221.0,38.0,19.0,1.0,38.0,19.0,0.0,20.0,24300.0
3048,18150.162539,41860.885255,32.967164,734.0,0.0,7964.719222,86.596853,700.0,2.0,5948.032575,3143.642675,65.0,24.0,5.0,0.0,31.0,7.0,1.0,87.514987,38500.0
3048,18150.162539,41860.885255,32.967164,734.0,0.0,7964.719222,86.596853,700.0,2.0,5948.032575,3143.642675,65.0,24.0,5.0,0.0,31.0,7.0,1.0,87.514987,38500.0


INFO: Untouched columns preserved: ['id', 'target', 'date']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
25381,11837.372011,0.421523,-3.91266,764.0,0.0,10922.171341,-0.552686,0.087909,-1.483916,-0.981994,1.137365,94.921757,0.590358,0.069049,-1.128465,1.584465,0.0,-1.663449,-5.199338,0.823418
25381,11837.372011,0.421523,-3.91266,764.0,0.0,10922.171341,-0.552686,0.087909,-1.483916,-0.981994,1.137365,94.921757,0.590358,0.069049,-1.128465,1.584465,0.0,-1.663449,-5.199338,0.823418
3048,12000.0,-0.765683,-0.391385,744.0,0.0,12000.0,1.189591,1.625057,-1.483916,0.290632,0.524175,221.0,1.512078,1.490485,0.017618,0.95614,19.0,-1.663449,-0.451469,0.357893
3048,18150.162539,-0.907351,1.409189,734.0,0.0,7964.719222,-1.313598,0.264816,-0.927458,-0.25564,-0.333914,65.0,1.086313,0.069049,-1.128465,0.775668,7.0,-0.613222,0.926611,0.904982
3048,18150.162539,-0.907351,1.409189,734.0,0.0,7964.719222,-1.313598,0.264816,-0.927458,-0.25564,-0.333914,65.0,1.086313,0.069049,-1.128465,0.775668,7.0,-0.613222,0.926611,0.904982


In [41]:
display(X_test.head())

# 5. Transformar o conjunto de treino
df_test_scaled = scaler.transform(df_test, return_df=True)
X_test_scaled = scaler.transform(X_test, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_test_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
9806,28000.0,92281.0,20.1,669.0,12018.05,15981.95,646.74,660.0,3.0,5899.0,7143.0,194.0,17.0,17.0,2.0,52.0,4.0,0.0,20.0,18900.0
9806,12000.0,62105.086247,7.313272,664.0,0.0,3982.034536,1321.479593,705.0,12.0,478.905505,2648.750388,115.0,0.0,0.0,0.0,1.0,1.0,10.485255,0.0,8200.0
9806,12000.0,62105.086247,7.313272,664.0,0.0,3982.034536,1321.479593,705.0,12.0,478.905505,2648.750388,115.0,0.0,0.0,0.0,1.0,1.0,10.485255,0.0,8200.0
12844,12000.0,99824.993747,33.559887,699.0,12514.013382,4515.701652,277.173786,685.0,17.758868,5089.888668,14215.483479,192.0,1.0,2.0,3.0,0.0,3.0,6.0,100.0,36301.903109
12844,12000.0,99824.993747,33.559887,699.0,12514.013382,4515.701652,277.173786,685.0,17.758868,5089.888668,14215.483479,192.0,1.0,2.0,3.0,0.0,3.0,6.0,100.0,36301.903109


INFO: Untouched columns preserved: ['id', 'target', 'date']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
9806,28000.0,0.594951,0.117505,669.0,12018.05,15981.95,-0.397279,-0.197348,-0.488912,-0.262837,0.185658,194.0,0.768221,1.370815,0.592254,1.235043,4.0,-1.663449,-0.451469,0.088859
9806,12000.0,-0.145008,-1.462489,664.0,0.0,3982.034536,-0.025967,0.324747,1.767807,-2.222782,-0.431366,115.0,-2.16594,-2.280702,-1.128465,-1.803403,1.0,2.573768,-5.199338,-0.675143
9806,12000.0,-0.145008,-1.462489,664.0,0.0,3982.034536,-0.025967,0.324747,1.767807,-2.222782,-0.431366,115.0,-2.16594,-2.280702,-1.128465,-1.803403,1.0,2.573768,-5.199338,-0.675143
12844,12000.0,0.738835,1.482388,699.0,12514.013382,4515.701652,-0.805687,0.087909,2.651595,-0.390249,0.696995,192.0,-1.439578,-0.805228,0.962317,-2.4629,3.0,1.584786,5.199338,0.83091
12844,12000.0,0.738835,1.482388,699.0,12514.013382,4515.701652,-0.805687,0.087909,2.651595,-0.390249,0.696995,192.0,-1.439578,-0.805228,0.962317,-2.4629,3.0,1.584786,5.199338,0.83091


In [42]:
# # Visualizar
# scaler.plot_histograms(
#     df_train,
#     X_train_scaled,
#     features=num_cols)

In [43]:
# encoder = WOEGuard(
#     categorical_cols=categorical_cols,
#     drop_original=True,       # mantém ou remove colunas originais
#     suffix='_woe',            # sufixo para as novas colunas
#     alpha=0.5,                # suavização Laplace
#     default_woe=0.0,          # WoE default para categorias novas
#     include_nan=True          # trata NaN como categoria separada
# )

In [44]:
# # 2. Ajustar e transformar
# df_transformed = encoder.fit_transform(
#     X=df[categorical_cols], # a lista de colunas não precisa ser igual à categorical_cols, mas somente as que forem instanciadas no WOEGuard que serão codificadas.
#     y=df[TARGET]
# )
# display(df_transformed)

In [45]:
sample_weights_train = compute_sample_weight(class_weight='balanced', y=y_train)

# # Modelo 1: Regressão Logística
# model1 = LogisticRegression(
#     max_iter=2000,
#     solver='lbfgs',
#     class_weight=None,      # Removido porque vamos usar sample_weight manualmente
#     C=1.0
# )
# model1.fit(X_train, y_train, sample_weight=sample_weights_train)


# Modelo 2: XGBoost


model2 = XGBClassifier(
    n_estimators=150,
    #class_weights='balanced',
    learning_rate=0.05,
    max_depth=5,
    subsample=0.85,
    colsample_bytree=0.80,
    #scale_pos_weight=1.0,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1,
    #enable_categorical=True,
    #tree_method='hist'  # necessário com enable_categorical=True
)
model2.fit(
    X_train_scaled,
    y_train,
    sample_weight=sample_weights_train
    );

In [46]:
# gambiarra rapida
#df_train = pd.concat([X_train_scaled, y_train], axis=1)
#df_test = pd.concat([X_test_scaled, y_train], axis=1)

In [47]:
#df_train.shape

df_train['date'].min(),df_train['date'].max()

(Timestamp('2022-01-01 00:01:00'), Timestamp('2024-12-31 00:01:00'))

In [48]:
evaluator = BinaryPerformanceEvaluator(
    model=model2,
    df_train=df_train,
    df_test=df_test,
    target_col='target',
    id_cols=['id'],
    date_col='date',
    homogeneous_group='auto'
    #group_col='group'
)

In [49]:
metrics = evaluator.compute_metrics()
metrics

Unnamed: 0_level_0,MCC,AUC_ROC,AUC_PR,Precision,Recall,Brier
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Train,-0.471458,0.105048,0.096827,0.128001,0.686166,0.718856
Test,-0.46282,0.111294,0.098042,0.129928,0.690772,0.716016


In [50]:
evaluator.plot_confusion(evaluator.df_test['target'], evaluator.df_test['y_pred_proba'], threshold='ks')

In [51]:
evaluator.plot_calibration()

In [52]:
evaluator.plot_event_rate()

(Figure({
     'data': [{'line': {'color': 'rgb(217,58,70)'},
               'mode': 'lines+markers',
               'name': '(-inf, 526.83)',
               'type': 'scatter',
               'x': array(['2022-01-01T00:01:00.000000000', '2022-01-31T00:01:00.000000000',
                           '2022-02-28T00:01:00.000000000', '2022-03-31T00:01:00.000000000',
                           '2022-04-30T00:01:00.000000000', '2022-05-31T00:01:00.000000000',
                           '2022-06-30T00:01:00.000000000', '2022-07-31T00:01:00.000000000',
                           '2022-08-31T00:01:00.000000000', '2022-09-30T00:01:00.000000000',
                           '2022-10-31T00:01:00.000000000', '2022-11-30T00:01:00.000000000',
                           '2022-12-31T00:01:00.000000000', '2023-01-31T00:01:00.000000000',
                           '2023-02-28T00:01:00.000000000', '2023-03-31T00:01:00.000000000',
                           '2023-04-30T00:01:00.000000000', '2023-05-31T00:01:0

In [53]:
evaluator.plot_psi(reference_last_period=True)

(Figure({
     'data': [{'mode': 'lines+markers',
               'name': 'acc_open_past_24mths (Test)',
               'type': 'scatter',
               'x': array(['2022-02-01T00:00:00.000000000', '2022-03-01T00:00:00.000000000',
                           '2022-04-01T00:00:00.000000000', '2022-05-01T00:00:00.000000000',
                           '2022-06-01T00:00:00.000000000', '2022-07-01T00:00:00.000000000',
                           '2022-08-01T00:00:00.000000000', '2022-09-01T00:00:00.000000000',
                           '2022-10-01T00:00:00.000000000', '2022-11-01T00:00:00.000000000',
                           '2022-12-01T00:00:00.000000000', '2023-01-01T00:00:00.000000000',
                           '2023-02-01T00:00:00.000000000', '2023-03-01T00:00:00.000000000',
                           '2023-04-01T00:00:00.000000000', '2023-05-01T00:00:00.000000000',
                           '2023-06-01T00:00:00.000000000', '2023-07-01T00:00:00.000000000',
                         

In [54]:
evaluator.plot_ks()

In [55]:
evaluator.plot_group_radar()

In [56]:
evaluator.plot_decile_ks()

### Teste de Stress

In [57]:
gen = LookAhead(id_cols=['id'], date_cols=['date']).fit(train)
evaluator.synthetic_gen = gen
stress = evaluator.run_stress_test()
stress['metrics']

NameError: name 'train' is not defined