# Binary Performance Evaluator – Demo Completo

Neste exemplo vamos treinar um classificador simples e explorar várias funcionalidades do `BinaryPerformanceEvaluator`. Usaremos dados sintéticos.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from riskpilot.evaluation import BinaryPerformanceEvaluator
from riskpilot.synthetic import LookAhead

from sklearn.utils.class_weight import compute_sample_weight

In [2]:
# --------------------------------------------------
# 1. CONFIGURAÇÕES
# --------------------------------------------------
FILE_PATH = "../../datasets/lending_club/accepted_2007_to_2018Q4.csv"
NROWS     = 10_000
TARGET_RAW = "loan_status"          # coluna original
TARGET     = "target"               # nome final binário

# --------------------------------------------------
# 2. LEITURA E CONVERSÃO DE TIPOS MISTOS
# --------------------------------------------------
def read_and_clean_csv_mixed_types(path, nrows=None, verbose=True):
    df = pd.read_csv(path, low_memory=False, nrows=nrows)

    # identificar colunas com tipos mistos
    for col in df.columns:
        types = df[col].dropna().map(type).value_counts()
        if len(types) > 1 and verbose:
            print(f"[!] '{col}' com múltiplos tipos: {dict(types)}")

        # tentativa de conversão automática p/ numérico
        if len(types) > 1:
            try:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            except Exception:
                df[col] = df[col].astype(str)

    return df

df = read_and_clean_csv_mixed_types(FILE_PATH, nrows=NROWS)

# drop de colunas não usadas (caso existam)
df.drop(columns=[c for c in ["member_id"] if c in df.columns], inplace=True)

# --------------------------------------------------
# 3. TARGET BINÁRIO
# --------------------------------------------------
bad_status = ["Charged Off", "Default", "Late (31-120 days)"]
df[TARGET] = df[TARGET_RAW].isin(bad_status).astype(int)

# --------------------------------------------------
# 4. DATA E ID
# --------------------------------------------------
#df["date"] = pd.to_datetime(df["issue_d"], format="%b-%Y", errors="coerce")

df["date"] = pd.to_datetime('202201', format="%Y%M", errors="coerce")

df.drop(columns=["issue_d", TARGET_RAW], inplace=True)

df.reset_index(drop=False, inplace=True)   # index ➜ nova coluna

# --------------------------------------------------
# 5. FEATURES NUMÉRICAS + IMPUTAÇÃO
# --------------------------------------------------
numeric_cols = df.select_dtypes(include=[np.number]).columns.drop(["id", TARGET])
na_threshold = 0.30
valid_cols = numeric_cols[df[numeric_cols].isna().mean() < na_threshold].tolist()
valid_cols.remove('index')

df[valid_cols] = df[valid_cols].fillna(df[valid_cols].median())

# --------------------------------------------------
# 6. DATAFRAME FINAL
# --------------------------------------------------

FEATURES = [
    "funded_amnt_inv",
    "annual_inc",
    "dti",
    "fico_range_high",
    "out_prncp_inv",
    "total_rec_prncp",
    "last_pymnt_amnt",
    "last_fico_range_low",
    "acc_open_past_24mths",
    "avg_cur_bal",
    "bc_open_to_buy",
    "mo_sin_old_rev_tl_op",
    "mo_sin_rcnt_rev_tl_op",
    "mo_sin_rcnt_tl",
    "mort_acc",
    "mths_since_recent_bc",
    "mths_since_recent_inq",
    "num_tl_op_past_12m",
    "percent_bc_gt_75",
    "total_bc_limit",
    #"sub_grade"
]

ID_COLS = ['id']

IGNORE_COLS = ['member_id']

DATE_COLS = ['date']

# --------------------------------------------------
# Preparar dataset base
# --------------------------------------------------
df_model = df[ID_COLS+DATE_COLS+[TARGET] + FEATURES].dropna().copy()

In [3]:
ID_COLS

['id']

In [4]:
# df_model['date'] = pd.to_datetime(df_model['date'], format='%yyyy%mm')
# Cria a coluna yyyymm como inteiro
df_model['date'] = df_model['date'].dt.year * 100 + df_model['date'].dt.month


#df_model['yyyymm_datetime'] = df_model['date'].dt.to_period('M').dt.to_timestamp()
#DATE_COLS = ['date']
DATE_COLS

['date']

In [5]:
import numpy as np
from riskpilot.synthetic import LookAhead

gen = LookAhead(
    id_cols=ID_COLS,
    date_cols=DATE_COLS
).fit(df_model)

In [6]:
print(df_model['date'].min())
print(df_model['date'].max())

202201
202201


In [7]:
synthetic = gen.generate(
    n_periods=36,
    freq='ME',
    scenario='stress')
print(synthetic['date'].min())
print(synthetic['date'].max())

display(synthetic.head())

202202
202501


Unnamed: 0,id,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,...,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
0,64088cbaa65b4d369c658b9771cedfef,202202,1,1680.46933,43000.0,25.471291,674.0,0.0,979.960198,290.823111,...,2.445705,127.0,7.0,5.901403,1,13.0,17.0,1.0,75.0,3693.459891
1,05f2c58a62ff4843bae6541d997fd819,202202,0,12601.380305,90000.0,23.338011,674.0,0.0,20013.35408,1557.002085,...,795.560055,168.0,4.0,1.0,5,15.0,1.0,4.0,50.0,4100.0
2,b24242bf4e054483ba7d0dfb8d34ea33,202202,0,5784.431582,80000.0,11.632748,799.0,0.0,2997.757078,276.84167,...,16011.861313,486.566164,1.0,0.0,13,26.0,0.0,4.0,0.0,25200.0
3,5057d3dfa41a4cf281e1662ea7e75ef2,202202,0,12022.713009,38000.0,16.902878,684.0,0.0,9000.0,11313.174482,...,11955.972001,355.217824,14.0,2.0,1,7.0,1.0,1.0,0.0,26001.275666
4,60d756a76572412c8d528e0b6f0a5f5c,202202,1,1000.357484,62000.0,11.677518,694.0,0.0,831.511063,427.26324,...,1800.448439,188.0,3.0,1.0,1,36.480449,7.0,3.0,0.0,3000.0


In [8]:
custom_noise = {
    'utilifico_range_highzation': {'func': np.random.exponential, 'kwargs': {'scale':50}},
}
custom_gen = LookAhead(
    id_cols=['id'],
    date_cols=['date'],
    custom_noise=custom_noise).fit(df_model)

custom = custom_gen.generate(
    n_periods=2,
    freq='ME',
    scenario='stress'
    )

custom.head()
print(custom['date'].min())
print(custom['date'].max())

display(custom.head())



202202
202203


Unnamed: 0,id,date,target,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,...,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
0,d646031cae64498a83ec0be3ed6822f1,202202,0,2067.637511,36000.0,6.743116,669.0,0.0,1000.201705,220.166571,...,622.557051,121.0,12.0,5.0,0,44.237909,5.0,2.0,0.0,2000.0
1,138d33b7cff945dd9e4c3b3bcba08564,202202,1,16000.0,70000.0,33.928111,789.0,0.0,3012.483593,10085.826149,...,27127.915485,180.0,3.0,7.0,2,2.0,17.0,3.0,66.7,154613.706466
2,95eefb7ebc7d488c8e82b3ec193e9f03,202202,1,8000.0,60000.0,14.865377,689.0,0.0,4497.015206,215.152529,...,13530.669617,52.293515,4.0,4.0,1,8.449097,2.0,3.0,12.5,16276.4506
3,8172c3bf87e8451dab81b0ae002ddd35,202202,0,20489.193786,88821.897239,7.709062,699.0,0.0,24000.0,2030.753284,...,8625.743232,316.576276,18.0,17.0,5,10.0,15.0,1.0,100.0,25340.038613
4,c27646feb69f4e85a030fddafb9e5b8e,202202,0,6375.506999,74000.0,29.85236,689.0,0.0,7000.0,101.8664,...,1368.777377,169.0,23.0,7.0,1,106.725286,2.0,0.0,100.0,23623.14437


In [9]:
df_model = pd.concat([df_model, synthetic], axis=0)

In [10]:
# --------------------------------------------------
# 2. Split treino / teste
# --------------------------------------------------
train_idx, test_idx = train_test_split(
    df_model.index,
    test_size=0.30,
    stratify=df_model[TARGET],
    random_state=42,
)

# Criar df_train e df_test primeiro
df_train = df_model.loc[train_idx].copy()
df_test  = df_model.loc[test_idx].copy()

# Depois extrair X e y a partir dos DataFrames
X_train, y_train = df_train[FEATURES], df_train[TARGET]
X_test , y_test  = df_test[FEATURES] , df_test[TARGET]

In [11]:
# --------------------------------------------------
# Garantir cópias independentes dos conjuntos de treino e teste
# --------------------------------------------------
X_train = df_train[FEATURES].copy()
X_test = df_test[FEATURES].copy()

# --------------------------------------------------
# Converter colunas categóricas (tipo object) para 'category' em X e df
# --------------------------------------------------
def convert_object_to_category(df):
    obj_cols = df.select_dtypes(include='object').columns
    df[obj_cols] = df[obj_cols].astype('category')
    return df

# Aplicar nos conjuntos usados no modelo
X_train = convert_object_to_category(X_train)
X_test = convert_object_to_category(X_test)

# Garantir que df_train e df_test também estejam com tipos consistentes
df_train = convert_object_to_category(df_train)
df_test = convert_object_to_category(df_test)

# --------------------------------------------------
# Verificações finais
# --------------------------------------------------
print("Colunas 'object' restantes em X_train:", X_train.dtypes[X_train.dtypes == 'object'])
print("Colunas 'object' restantes em df_test:", df_test.dtypes[df_test.dtypes == 'object'])


Colunas 'object' restantes em X_train: Series([], dtype: object)
Colunas 'object' restantes em df_test: Series([], dtype: object)


In [12]:
num_cols = df_train.select_dtypes(include=[float, int]).columns.tolist()

for col in ['index', 'id', TARGET]:
    try:
        num_cols.remove(col)
    except:
        pass   

In [13]:
# Aplicar o DynamicScaler no treino
from riskpilot.utils.scaler import DynamicScaler
scaler = DynamicScaler(
    strategy="auto",
    shapiro_p_val = 0.01, # se aumentar fica mais restritiva a escolha de StandardScaler()
    #serialize=True,
    #save_path="scalers.pkl", # arquivo com informações de escalonamento salvas 
    random_state=42,
    ignore_cols=ID_COLS+IGNORE_COLS+DATE_COLS+[TARGET]
)

scaler.fit(df_train)

# Relatório das decisões
print("\nResumo das decisões por feature:")
display(scaler.report_as_df())

INFO: Ignoring columns (no scaling): ['date', 'target']



Resumo das decisões por feature:


Unnamed: 0,chosen_scaler,validation_stats,ignored,candidates_tried,reason
funded_amnt_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
annual_inc,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.25883383964778...",[],[PowerTransformer],stats|skew|kurt
dti,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.31074266651550...",[],[PowerTransformer],stats|skew|kurt
fico_range_high,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
out_prncp_inv,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
total_rec_prncp,,"{'post_std': nan, 'post_iqr': nan, 'post_n_uni...",[],"[PowerTransformer, QuantileTransformer, Robust...",all_rejected
last_pymnt_amnt,PowerTransformer,"{'post_std': 1.0, 'post_iqr': 1.57758984077859...",[],[PowerTransformer],stats|skew|kurt
last_fico_range_low,PowerTransformer,"{'post_std': 0.9999999999999999, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt
acc_open_past_24mths,PowerTransformer,"{'post_std': 0.9999999999999999, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt
avg_cur_bal,PowerTransformer,"{'post_std': 0.9999999999999999, 'post_iqr': 1...",[],[PowerTransformer],stats|skew|kurt


In [14]:
display(X_train.head())

# 5. Transformar o conjunto de treino
df_train_scaled = scaler.transform(df_train, return_df=True)
X_train_scaled = scaler.transform(X_train, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_train_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
2323,20800.0,109000.0,32.55,709.0,1380.39,19419.61,650.94,625.0,1.0,14188.0,3480.0,153.0,28.0,10.0,3.0,28.0,10.0,1.0,14.3,8800.0
2323,6400.0,44326.96338,22.239569,664.0,0.0,1530.466588,400.135533,510.0,11.0,1654.014555,1514.578254,164.0,3.0,1.0,0.0,4.0,3.0,4.0,66.7,7300.0
16081,18000.0,50000.0,21.842074,684.0,0.0,10000.0,300.260335,685.0,3.0,9126.145146,3874.79646,166.0,6.0,3.0,4.0,10.0,9.0,5.0,33.3,6000.0
28947,11000.0,68000.0,11.980068,719.0,15672.493832,5883.854656,359.035539,810.0,6.0,24251.252649,19426.055683,100.698422,9.0,2.0,0.0,2.0,2.0,3.0,0.0,26010.693823
2905,24000.0,147000.0,23.81,714.0,0.0,24000.0,16026.45,725.0,2.0,8511.0,20239.0,184.0,7.0,7.0,2.0,80.0,5.0,1.0,25.0,58500.0


INFO: Untouched columns preserved: ['id', 'date', 'target']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
2323,20800.0,0.901128,1.224344,709.0,1380.39,19419.61,-0.396687,-0.566489,-1.462,0.5232,-0.258031,153.0,1.221213,0.79087,0.948472,0.678946,0.866477,-0.608237,-0.525287,-0.608031
2323,6400.0,-0.794475,0.419812,664.0,0.0,1530.466588,-0.633694,-1.642417,1.584364,-1.312304,-0.708497,164.0,-0.733274,-1.340783,-1.134461,-0.951355,-0.389054,0.96193,0.741226,-0.757333
16081,18000.0,-0.558499,0.383897,684.0,0.0,10000.0,-0.768212,0.090983,-0.476236,0.117148,-0.193861,166.0,-0.170393,-0.448727,1.216725,-0.216256,0.736022,1.300851,0.061365,-0.905394
28947,11000.0,0.031151,-0.706591,719.0,15672.493832,5883.854656,-0.684925,1.675829,0.487014,1.038451,0.965692,100.698422,0.183901,-0.814644,-1.134461,-1.430986,-0.69838,0.559505,-1.499756,0.441037
2905,24000.0,1.431505,0.557438,714.0,0.0,24000.0,1.484337,0.56635,-0.910857,0.05438,1.000623,184.0,-0.037348,0.408406,0.577897,1.617241,0.081209,-0.608237,-0.163342,1.477129


In [15]:
display(X_test.head())

# 5. Transformar o conjunto de treino
df_test_scaled = scaler.transform(df_test, return_df=True)
X_test_scaled = scaler.transform(X_test, return_df=True)

print("\nExemplo de dados escalados (treino):")
display(X_test_scaled.head())

Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
7702,2500.0,75000.0,17.69,689.0,0.0,2500.0,756.0,620.0,12.0,21130.0,163.0,306.0,8.0,4.0,7.0,8.0,7.0,4.0,100.0,34500.0
7702,5691.063525,30000.0,20.543915,714.0,6490.464345,2230.567429,781.471297,695.0,1.0,1303.264282,22217.183455,122.0,26.0,7.0,0.0,30.0,5.0,1.0,0.0,11300.0
27501,12000.0,75000.0,31.989242,669.0,0.0,12958.935741,5484.641719,723.357858,6.0,1655.100897,2151.950839,143.0,5.0,3.0,0.0,12.0,7.0,2.0,100.0,6500.0
23665,21000.0,109899.940797,15.168199,689.0,0.0,8155.302536,7801.434493,640.0,1.0,13561.468207,254.994001,112.0,7.0,6.0,0.0,5.221468,0.0,0.0,100.0,7700.0
851,18000.0,45000.0,10.48,694.0,0.0,18000.0,566.5,705.0,5.0,4977.0,22939.0,107.0,1.0,1.0,0.0,1.0,9.0,5.0,14.3,37300.0


INFO: Untouched columns preserved: ['id', 'date', 'target']



Exemplo de dados escalados (treino):


Unnamed: 0,funded_amnt_inv,annual_inc,dti,fico_range_high,out_prncp_inv,total_rec_prncp,last_pymnt_amnt,last_fico_range_low,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_inq,num_tl_op_past_12m,percent_bc_gt_75,total_bc_limit
7702,2500.0,0.215231,-0.021943,689.0,0.0,2500.0,-0.321465,-0.618284,1.763303,0.903672,-1.58849,306.0,0.079603,-0.169047,1.731521,-0.402749,0.440992,0.96193,1.244885,0.774291
7702,5691.063525,-1.579457,0.263301,714.0,6490.464345,2230.567429,-0.304653,0.207036,-1.462,-1.494999,1.081215,122.0,1.152533,0.408406,-1.134461,0.740187,0.081209,-0.608237,-1.499756,-0.394908
27501,12000.0,0.215231,1.185678,669.0,0.0,12958.935741,0.787511,0.546248,0.487014,-1.311796,-0.527901,143.0,-0.324586,-0.448727,-1.134461,-0.061314,0.440992,0.060115,1.244885,-0.845999
23665,21000.0,0.915924,-0.302058,689.0,0.0,8155.302536,1.008423,-0.408346,-1.462,0.480903,-1.443657,112.0,-0.037348,0.245935,-1.134461,-0.746946,-1.716246,-1.661001,1.244885,-0.715551
851,18000.0,-0.76479,-0.922763,694.0,0.0,18000.0,-0.46554,0.324946,0.205028,-0.415402,1.109213,107.0,-1.442496,-1.340783,-1.134461,-1.813591,0.736022,1.300851,-0.525287,0.871373


In [16]:
# # Visualizar
# scaler.plot_histograms(
#     df_train,
#     X_train_scaled,
#     features=num_cols)

In [17]:
# encoder = WOEGuard(
#     categorical_cols=categorical_cols,
#     drop_original=True,       # mantém ou remove colunas originais
#     suffix='_woe',            # sufixo para as novas colunas
#     alpha=0.5,                # suavização Laplace
#     default_woe=0.0,          # WoE default para categorias novas
#     include_nan=True          # trata NaN como categoria separada
# )

In [18]:
# # 2. Ajustar e transformar
# df_transformed = encoder.fit_transform(
#     X=df[categorical_cols], # a lista de colunas não precisa ser igual à categorical_cols, mas somente as que forem instanciadas no WOEGuard que serão codificadas.
#     y=df[TARGET]
# )
# display(df_transformed)

In [19]:
sample_weights_train = compute_sample_weight(class_weight='balanced', y=y_train)

# # Modelo 1: Regressão Logística
# model1 = LogisticRegression(
#     max_iter=2000,
#     solver='lbfgs',
#     class_weight=None,      # Removido porque vamos usar sample_weight manualmente
#     C=1.0
# )
# model1.fit(X_train, y_train, sample_weight=sample_weights_train)


# Modelo 2: XGBoost


model2 = XGBClassifier(
    n_estimators=150,
    #class_weights='balanced',
    learning_rate=0.05,
    max_depth=5,
    subsample=0.85,
    colsample_bytree=0.80,
    #scale_pos_weight=1.0,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1,
    #enable_categorical=True,
    #tree_method='hist'  # necessário com enable_categorical=True
)
model2.fit(
    X_train_scaled,
    y_train,
    sample_weight=sample_weights_train
    );

In [20]:
# gambiarra rapida
#df_train = pd.concat([X_train_scaled, y_train], axis=1)
#df_test = pd.concat([X_test_scaled, y_train], axis=1)

In [21]:
#df_train.shape

df_train['date'].min(),df_train['date'].max()

(202201, 202501)

In [22]:
evaluator = BinaryPerformanceEvaluator(
    model=model2,
    df_train=df_train,
    df_test=df_test,
    target_col='target',
    id_cols=['id'],
    date_col='date',
    homogeneous_group='auto'
    #group_col='group'
)

In [23]:
metrics = evaluator.compute_metrics()
metrics

Unnamed: 0_level_0,MCC,AUC_ROC,AUC_PR,Precision,Recall,Brier
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Train,-0.427396,0.080835,0.097026,0.141908,0.766323,0.741918
Test,-0.426755,0.087091,0.097867,0.142351,0.76369,0.739465


In [24]:
# falta validar matriz de confusao

# evaluator.plot_confusion(
#     evaluator.df_test['target'],
#     evaluator.df_test['y_pred_proba'],
#     #threshold='ks'
# )

In [25]:
#evaluator.plot_calibration()

In [26]:
# hover text nao foi melhorado

#evaluator.plot_event_rate()

In [27]:
# falta aplicar a separação por graficos com parametro features
# feature='acc_open_past_24_mths'

#evaluator.plot_psi()

In [28]:
#evaluator.plot_ks()

In [29]:
evaluator.plot_group_radar()

In [None]:
evaluator.plot_decile_ks()

### Teste de Stress

In [None]:
gen = LookAhead(id_cols=['id'], date_cols=['date']).fit(train)
evaluator.synthetic_gen = gen
stress = evaluator.run_stress_test()
stress['metrics']