# Avalia√ß√£o com modelo alternativo (Random Forest)

Notebook para comparar um modelo **sem CatBoost** usando a mesma l√≥gica operacional de **Top-K estratificado por fase**.

In [None]:
import json
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# ----------------------------
# 0) CONFIGURA√á√ÉO DE CAMINHOS
# ----------------------------
# Ajuste conforme seu ambiente (local/Drive)
ROOT_DIR = Path("..").resolve() 
BASE_DIR = ROOT_DIR / "notebooks"

FEAT_DIR = BASE_DIR / "data"
OUT_DIR = BASE_DIR / "models"

TRAIN_FEAT_FILE = "train_feat__piora__2022_2023.parquet"
VALID_FEAT_FILE = "valid_feat__piora__2023_2024.parquet"

K_MAIN = 15
K_LIST = [10, 15, 20, 25]

TARGET_COL = "target"
FASE_COL = "fase"
SCORE_COL = "score"
PROBA_COL = "proba"
ALERT_COL = "alerta"

OUT_DIR.mkdir(parents=True, exist_ok=True)
print(f"FEAT_DIR: {FEAT_DIR.resolve()}")
print(f"OUT_DIR : {OUT_DIR.resolve()}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
FEAT_DIR: /content/drive/MyDrive/Estudos/Tech_challenge/fase_5/features
OUT_DIR : /content/drive/MyDrive/Estudos/Tech_challenge/fase_5/models/rf_evaluation


In [51]:
# ----------------------------
# 1) CARREGAR DADOS
# ----------------------------
train_path = FEAT_DIR / TRAIN_FEAT_FILE
valid_path = FEAT_DIR / VALID_FEAT_FILE

if not train_path.exists() or not valid_path.exists():
    raise FileNotFoundError(
        f"Arquivos n√£o encontrados. Verifique FEAT_DIR e nomes de arquivo.\n"
        f"Esperado: {train_path} e {valid_path}"
    )

train_feat = pd.read_parquet(train_path)
valid_feat = pd.read_parquet(valid_path)

print("Train shape:", train_feat.shape)
print("Valid shape:", valid_feat.shape)

required_cols = {TARGET_COL, FASE_COL}
missing_train = required_cols - set(train_feat.columns)
missing_valid = required_cols - set(valid_feat.columns)
if missing_train or missing_valid:
    raise ValueError(f"Colunas obrigat√≥rias faltando. train={missing_train} valid={missing_valid}")

Train shape: (600, 122)
Valid shape: (765, 122)


In [52]:
# ----------------------------
# 2) TREINAR RANDOM FOREST
# ----------------------------
drop_cols = [TARGET_COL]
feature_cols = [c for c in train_feat.columns if c not in drop_cols]

X_train = train_feat[feature_cols].copy()
y_train = train_feat[TARGET_COL].astype(int).copy()

X_valid = valid_feat[feature_cols].copy()
y_valid = valid_feat[TARGET_COL].astype(int).copy()

cat_cols = X_train.select_dtypes(include=["object", "category", "string", "bool"]).columns.tolist()
num_cols = [c for c in X_train.columns if c not in cat_cols]

numeric_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
])

categorical_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipe, num_cols),
        ("cat", categorical_pipe, cat_cols),
    ]
)

rf = RandomForestClassifier(
    n_estimators=300,       
    max_depth=12,            
    min_samples_leaf=5,     
    max_features="sqrt",     
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)

# from lightgbm import LGBMClassifier
# lgbm = LGBMClassifier(
#     n_estimators=500,
#     learning_rate=0.05,
#     num_leaves=31,
#     min_child_samples=20,
#     class_weight="balanced",
#     random_state=42,
#     n_jobs=-1,
# )

model = Pipeline(steps=[
    ("prep", preprocessor),
    ("rf", rf),
])

model.fit(X_train, y_train)
valid_proba = model.predict_proba(X_valid)[:, 1]
valid_score = (100 * valid_proba).round(4)

auc = roc_auc_score(y_valid, valid_proba)
print(f"‚úÖ AUC (valid): {auc:.6f}")

‚úÖ AUC (valid): 0.732046


In [53]:
# ----------------------------
# 3) FUN√á√ïES TOP-K E M√âTRICAS
# ----------------------------
def stratified_topk_alert(
    df: pd.DataFrame,
    score_col: str = SCORE_COL,
    fase_col: str = FASE_COL,
    k_pct: float = 15.0,
    alert_col: str = ALERT_COL,
) -> pd.DataFrame:
    out = df.copy()
    out[alert_col] = 0

    for _, g in out.groupby(fase_col, dropna=False):
        n = len(g)
        if n == 0:
            continue
        k = max(1, int(np.ceil(n * k_pct / 100)))
        idx = g.sort_values(score_col, ascending=False).head(k).index
        out.loc[idx, alert_col] = 1

    return out


def operational_metrics_topk(
    df: pd.DataFrame,
    score_col: str = SCORE_COL,
    target_col: str = TARGET_COL,
    fase_col: str = FASE_COL,
    k_pct: float = 15.0,
    alert_col: str = ALERT_COL,
) -> dict:
    tmp = stratified_topk_alert(df, score_col, fase_col, k_pct, alert_col)

    y = tmp[target_col].astype(int).values
    a = tmp[alert_col].astype(int).values

    tp = int(((a == 1) & (y == 1)).sum())
    fp = int(((a == 1) & (y == 0)).sum())
    fn = int(((a == 0) & (y == 1)).sum())

    recall = tp / (tp + fn) if (tp + fn) else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    base_rate = float(y.mean()) if len(y) else 0.0
    lift = (precision / base_rate) if base_rate > 0 else np.nan

    return {
        "k_pct": float(k_pct),
        "n_total": int(len(y)),
        "n_alert": int(a.sum()),
        "n_pos": int(y.sum()),
        "base_rate": base_rate,
        "tp": tp,
        "fp": fp,
        "fn": fn,
        "recall@k": float(recall),
        "precision@k": float(precision),
        "lift@k": float(lift) if np.isfinite(lift) else None,
        "df_with_alerts": tmp,
    }


def operational_metrics_topk_by_fase(
    df: pd.DataFrame,
    score_col: str = SCORE_COL,
    target_col: str = TARGET_COL,
    fase_col: str = FASE_COL,
    k_pct: float = 15.0,
    alert_col: str = ALERT_COL,
) -> pd.DataFrame:
    tmp = stratified_topk_alert(df, score_col, fase_col, k_pct, alert_col)

    rows = []
    for fase, g in tmp.groupby(fase_col, dropna=False):
        y = g[target_col].astype(int).values
        a = g[alert_col].astype(int).values

        tp = int(((a == 1) & (y == 1)).sum())
        fp = int(((a == 1) & (y == 0)).sum())
        fn = int(((a == 0) & (y == 1)).sum())

        recall = tp / (tp + fn) if (tp + fn) else 0.0
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        base_rate = float(y.mean()) if len(y) else 0.0
        lift = (precision / base_rate) if base_rate > 0 else np.nan

        rows.append({
            fase_col: fase,
            "n": int(len(g)),
            "n_alert": int(a.sum()),
            "n_pos": int(y.sum()),
            "base_rate": base_rate,
            "recall@k": float(recall),
            "precision@k": float(precision),
            "lift@k": float(lift) if np.isfinite(lift) else None,
        })

    return pd.DataFrame(rows).sort_values(by=fase_col).reset_index(drop=True)

In [54]:
# ----------------------------
# 4) AVALIAR E SALVAR RESULTADOS
# ----------------------------
valid_out = valid_feat.copy()
valid_out[PROBA_COL] = valid_proba
valid_out[SCORE_COL] = valid_score

metrics = {
    "model_name": "RandomForestClassifier",
    "auc_valid": float(auc),
    "n_valid": int(len(valid_out)),
    "base_rate_valid": float(valid_out[TARGET_COL].mean()),
    "topk": [],
    "k_main": K_MAIN,
}

print(f"‚úÖ Base rate (valid): {metrics['base_rate_valid']:.4f} | N={metrics['n_valid']}")

for k in K_LIST:
    res = operational_metrics_topk(
        valid_out,
        score_col=SCORE_COL,
        target_col=TARGET_COL,
        fase_col=FASE_COL,
        k_pct=k,
        alert_col=ALERT_COL,
    )

    metrics["topk"].append({kk: vv for kk, vv in res.items() if kk != "df_with_alerts"})

    print(
        f"K={k}% | Recall@K={res['recall@k']:.3f} | "
        f"Precision@K={res['precision@k']:.3f} | "
        f"Lift@K={res['lift@k'] if res['lift@k'] is not None else np.nan:.2f} | "
        f"Alerts={res['n_alert']}/{res['n_total']}"
    )

topk_by_fase = operational_metrics_topk_by_fase(
    valid_out,
    score_col=SCORE_COL,
    target_col=TARGET_COL,
    fase_col=FASE_COL,
    k_pct=K_MAIN,
    alert_col=ALERT_COL,
)

res_main = operational_metrics_topk(
    valid_out,
    score_col=SCORE_COL,
    target_col=TARGET_COL,
    fase_col=FASE_COL,
    k_pct=K_MAIN,
    alert_col=ALERT_COL,
)
alerts_df = res_main["df_with_alerts"].copy()
alerts_df = alerts_df.sort_values([FASE_COL, SCORE_COL], ascending=[True, False])
alerts_only = alerts_df[alerts_df[ALERT_COL] == 1].copy()

stem = "rf__piora__train_2022_2023__valid_2023_2024"
metrics_path = OUT_DIR / f"metrics__{stem}.json"
byfase_path = OUT_DIR / f"topk_by_fase__k{K_MAIN}__{stem}.parquet"
alerts_path = OUT_DIR / f"alerts__k{K_MAIN}__{stem}.parquet"
validout_path = OUT_DIR / f"valid_out__{stem}.parquet"

with open(metrics_path, "w", encoding="utf-8") as f:
    json.dump(metrics, f, ensure_ascii=False, indent=2)

topk_by_fase.to_parquet(byfase_path, index=False)
alerts_only.to_parquet(alerts_path, index=False)
valid_out.to_parquet(validout_path, index=False)

print("\n‚úÖ Arquivos salvos em:", OUT_DIR)
print(" -", metrics_path.name)
print(" -", byfase_path.name)
print(" -", alerts_path.name)
print(" -", validout_path.name)

display(topk_by_fase.head(20))
cols = [c for c in ["ano_base", "fase", "turma", "score", "proba", "alerta", TARGET_COL] if c in alerts_only.columns]
display(alerts_only[cols].head(30))

‚úÖ Base rate (valid): 0.4092 | N=765
K=10% | Recall@K=0.166 | Precision@K=0.650 | Lift@K=1.59 | Alerts=80/765
K=15% | Recall@K=0.246 | Precision@K=0.647 | Lift@K=1.58 | Alerts=119/765
K=20% | Recall@K=0.326 | Precision@K=0.654 | Lift@K=1.60 | Alerts=156/765
K=25% | Recall@K=0.380 | Precision@K=0.610 | Lift@K=1.49 | Alerts=195/765

‚úÖ Arquivos salvos em: /content/drive/MyDrive/Estudos/Tech_challenge/fase_5/models/rf_evaluation
 - metrics__rf__piora__train_2022_2023__valid_2023_2024.json
 - topk_by_fase__k15__rf__piora__train_2022_2023__valid_2023_2024.parquet
 - alerts__k15__rf__piora__train_2022_2023__valid_2023_2024.parquet
 - valid_out__rf__piora__train_2022_2023__valid_2023_2024.parquet


Unnamed: 0,fase,n,n_alert,n_pos,base_rate,recall@k,precision@k,lift@k
0,0,174,27,40,0.229885,0.525,0.777778,3.383333
1,1,138,21,72,0.521739,0.166667,0.571429,1.095238
2,2,153,23,101,0.660131,0.207921,0.913043,1.383125
3,3,94,15,34,0.361702,0.264706,0.6,1.658824
4,4,67,11,24,0.358209,0.125,0.272727,0.761364
5,5,43,7,14,0.325581,0.285714,0.571429,1.755102
6,6,17,3,10,0.588235,0.3,1.0,1.7
7,7,20,3,5,0.25,0.6,1.0,4.0
8,8,59,9,13,0.220339,0.076923,0.111111,0.504274


Unnamed: 0,ano_base,fase,turma,score,proba,alerta,target
547,2023,0,ALFA G - G2/G3,62.4871,0.624871,1,1
587,2023,0,ALFA O - G2/G3,58.9638,0.589638,1,1
590,2023,0,ALFA L - G2/G3,58.7355,0.587355,1,1
629,2023,0,ALFA I - G2/G3,55.5934,0.555934,1,0
576,2023,0,ALFA U - G2/G3,55.5778,0.555778,1,0
661,2023,0,ALFA B - G2/G3,55.4338,0.554338,1,0
549,2023,0,ALFA G - G2/G3,55.2453,0.552453,1,1
545,2023,0,ALFA J - G2/G3,54.8626,0.548626,1,1
532,2023,0,ALFA E - G2/G3,54.2048,0.542048,1,1
641,2023,0,ALFA G - G2/G3,54.044,0.54044,1,1


In [None]:
# ----------------------------
# 5) COMPARAR COM CATBOOST
# ----------------------------
catboost_metrics_path = BASE_DIR / "models/evaluation_results.json"

if catboost_metrics_path.exists():
    with open(catboost_metrics_path, "r", encoding="utf-8") as f:
        cat_metrics = json.load(f)

    rf_topk = pd.DataFrame(metrics["topk"])[["k_pct", "recall@k", "precision@k", "lift@k"]].copy()
    rf_topk = rf_topk.rename(columns={
        "recall@k": "rf_recall@k",
        "precision@k": "rf_precision@k",
        "lift@k": "rf_lift@k",
    })

    cat_topk = pd.DataFrame(cat_metrics.get("topk", []))
    if not cat_topk.empty:
        cat_topk = cat_topk[["k_pct", "recall@k", "precision@k", "lift@k"]].copy()
        cat_topk = cat_topk.rename(columns={
            "recall@k": "cat_recall@k",
            "precision@k": "cat_precision@k",
            "lift@k": "cat_lift@k",
        })

        comp = rf_topk.merge(cat_topk, on="k_pct", how="left")
        display(comp)
    else:
        print("Arquivo de m√©tricas CatBoost encontrado, mas sem bloco topk.")
else:
    print("N√£o encontrei models/evaluation_results.json para compara√ß√£o autom√°tica.")

Unnamed: 0,k_pct,rf_recall@k,rf_precision@k,rf_lift@k,cat_recall@k,cat_precision@k,cat_lift@k
0,10.0,0.166134,0.65,1.588658,0.191693,0.75,1.833067
1,15.0,0.246006,0.647059,1.58147,0.27476,0.722689,1.766317
2,20.0,0.325879,0.653846,1.598058,0.341853,0.685897,1.676395
3,25.0,0.380192,0.610256,1.491521,0.408946,0.65641,1.604325


In [None]:
# ----------------------------
# 6) EXPERIMENTOS COMPARATIVOS
# ----------------------------
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import OrdinalEncoder

experiment_results = []

def run_experiment(name, clf, X_tr, y_tr, X_val, y_val, valid_df, use_ordinal=False):
    if use_ordinal:
        cat_pipe_exp = Pipeline([
            ("imputer", SimpleImputer(strategy="most_frequent")),
            ("ordinal", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
        ])
    else:
        cat_pipe_exp = categorical_pipe
    
    prep_exp = ColumnTransformer(transformers=[
        ("num", numeric_pipe, num_cols),
        ("cat", cat_pipe_exp, cat_cols),
    ])
    
    pipe = Pipeline([("prep", prep_exp), ("clf", clf)])
    pipe.fit(X_tr, y_tr)
    
    proba = pipe.predict_proba(X_val)[:, 1]
    auc = roc_auc_score(y_val, proba)
    
    tmp = valid_df.copy()
    tmp[SCORE_COL] = (100 * proba).round(4)
    tmp[PROBA_COL] = proba
    
    res_k15 = operational_metrics_topk(tmp, k_pct=K_MAIN)
    
    return {
        "model": name,
        "auc": round(auc, 5),
        "recall@15": round(res_k15["recall@k"], 4),
        "precision@15": round(res_k15["precision@k"], 4),
        "lift@15": round(res_k15["lift@k"], 4) if res_k15["lift@k"] else None,
    }


# --- Experimento 1: RF atual (baseline) ---
experiment_results.append(run_experiment(
    "RF_baseline",
    RandomForestClassifier(n_estimators=300, max_depth=12, min_samples_leaf=5,
                           max_features="sqrt", class_weight="balanced",
                           random_state=42, n_jobs=-1),
    X_train, y_train, X_valid, y_valid, valid_feat
))

# --- Experimento 2: RF com OrdinalEncoder (mais leve, melhor p/ trees) ---
experiment_results.append(run_experiment(
    "RF_ordinal",
    RandomForestClassifier(n_estimators=300, max_depth=12, min_samples_leaf=5,
                           max_features="sqrt", class_weight="balanced",
                           random_state=42, n_jobs=-1),
    X_train, y_train, X_valid, y_valid, valid_feat,
    use_ordinal=True
))

# --- Experimento 3: RF mais profundo ---
experiment_results.append(run_experiment(
    "RF_deeper",
    RandomForestClassifier(n_estimators=500, max_depth=20, min_samples_leaf=3,
                           max_features="sqrt", class_weight="balanced",
                           random_state=42, n_jobs=-1),
    X_train, y_train, X_valid, y_valid, valid_feat,
    use_ordinal=True
))

# --- Experimento 4: ExtraTrees (mais r√°pido, √†s vezes melhor) ---
experiment_results.append(run_experiment(
    "ExtraTrees",
    ExtraTreesClassifier(n_estimators=300, max_depth=15, min_samples_leaf=5,
                         max_features="sqrt", class_weight="balanced",
                         random_state=42, n_jobs=-1),
    X_train, y_train, X_valid, y_valid, valid_feat,
    use_ordinal=True
))

# --- Experimento 5: HistGradientBoosting (nativo sklearn, suporta NaN) ---
from sklearn.ensemble import HistGradientBoostingClassifier

X_train_raw = train_feat[[c for c in train_feat.columns if c != TARGET_COL]].copy()
X_valid_raw = valid_feat[[c for c in valid_feat.columns if c != TARGET_COL]].copy()

for col in cat_cols:
    X_train_raw[col] = X_train_raw[col].astype("category").cat.codes.replace(-1, np.nan)
    X_valid_raw[col] = X_valid_raw[col].astype("category").cat.codes.replace(-1, np.nan)

hgb = HistGradientBoostingClassifier(
    max_iter=300,
    learning_rate=0.05,
    max_depth=8,
    min_samples_leaf=20,
    class_weight="balanced",
    random_state=42,
)
hgb.fit(X_train_raw, y_train)
hgb_proba = hgb.predict_proba(X_valid_raw)[:, 1]
hgb_auc = roc_auc_score(y_valid, hgb_proba)

tmp_hgb = valid_feat.copy()
tmp_hgb[SCORE_COL] = (100 * hgb_proba).round(4)
res_hgb = operational_metrics_topk(tmp_hgb, k_pct=K_MAIN)

experiment_results.append({
    "model": "HistGradBoost",
    "auc": round(hgb_auc, 5),
    "recall@15": round(res_hgb["recall@k"], 4),
    "precision@15": round(res_hgb["precision@k"], 4),
    "lift@15": round(res_hgb["lift@k"], 4) if res_hgb["lift@k"] else None,
})

# --- Refer√™ncia: CatBoost ---
cat_k15 = next((x for x in cat_metrics.get("topk", []) if x["k_pct"] == K_MAIN), {})
experiment_results.append({
    "model": "‚≠ê CatBoost_ref",
    "auc": round(cat_metrics.get("auc_valid", 0), 5),
    "recall@15": round(cat_k15.get("recall@k", 0), 4),
    "precision@15": round(cat_k15.get("precision@k", 0), 4),
    "lift@15": round(cat_k15.get("lift@k", 0), 4),
})

df_exp = pd.DataFrame(experiment_results).sort_values("lift@15", ascending=False)
print("\nüìä Compara√ß√£o de Experimentos (K=15%):")
display(df_exp.reset_index(drop=True))


üìä Compara√ß√£o de Experimentos (K=15%):


Unnamed: 0,model,auc,recall@15,precision@15,lift@15
0,‚≠ê CatBoost_ref,0.71515,0.2748,0.7227,1.7663
1,ExtraTrees,0.74887,0.2716,0.7143,1.7458
2,RF_baseline,0.73205,0.246,0.6471,1.5815
3,HistGradBoost,0.69207,0.23,0.605,1.4788


In [None]:
# --- Experimento 6: ExtraTrees tuned ---
experiment_results.append(run_experiment(
    "ExtraTrees_tuned",
    ExtraTreesClassifier(
        n_estimators=500,
        max_depth=20,
        min_samples_leaf=3,
        max_features=0.5,         
        class_weight="balanced",
        random_state=42,
        n_jobs=-1,
    ),
    X_train, y_train, X_valid, y_valid, valid_feat,
    use_ordinal=True
))

In [None]:
# --- Experimento 7: ExtraTrees + Platt Scaling ---
from sklearn.calibration import CalibratedClassifierCV

et_base = ExtraTreesClassifier(
    n_estimators=300, max_depth=15, min_samples_leaf=5,
    max_features="sqrt", class_weight="balanced",
    random_state=42, n_jobs=-1,
)

cat_pipe_cal = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ordinal", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
])
prep_cal = ColumnTransformer(transformers=[
    ("num", numeric_pipe, num_cols),
    ("cat", cat_pipe_cal, cat_cols),
])

X_train_t = prep_cal.fit_transform(X_train, y_train)
X_valid_t = prep_cal.transform(X_valid)

et_calibrated = CalibratedClassifierCV(et_base, method="isotonic", cv=3)
et_calibrated.fit(X_train_t, y_train)

proba_cal = et_calibrated.predict_proba(X_valid_t)[:, 1]
auc_cal = roc_auc_score(y_valid, proba_cal)

tmp_cal = valid_feat.copy()
tmp_cal[SCORE_COL] = (100 * proba_cal).round(4)
res_cal = operational_metrics_topk(tmp_cal, k_pct=K_MAIN)

experiment_results.append({
    "model": "ExtraTrees_calibrated",
    "auc": round(auc_cal, 5),
    "recall@15": round(res_cal["recall@k"], 4),
    "precision@15": round(res_cal["precision@k"], 4),
    "lift@15": round(res_cal["lift@k"], 4) if res_cal["lift@k"] else None,
})

df_exp = pd.DataFrame(experiment_results).sort_values("lift@15", ascending=False)
print("\nüìä Compara√ß√£o de Experimentos (K=15%):")
display(df_exp.reset_index(drop=True))


üìä Compara√ß√£o de Experimentos (K=15%):


Unnamed: 0,model,auc,recall@15,precision@15,lift@15
0,‚≠ê CatBoost_ref,0.71515,0.2748,0.7227,1.7663
1,ExtraTrees,0.74887,0.2716,0.7143,1.7458
2,RF_deeper,0.74198,0.2684,0.7059,1.7252
3,RF_ordinal,0.72892,0.262,0.6891,1.6842
4,ExtraTrees_tuned,0.73967,0.2588,0.6807,1.6636
5,RF_baseline,0.73205,0.246,0.6471,1.5815
6,HistGradBoost,0.69207,0.23,0.605,1.4788
7,ExtraTrees_calibrated,0.7083,0.2268,0.5966,1.4582
