In [1]:
# A) Setup: Google Drive mount, imports, paths and utility helpers

# Colab-specific step: attach the user's Google Drive under /content/drive.
# The project paths defined further down in this cell live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

import os, json, math, hashlib
from pathlib import Path
import numpy as np
import pandas as pd

# sklearn
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import (
    roc_auc_score, average_precision_score, accuracy_score, precision_score,
    recall_score, f1_score, brier_score_loss
)
from sklearn.ensemble import HistGradientBoostingClassifier

# Project locations on the mounted Drive: p11 holds the input features,
# p20 receives everything this notebook writes.
ROOT = Path("/content/drive/MyDrive/CognitivaAI")
P11 = ROOT.joinpath("p11_alt_backbones")
P20 = ROOT.joinpath("p20_meta_calibration")

# Create the output folder up front (no error if it is already there)
P20.mkdir(parents=True, exist_ok=True)

VAL_FEATS = P11.joinpath("val_patient_features_backbones.csv")
TEST_FEATS = P11.joinpath("test_patient_features_backbones.csv")

# Fail fast when either upstream feature table is missing
assert all(p.exists() for p in (VAL_FEATS, TEST_FEATS)), "No encuentro los features VAL/TEST en p11_alt_backbones"

# Helpers de métrica
def metrics_from_scores(y_true, y_score, thr=0.5, cohort_name="ALL"):
    """Compute ranking, classification and calibration metrics for binary scores.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (1 = positive).
    y_score : array-like of float
        Predicted scores/probabilities for the positive class.
    thr : float, default 0.5
        Decision threshold; a sample is predicted positive when ``score >= thr``.
    cohort_name : str, default "ALL"
        Label copied verbatim into the result under the ``"cohort"`` key.

    Returns
    -------
    dict
        Keys ``AUC``, ``PRAUC``, ``Acc``, ``P``, ``R``, ``F1``, ``Brier``,
        ``thr``, ``n`` and ``cohort``. ``AUC`` is NaN when ``y_true`` holds
        fewer than two distinct labels.
    """
    # Coerce once so plain lists and pandas Series behave like ndarrays below
    # (the comparison and .astype would fail on a plain Python list).
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score, dtype=float)

    y_pred = (y_score >= thr).astype(int)
    # ROC AUC is only computed when both classes are present
    has_both_classes = len(np.unique(y_true)) > 1

    out = {
        "AUC": float(roc_auc_score(y_true, y_score)) if has_both_classes else float("nan"),
        "PRAUC": float(average_precision_score(y_true, y_score)),
        "Acc": float(accuracy_score(y_true, y_pred)),
        "P": float(precision_score(y_true, y_pred, zero_division=0)),
        "R": float(recall_score(y_true, y_pred, zero_division=0)),
        "F1": float(f1_score(y_true, y_pred, zero_division=0)),
        "Brier": float(brier_score_loss(y_true, y_score)),
        "thr": float(thr),
        "n": int(len(y_true)),
        "cohort": cohort_name,
    }
    return out

def find_best_threshold(y_true, y_score, objective="f1", costs=(5.0, 1.0)):
    """Pick a decision threshold from a fixed grid (0.05 .. 0.95, step 0.05).

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (1 = positive, 0 = negative).
    y_score : array-like of float
        Predicted scores; a sample is positive when ``score >= threshold``.
    objective : {"f1", "cost"}
        - ``"f1"``  : maximise F1.
        - ``"cost"``: minimise ``FN * costs[0] + FP * costs[1]``.
    costs : sequence of two floats, default (5.0, 1.0)
        ``(false-negative cost, false-positive cost)``; only used by ``"cost"``.

    Returns
    -------
    float
        The winning grid threshold. Ties are resolved in favour of the
        smallest threshold.

    Raises
    ------
    ValueError
        If ``objective`` is not one of the supported names (previously any
        unknown value silently behaved like ``"cost"``).
    """
    if objective not in ("f1", "cost"):
        raise ValueError(f"Unknown objective {objective!r}; expected 'f1' or 'cost'")

    # Coerce once so plain lists / pandas Series support the elementwise ops below
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score, dtype=float)

    ts = np.linspace(0.05, 0.95, 19)
    best = None
    for t in ts:
        y_pred = (y_score >= t).astype(int)
        if objective == "f1":
            # Minimise the negative F1 so both objectives share one comparison
            key = -f1_score(y_true, y_pred, zero_division=0)
        else:
            # Weighted count of false negatives and false positives
            fn = np.sum((y_true == 1) & (y_pred == 0))
            fp = np.sum((y_true == 0) & (y_pred == 1))
            key = costs[0] * fn + costs[1] * fp
        cand = (key, t)
        # Tuple comparison: lower key wins, then lower threshold on ties
        if (best is None) or (cand < best):
            best = cand
    return best[1]


Mounted at /content/drive


In [2]:
# B) Load the feature tables, filter NaN-heavy columns and select the features
# VAL is read first, then TEST, each straight from its CSV.
val, test = map(pd.read_csv, (VAL_FEATS, TEST_FEATS))

# Cohort from the patient ID (heuristic: "OAS2" prefix vs. everything else)
def cohort_from_pid(pid: str) -> str:
    """Return "OAS2" when the stringified ID starts with "OAS2", else "OAS1"."""
    if str(pid).startswith("OAS2"):
        return "OAS2"
    return "OAS1"

# Tag every patient with the cohort derived from its ID
val["cohort"] = val["patient_id"].map(cohort_from_pid)
test["cohort"] = test["patient_id"].map(cohort_from_pid)

# Bookkeeping columns that are never used as model inputs
meta_cols = ["patient_id", "y_true", "cohort"]

# Every remaining VAL column is treated as a feature
feat_cols = [col for col in val.columns if col not in meta_cols]

# NaN fraction per feature, measured on VAL only; features above 0.40 are
# dropped and the same selection is reused for TEST so both matrices match
nan_ratio = val.loc[:, feat_cols].isna().mean().sort_values(ascending=False)
keep_cols = [col for col in feat_cols if nan_ratio.loc[col] <= 0.40]

print("Top-10 NaN ratio (VAL):\n", nan_ratio.head(10), "\n")
print(f"✅ Mantengo {len(keep_cols)} columnas; ❌ descarto por NaN>0.4: {len(feat_cols)-len(keep_cols)}")

# Final design matrices and integer label vectors
X_val = val.loc[:, keep_cols].copy()
X_tst = test.loc[:, keep_cols].copy()
y_val = val["y_true"].astype(int).to_numpy()
y_tst = test["y_true"].astype(int).to_numpy()

(val.shape, test.shape, len(keep_cols))


Top-10 NaN ratio (VAL):
 patient_preds_ensemble_trimmed20    0.855072
patient_preds_ensemble_top7         0.855072
patient_preds_ensemble_p2           0.855072
patient_preds_ensemble_mean         0.855072
patient_preds_trimmed20             0.855072
patient_preds_top7                  0.855072
patient_preds_p2                    0.855072
patient_preds_mean                  0.855072
slices_preds_mean                   0.855072
slices_preds_p2                     0.855072
dtype: float64 

✅ Mantengo 36 columnas; ❌ descarto por NaN>0.4: 20


((69, 59), (70, 59), 36)

In [3]:
# C) Leak-free out-of-fold (OOF) pass to obtain "raw" meta scores (before calibration)
# Each VAL row is scored by estimators that were fitted on the other folds only.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Logistic-regression pipeline: median imputation -> standardisation -> classifier.
# Because the preprocessing steps are inside the pipeline they are re-fitted per fold.
pipe_lr = Pipeline([
    ("imp", SimpleImputer(strategy="median")),
    ("sc", StandardScaler()),
    ("lr", LogisticRegression(max_iter=2000, solver="lbfgs", class_weight="balanced"))
])

# Gradient-boosting model, used directly on the raw features (no imputer/scaler in front)
pipe_hgb = HistGradientBoostingClassifier(
    max_depth=3, learning_rate=0.06, max_iter=200,
    l2_regularization=0.0, random_state=42
)

# One OOF score slot per VAL row, filled fold by fold
oof_lr  = np.zeros(len(X_val))
oof_hgb = np.zeros(len(X_val))

for tr, va in skf.split(X_val, y_val):
    # tr / va are positional indices of the training and held-out rows of this fold
    X_tr, X_va = X_val.iloc[tr], X_val.iloc[va]
    # NOTE: y_va is assigned here but not used anywhere else in this cell
    y_tr, y_va = y_val[tr], y_val[va]

    # The same estimator objects are re-fitted on every fold
    pipe_lr.fit(X_tr, y_tr)
    oof_lr[va] = pipe_lr.predict_proba(X_va)[:,1]

    pipe_hgb.fit(X_tr, y_tr)
    oof_hgb[va] = pipe_hgb.predict_proba(X_va)[:,1]

# Fit the final models on all of VAL and use them to score TEST
pipe_lr.fit(X_val, y_val)
pipe_hgb.fit(X_val, y_val)

# Positive-class probability (column 1 of predict_proba) for every TEST row
test_lr  = pipe_lr.predict_proba(X_tst)[:,1]
test_hgb = pipe_hgb.predict_proba(X_tst)[:,1]

print("OOF listo:", {"lr": oof_lr.shape, "hgb": oof_hgb.shape})


OOF listo: {'lr': (69,), 'hgb': (69,)}


In [4]:
# D) Calibración: Platt (sigmoide con LR) y Isotónica (no-paramétrica)
#  - Global (ALL)
#  - Por cohorte (OAS1, OAS2)
# Se calibra a partir de OOF (y_true de VAL) y se aplica a TEST

def fit_platt(y_true, scores):
    """Fit a Platt-style sigmoid calibrator (logistic regression on one feature).

    Parameters
    ----------
    y_true : array-like
        Binary labels the scores are calibrated against.
    scores : array-like of float
        Uncalibrated scores, one per label.

    Returns
    -------
    callable
        Maps an array-like of scores to a 1-D float array of calibrated
        positive-class probabilities. When ``y_true`` contains fewer than two
        classes the logistic regression cannot be fitted, so the identity
        mapping is returned instead (same fallback policy as ``fit_isotonic``).
    """
    y_true = np.asarray(y_true)
    if len(np.unique(y_true)) < 2:
        # Single-class (or empty) group: nothing to calibrate against
        return lambda s: np.asarray(s, dtype=float)

    # The classifier expects a 2-D design matrix: one column holding the score
    X = np.asarray(scores, dtype=float).reshape(-1, 1)
    pl = LogisticRegression(max_iter=1000)
    pl.fit(X, y_true)
    return lambda s: pl.predict_proba(np.asarray(s, dtype=float).reshape(-1, 1))[:, 1]

def fit_isotonic(y_true, scores):
    """Build an isotonic (non-parametric, monotone) calibrator.

    Returns a callable that maps an array-like of scores to a float array.
    When ``y_true`` does not contain at least two classes no regression is
    fitted and the callable simply returns its input as a float array.
    """
    n_classes = len(np.unique(y_true))

    if n_classes >= 2:
        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(scores, y_true)

        def calibrated(s):
            return iso.predict(np.asarray(s, dtype=float))

        return calibrated

    # Fallback: identity mapping when only one class is available
    def identity(s):
        return np.asarray(s, dtype=float)

    return identity

def calibrate_by_group(val_df, base_scores, test_df, test_scores, group_col="cohort"):
    """Fit Platt and isotonic calibrators globally and per group.

    Parameters
    ----------
    val_df : pandas.DataFrame
        Must contain ``"y_true"`` and ``group_col``; rows align with ``base_scores``.
    base_scores : array-like of float
        Uncalibrated scores for the rows of ``val_df`` (e.g. OOF scores).
    test_df, test_scores
        Not used by this function; kept so existing call sites keep working.
    group_col : str, default "cohort"
        Column of ``val_df`` that defines the groups.

    Returns
    -------
    (out, apply_cal)
        ``out`` is ``{"global": {"platt", "isotonic"}, "by_group": {g: {...}}}``
        holding the fitted calibrator callables. ``apply_cal(mode, method,
        scores, cohort)`` applies them: ``mode="global"`` uses the global
        calibrator, any other mode calibrates each cohort with its own
        calibrator and falls back to the global one for unseen groups.
    """
    # Coerce once so boolean-mask indexing below also works for lists / Series
    base_scores = np.asarray(base_scores, dtype=float)
    y_all = val_df["y_true"].values
    groups = val_df[group_col]

    # 1) Global fit on every validation row
    out = {
        "global": {
            "platt": fit_platt(y_all, base_scores),
            "isotonic": fit_isotonic(y_all, base_scores),
        },
        "by_group": {},
    }

    # 2) One pair of calibrators per group, fitted on that group's rows only
    for g in groups.unique():
        m = (groups == g).values
        out["by_group"][g] = {
            "platt": fit_platt(y_all[m], base_scores[m]),
            "isotonic": fit_isotonic(y_all[m], base_scores[m]),
        }

    def apply_cal(mode="global", method="platt", scores=None, cohort=None):
        """Apply a fitted calibrator to ``scores`` (globally or cohort-wise)."""
        if scores is None:
            raise ValueError("scores must be provided")
        scores = np.asarray(scores, dtype=float)

        if mode == "global":
            return out["global"][method](scores)

        # Cohort-wise: every row needs a cohort label to pick its calibrator
        if cohort is None:
            raise ValueError("cohort labels are required when mode is not 'global'")
        cohort = np.asarray(cohort)
        if len(cohort) != len(scores):
            raise ValueError("cohort and scores must have the same length")

        res = np.zeros_like(scores, dtype=float)
        for g in np.unique(cohort):
            idx = (cohort == g)
            # Groups never seen during fitting fall back to the global calibrator
            calibrators = out["by_group"].get(g, out["global"])
            res[idx] = calibrators[method](scores[idx])
        return res

    return out, apply_cal

# Reference frames (ID, label, cohort) handed to the calibration helper
val_ref = val.loc[:, ["patient_id", "y_true", "cohort"]].copy()
test_ref = test.loc[:, ["patient_id", "y_true", "cohort"]].copy()

# Fit calibrators for HGB (main model) and LR (secondary model)
cal_hgb, hgb_apply = calibrate_by_group(
    val_ref, oof_hgb, test_ref, test_hgb, group_col="cohort"
)
cal_lr, lr_apply = calibrate_by_group(
    val_ref, oof_lr, test_ref, test_lr, group_col="cohort"
)

# HGB scores calibrated with the global calibrators
val_hgb_platt_g = hgb_apply(mode="global", method="platt", scores=oof_hgb)
val_hgb_iso_g = hgb_apply(mode="global", method="isotonic", scores=oof_hgb)
tst_hgb_platt_g = hgb_apply(mode="global", method="platt", scores=test_hgb)
tst_hgb_iso_g = hgb_apply(mode="global", method="isotonic", scores=test_hgb)

# HGB scores calibrated with the per-cohort calibrators
val_hgb_platt_c = hgb_apply(mode="by_group", method="platt", scores=oof_hgb, cohort=val["cohort"].to_numpy())
val_hgb_iso_c = hgb_apply(mode="by_group", method="isotonic", scores=oof_hgb, cohort=val["cohort"].to_numpy())
tst_hgb_platt_c = hgb_apply(mode="by_group", method="platt", scores=test_hgb, cohort=test["cohort"].to_numpy())
tst_hgb_iso_c = hgb_apply(mode="by_group", method="isotonic", scores=test_hgb, cohort=test["cohort"].to_numpy())

print("Calibraciones listas (HGB/LR; global y por cohorte).")


Calibraciones listas (HGB/LR; global y por cohorte).


In [5]:
# E) Umbrales: F1 y coste (FN:FP=5:1 por defecto)
def eval_all(y_v, y_t, s_v, s_t, cohorts_v, cohorts_t, tag,
             costs=(5.0, 1.0), cohort_names=("OAS1", "OAS2")):
    """Select thresholds on VAL and report metrics on VAL and TEST.

    Thresholds are chosen on the validation scores only (F1-optimal and
    cost-optimal, both globally and per cohort) and then applied unchanged
    to the test scores.

    Parameters
    ----------
    y_v, y_t : array-like
        Binary labels for VAL and TEST.
    s_v, s_t : array-like of float
        Scores for VAL and TEST.
    cohorts_v, cohorts_t : array-like
        Cohort label of every VAL / TEST row.
    tag : str
        Free-form identifier stored in the result.
    costs : sequence of two floats, default (5.0, 1.0)
        ``(FN cost, FP cost)`` used by the cost objective.
    cohort_names : sequence of str, default ("OAS1", "OAS2")
        Cohorts reported in the per-cohort sections.

    Returns
    -------
    dict
        Keys ``tag``, ``GLOBAL_F1``, ``GLOBAL_COST`` (each with ``VAL``,
        ``TEST``, ``thr``) and ``COHORT_F1``, ``COHORT_COST`` (each with
        ``VAL``, ``TEST``, ``thr_map``).
    """
    # Coerce once so boolean-mask indexing works for lists / pandas Series too
    y_v, y_t = np.asarray(y_v), np.asarray(y_t)
    s_v, s_t = np.asarray(s_v, dtype=float), np.asarray(s_t, dtype=float)
    cohorts_v, cohorts_t = np.asarray(cohorts_v), np.asarray(cohorts_t)

    # 1) Global - F1
    thr_f1 = find_best_threshold(y_v, s_v, objective="f1")
    m_val  = metrics_from_scores(y_v, s_v, thr=thr_f1, cohort_name="ALL")
    m_tst  = metrics_from_scores(y_t, s_t, thr=thr_f1, cohort_name="ALL")

    # 2) Global - cost
    thr_cost = find_best_threshold(y_v, s_v, objective="cost", costs=costs)
    mc_val   = metrics_from_scores(y_v, s_v, thr=thr_cost, cohort_name="ALL")
    mc_tst   = metrics_from_scores(y_t, s_t, thr=thr_cost, cohort_name="ALL")

    # 3) Per cohort (F1 and cost)
    def per_cohort(y, s, cohorts, thr_map=None, objective="f1"):
        """Metrics per cohort; searches a threshold unless ``thr_map`` supplies one."""
        res = {}
        for g in cohort_names:
            idx = (cohorts == g)
            n_g = int(idx.sum())
            # Only an empty cohort yields the NaN placeholder. The original
            # extra test `len(np.unique(y[idx])) < 1` could never be true for
            # a non-empty cohort, so it is dropped without changing results.
            if n_g == 0:
                res[g] = {"AUC": np.nan, "PRAUC": np.nan, "Acc": np.nan, "P": np.nan,
                          "R": np.nan, "F1": np.nan, "Brier": np.nan, "thr": np.nan,
                          "n": n_g, "cohort": g}
                continue
            if thr_map is None:
                t = find_best_threshold(y[idx], s[idx], objective=objective, costs=costs)
            else:
                t = thr_map[g]
            res[g] = metrics_from_scores(y[idx], s[idx], thr=t, cohort_name=g)
        return res

    def thresholds_from(search):
        """Extract per-cohort thresholds, defaulting to 0.5 for empty cohorts."""
        return {
            g: (search[g]["thr"] if not math.isnan(search[g]["thr"]) else 0.5)
            for g in cohort_names
        }

    # Cohort thresholds from VAL: run each search once (it already covers all
    # cohorts) instead of repeating it for every cohort in a loop.
    thr_map_f1   = thresholds_from(per_cohort(y_v, s_v, cohorts_v, thr_map=None, objective="f1"))
    thr_map_cost = thresholds_from(per_cohort(y_v, s_v, cohorts_v, thr_map=None, objective="cost"))

    # Per-cohort evaluation, applying the VAL thresholds to both VAL and TEST
    pv_f1  = per_cohort(y_v, s_v, cohorts_v, thr_map=thr_map_f1,   objective="f1")
    pt_f1  = per_cohort(y_t, s_t, cohorts_t, thr_map=thr_map_f1,   objective="f1")
    pv_cst = per_cohort(y_v, s_v, cohorts_v, thr_map=thr_map_cost, objective="cost")
    pt_cst = per_cohort(y_t, s_t, cohorts_t, thr_map=thr_map_cost, objective="cost")

    summary = {
        "tag": tag,
        "GLOBAL_F1":   {"VAL": m_val, "TEST": m_tst, "thr": thr_f1},
        "GLOBAL_COST": {"VAL": mc_val, "TEST": mc_tst, "thr": thr_cost},
        "COHORT_F1":   {"VAL": pv_f1, "TEST": pt_f1, "thr_map": thr_map_f1},
        "COHORT_COST": {"VAL": pv_cst, "TEST": pt_cst, "thr_map": thr_map_cost},
    }
    return summary

val_coh = val["cohort"].to_numpy()
tst_coh = test["cohort"].to_numpy()

# Calibrated HGB variants: global/platt, global/isotonic, cohort/platt, cohort/isotonic
SUMMARIES = [
    eval_all(y_val, y_tst, val_hgb_platt_g, tst_hgb_platt_g, val_coh, tst_coh, tag="HGB|Platt|Global"),
    eval_all(y_val, y_tst, val_hgb_iso_g, tst_hgb_iso_g, val_coh, tst_coh, tag="HGB|Isotonic|Global"),
    eval_all(y_val, y_tst, val_hgb_platt_c, tst_hgb_platt_c, val_coh, tst_coh, tag="HGB|Platt|PerCohort"),
    eval_all(y_val, y_tst, val_hgb_iso_c, tst_hgb_iso_c, val_coh, tst_coh, tag="HGB|Isotonic|PerCohort"),
]

# Platt-calibrated LR, kept as a reference point
val_lr_platt_g = lr_apply(mode="global", method="platt", scores=oof_lr)
tst_lr_platt_g = lr_apply(mode="global", method="platt", scores=test_lr)
SUMMARIES.append(
    eval_all(y_val, y_tst, val_lr_platt_g, tst_lr_platt_g, val_coh, tst_coh, tag="LR|Platt|Global")
)

# Quick console view: global F1 block of every variant
for s in SUMMARIES:
    print("\n===", s["tag"], "===")
    print("[GLOBAL_F1][VAL]", s["GLOBAL_F1"]["VAL"])
    print("[GLOBAL_F1][TEST]", s["GLOBAL_F1"]["TEST"])



=== HGB|Platt|Global ===
[GLOBAL_F1][VAL] {'AUC': 0.7894736842105263, 'PRAUC': 0.755755246104314, 'Acc': 0.7101449275362319, 'P': 0.6170212765957447, 'R': 0.9354838709677419, 'F1': 0.7435897435897436, 'Brier': 0.18579872787793636, 'thr': 0.3, 'n': 69, 'cohort': 'ALL'}
[GLOBAL_F1][TEST] {'AUC': 0.6800986842105263, 'PRAUC': 0.6176791928144115, 'Acc': 0.6, 'P': 0.5434782608695652, 'R': 0.78125, 'F1': 0.6410256410256411, 'Brier': 0.22502621998297748, 'thr': 0.3, 'n': 70, 'cohort': 'ALL'}

=== HGB|Isotonic|Global ===
[GLOBAL_F1][VAL] {'AUC': 0.8234295415959253, 'PRAUC': 0.7489112881819754, 'Acc': 0.7246376811594203, 'P': 0.6304347826086957, 'R': 0.9354838709677419, 'F1': 0.7532467532467533, 'Brier': 0.15869148758953858, 'thr': 0.15, 'n': 69, 'cohort': 'ALL'}
[GLOBAL_F1][TEST] {'AUC': 0.6846217105263158, 'PRAUC': 0.5947630494505495, 'Acc': 0.6, 'P': 0.5434782608695652, 'R': 0.78125, 'F1': 0.6410256410256411, 'Brier': 0.24805148633873583, 'thr': 0.15, 'n': 70, 'cohort': 'ALL'}

=== HGB|Platt

In [6]:
# F) Guardado de salidas: CSVs con predicciones calibradas y JSON resumen

def save_preds(tag, pid, y_true, cohort, scores, prefix="p20"):
    """Write one predictions table as CSV inside P20 and return its path.

    The file is named ``{prefix}_{tag}_preds.csv`` with every ``|`` in ``tag``
    replaced by ``_``; columns are patient_id, cohort, y_true, y_score and the
    index is not written. The path is returned as a string.
    """
    table = pd.DataFrame(
        {"patient_id": pid, "cohort": cohort, "y_true": y_true, "y_score": scores}
    )
    safe_tag = tag.replace('|', '_')
    target = P20 / f"{prefix}_{safe_tag}_preds.csv"
    table.to_csv(target, index=False)
    return str(target)

# Persist the calibrated HGB predictions (VAL and TEST) for every variant;
# the dict maps a short key to the path of the CSV that was written.
paths = {
    "VAL_HGB_Platt_Global": save_preds("VAL_HGB_Platt_Global", val["patient_id"].values, y_val, val_coh, val_hgb_platt_g),
    "TEST_HGB_Platt_Global": save_preds("TEST_HGB_Platt_Global", test["patient_id"].values, y_tst, tst_coh, tst_hgb_platt_g),
    "VAL_HGB_Isotonic_Global": save_preds("VAL_HGB_Isotonic_Global", val["patient_id"].values, y_val, val_coh, val_hgb_iso_g),
    "TEST_HGB_Isotonic_Global": save_preds("TEST_HGB_Isotonic_Global", test["patient_id"].values, y_tst, tst_coh, tst_hgb_iso_g),
    "VAL_HGB_Platt_PerC": save_preds("VAL_HGB_Platt_PerC", val["patient_id"].values, y_val, val_coh, val_hgb_platt_c),
    "TEST_HGB_Platt_PerC": save_preds("TEST_HGB_Platt_PerC", test["patient_id"].values, y_tst, tst_coh, tst_hgb_platt_c),
    "VAL_HGB_Isotonic_PerC": save_preds("VAL_HGB_Isotonic_PerC", val["patient_id"].values, y_val, val_coh, val_hgb_iso_c),
    "TEST_HGB_Isotonic_PerC": save_preds("TEST_HGB_Isotonic_PerC", test["patient_id"].values, y_tst, tst_coh, tst_hgb_iso_c),
}

# JSON summary: inputs used, every metrics table and the output paths
summary = {
    "inputs": {
        "VAL_FEATS": str(VAL_FEATS),
        "TEST_FEATS": str(TEST_FEATS),
        "kept_features": keep_cols,
    },
    "summaries": SUMMARIES,
    "paths": paths,
}
SUM_PATH = P20 / "p20_calibration_thresholding_summary.json"
with SUM_PATH.open("w") as f:
    json.dump(summary, f, indent=2)

print("💾 Guardados en:", P20)
print("• JSON resumen:", SUM_PATH)
for k, v in paths.items():
    print("•", k, "->", v)


💾 Guardados en: /content/drive/MyDrive/CognitivaAI/p20_meta_calibration
• JSON resumen: /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_calibration_thresholding_summary.json
• VAL_HGB_Platt_Global -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_VAL_HGB_Platt_Global_preds.csv
• TEST_HGB_Platt_Global -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_TEST_HGB_Platt_Global_preds.csv
• VAL_HGB_Isotonic_Global -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_VAL_HGB_Isotonic_Global_preds.csv
• TEST_HGB_Isotonic_Global -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_TEST_HGB_Isotonic_Global_preds.csv
• VAL_HGB_Platt_PerC -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_VAL_HGB_Platt_PerC_preds.csv
• TEST_HGB_Platt_PerC -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_TEST_HGB_Platt_PerC_preds.csv
• VAL_HGB_Isotonic_PerC -> /content/drive/MyDrive/CognitivaAI/p20_meta_calibration/p20_VAL_HGB_Isotoni