In [1]:
# === p22_A: Setup & paths ===
from pathlib import Path
import os, json, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import (
    roc_auc_score, average_precision_score, accuracy_score,
    precision_score, recall_score, f1_score, brier_score_loss
)
from sklearn.calibration import CalibratedClassifierCV

# Monta Drive si hace falta
try:
    from google.colab import drive
    drive.mount('/content/drive')
except Exception:
    pass

# Rutas
P11 = Path("/content/drive/MyDrive/CognitivaAI/p11_alt_backbones")
P22 = Path("/content/drive/MyDrive/CognitivaAI/p22_meta_elastic")
P22.mkdir(parents=True, exist_ok=True)

VAL_PATH  = P11/"val_patient_features_backbones.csv"
TEST_PATH = P11/"test_patient_features_backbones.csv"

assert VAL_PATH.exists() and TEST_PATH.exists(), "No encuentro los features de p11 (VAL/TEST)."

print("Rutas listas:", P11, P22)


Mounted at /content/drive
Rutas listas: /content/drive/MyDrive/CognitivaAI/p11_alt_backbones /content/drive/MyDrive/CognitivaAI/p22_meta_elastic


In [2]:
# === p22_B: Load + sanitize + cohort ===
val = pd.read_csv(VAL_PATH)
test = pd.read_csv(TEST_PATH)

# Esperamos columnas: ['patient_id','y_true', <features...>]
assert "patient_id" in val.columns and "y_true" in val.columns, "Faltan columnas clave en VAL"
assert "patient_id" in test.columns and "y_true" in test.columns, "Faltan columnas clave en TEST"

# Añade cohorte desde patient_id (OAS1/OAS2)
def cohort_from_pid(pid: str) -> str:
    pid = str(pid)
    if pid.startswith("OAS1"): return "OAS1"
    if pid.startswith("OAS2"): return "OAS2"
    return "UNK"

val["cohort"]  = val["patient_id"].map(cohort_from_pid)
test["cohort"] = test["patient_id"].map(cohort_from_pid)

# Selección de columnas numéricas de features (excluye id/target/cohort)
drop_like = {"patient_id","y_true","cohort"}
num_cols  = [c for c in val.columns if c not in drop_like]

# Filtra por NaN ratio≤0.4 (en VAL, que es nuestro set “de diseño”)
nan_ratio = val[num_cols].isna().mean().sort_values(ascending=False)
keep_cols = [c for c in num_cols if nan_ratio.get(c,0.0) <= 0.40]

print("VAL:", val.shape, "| TEST:", test.shape)
print("Cols totales:", len(num_cols), "| Mantengo:", len(keep_cols), "| Descarto:", len(num_cols) - len(keep_cols))

print("\nTop-10 NaN ratio (VAL):\n", nan_ratio.head(10))

# Construye matrices
X_val  = val[keep_cols].copy()
y_val  = val["y_true"].astype(int).values
X_test = test[keep_cols].copy()
y_test = test["y_true"].astype(int).values

val_ids, test_ids = val["patient_id"].values, test["patient_id"].values
val_coh, test_coh = val["cohort"].values, test["cohort"].values

print(f"\nShapes -> X_val:{X_val.shape} | X_test:{X_test.shape} | y_val:{y_val.shape} | y_test:{y_test.shape}")


VAL: (69, 59) | TEST: (70, 59)
Cols totales: 56 | Mantengo: 36 | Descarto: 20

Top-10 NaN ratio (VAL):
 patient_preds_ensemble_trimmed20    0.855072
patient_preds_ensemble_top7         0.855072
patient_preds_ensemble_p2           0.855072
patient_preds_ensemble_mean         0.855072
patient_preds_trimmed20             0.855072
patient_preds_top7                  0.855072
patient_preds_p2                    0.855072
patient_preds_mean                  0.855072
slices_preds_mean                   0.855072
slices_preds_p2                     0.855072
dtype: float64

Shapes -> X_val:(69, 36) | X_test:(70, 36) | y_val:(69,) | y_test:(70,)


In [3]:
# === p22_C: Utils (metrics + threshold search) ===
def eval_at_threshold(y_true, y_score, thr, cohort=None, tag="ALL"):
    y_pred = (y_score >= thr).astype(int)
    out = dict(
        AUC   = float(roc_auc_score(y_true, y_score)) if len(np.unique(y_true))>1 else np.nan,
        PRAUC = float(average_precision_score(y_true, y_score)),
        Acc   = float(accuracy_score(y_true, y_pred)),
        P     = float(precision_score(y_true, y_pred, zero_division=0)),
        R     = float(recall_score(y_true, y_pred, zero_division=0)),
        F1    = float(f1_score(y_true, y_pred, zero_division=0)),
        Brier = float(brier_score_loss(y_true, y_score)),
        thr   = float(thr),
        n     = int(len(y_true)),
        cohort= tag if cohort is None else str(cohort),
    )
    return out

def best_threshold_by_f1(y_true, y_score):
    # Barrido fino de thr (0..1)
    thrs = np.linspace(0.05, 0.95, 19)
    f1s  = []
    for t in thrs:
        pred = (y_score >= t).astype(int)
        f1s.append(f1_score(y_true, pred, zero_division=0))
    best_idx = int(np.argmax(f1s))
    return float(thrs[best_idx])

def report_split(y_true, y_score, thr, cohorts=None, title="GLOBAL_F1"):
    res = {"ALL": eval_at_threshold(y_true, y_score, thr, tag="ALL")}
    if cohorts is not None:
        for tag in sorted(np.unique(cohorts)):
            mask = (cohorts == tag)
            res[tag] = eval_at_threshold(y_true[mask], y_score[mask], thr, tag=tag)
    print(f"[{title}][VAL] {res['ALL']}")
    return res


In [5]:
# D) Calibración robusta (Platt / Isotónica) para base learners (LR y HGB)
# - Sin fugas (KFold en VAL)
# - Tolerante a NaNs (imputación donde hace falta)
# - Compatible con scikit-learn >=1.4 (usa 'estimator=' en CalibratedClassifierCV)
# - Guarda outputs y muestra métricas por cohorte y globales

from pathlib import Path
import numpy as np, pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score, f1_score, brier_score_loss

# === 1) Detecta VAL/TEST y columnas ===
assert 'val' in globals() and 'test' in globals(), "Se esperaba DataFrames VAL/TEST desde celdas anteriores."
VAL = val.copy()
TEST = test.copy()

# Asegura columnas obligatorias
for df, name in [(VAL,'VAL'), (TEST,'TEST')]:
    for col in ['patient_id','y_true']:
        assert col in df.columns, f"{name} debe tener columna '{col}'"

# Si no viene 'cohort', la creamos en base al prefijo del patient_id
if 'cohort' not in VAL.columns:
    VAL['cohort'] = np.where(VAL['patient_id'].astype(str).str.startswith('OAS2'), 'OAS2', 'OAS1')
if 'cohort' not in TEST.columns:
    TEST['cohort'] = np.where(TEST['patient_id'].astype(str).str.startswith('OAS2'), 'OAS2', 'OAS1')

# Selección de columnas de features
if 'cols_keep' in globals() and isinstance(keep_cols, (list, tuple)) and len(keep_cols) > 0:
    feat_cols = [c for c in keep_cols if c not in ['patient_id','y_true','cohort']]
else:
    # autoselección: numéricas no meta/ID
    black = {'patient_id','y_true','cohort'}
    feat_cols = [c for c in VAL.columns if c not in black and pd.api.types.is_numeric_dtype(VAL[c])]
    # filtra columnas con demasiados NaNs (como en tus pipelines previos)
    nan_ratio = VAL[feat_cols].isna().mean().sort_values(ascending=False)
    feat_cols = [c for c in feat_cols if nan_ratio.get(c,0.0) <= 0.40]

assert len(feat_cols) > 0, "No hay columnas de features tras el filtrado."

X_val  = VAL[feat_cols].copy()
y_val  = VAL['y_true'].astype(int).values
pid_val = VAL['patient_id'].astype(str).values
coh_val = VAL['cohort'].astype(str).values

X_test = TEST[feat_cols].copy()
y_test = TEST['y_true'].astype(int).values
pid_test = TEST['patient_id'].astype(str).values
coh_test = TEST['cohort'].astype(str).values

print(f"VAL: {X_val.shape} | TEST: {X_test.shape} | feats={len(feat_cols)}")

# === 2) Helpers ===
def metrics(y_true, y_score, thr=0.5):
    y_pred = (y_score >= thr).astype(int)
    out = {
        'AUC': float(roc_auc_score(y_true, y_score)) if len(np.unique(y_true))==2 else np.nan,
        'PRAUC': float(average_precision_score(y_true, y_score)),
        'Acc': float(accuracy_score(y_true, y_pred)),
        'P': float(precision_score(y_true, y_pred, zero_division=0)),
        'R': float(recall_score(y_true, y_pred, zero_division=0)),
        'F1': float(f1_score(y_true, y_pred, zero_division=0)),
        'Brier': float(brier_score_loss(y_true, y_score)),
        'thr': float(thr),
        'n': int(len(y_true))
    }
    return out

def best_f1_threshold(y_true, y_score):
    # barrido fino; si quieres más fino, baja el paso
    grid = np.linspace(0.05, 0.95, 19)
    f1s = [f1_score(y_true, (y_score>=t).astype(int), zero_division=0) for t in grid]
    return float(grid[int(np.argmax(f1s))])

def oof_and_calibrated(X, y, Xte, base, method='sigmoid', n_splits=5, random_state=42):
    """
    - Entrena OOF del modelo base (sin calibrar).
    - Calibra con CalibratedClassifierCV(estimator=base, cv='prefit') por fold.
    - Devuelve: oof_base, oof_cal, test_cal (promedio de calibradores).
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    oof_base = np.zeros(len(X))
    oof_cal  = np.zeros(len(X))
    test_cals = []

    for tr, va in skf.split(X, y):
        Xtr, Xva = X.iloc[tr], X.iloc[va]
        ytr, yva = y[tr], y[va]

        # clona base limpio por fold
        # (para pipelines, los re-creamos por tipo)
        if isinstance(base, str) and base=='LR':
            model = Pipeline([
                ('imp', SimpleImputer(strategy='median')),
                ('scaler', StandardScaler(with_mean=False)),  # robusto con columnas con var ~0
                ('lr', LogisticRegression(max_iter=2000, C=1.0, class_weight=None))
            ])
        elif isinstance(base, str) and base=='HGB':
            model = HistGradientBoostingClassifier(
                max_depth=None, learning_rate=0.06, max_iter=200,
                l2_regularization=0.0, random_state=42
            )
        else:
            # si pasaron un estimador ya construido
            from sklearn.base import clone
            model = clone(base)

        # fit base
        model.fit(Xtr, ytr)

        # OOF base
        oof_base[va] = model.predict_proba(Xva)[:,1]

        # calibración sobre modelo prefit
        cal = CalibratedClassifierCV(estimator=model, method=method, cv='prefit')
        cal.fit(Xva, yva)  # se ajusta la función de calibración con el fold de validación

        # OOF calibrado
        oof_cal[va] = cal.predict_proba(Xva)[:,1]

        # pred test calibrado por fold
        test_cals.append(cal.predict_proba(Xte)[:,1])

    test_cal = np.mean(np.stack(test_cals, axis=1), axis=1)
    return oof_base, oof_cal, test_cal

# === 3) Ejecuta calibraciones (LR e HGB; Platt e Isotónica) ===
print("Calibrando LR (Platt/Isotonic)...")
lr_base = 'LR'  # se construye dentro por fold con imputación
lr_oof_base_sig, lr_oof_cal_sig, lr_test_cal_sig = oof_and_calibrated(X_val, y_val, X_test, base=lr_base, method='sigmoid')
lr_oof_base_iso, lr_oof_cal_iso, lr_test_cal_iso = oof_and_calibrated(X_val, y_val, X_test, base=lr_base, method='isotonic')

print("Calibrando HGB (Platt/Isotonic)...")
hgb_base = 'HGB'  # HGB tolera NaNs
hgb_oof_base_sig, hgb_oof_cal_sig, hgb_test_cal_sig = oof_and_calibrated(X_val, y_val, X_test, base=hgb_base, method='sigmoid')
hgb_oof_base_iso, hgb_oof_cal_iso, hgb_test_cal_iso = oof_and_calibrated(X_val, y_val, X_test, base=hgb_base, method='isotonic')

# === 4) Métricas (umbral F1 óptimo en VAL por cada setting) ===
def report_block(name, oof_cal, test_cal):
    thr = best_f1_threshold(y_val, oof_cal)
    print(f"\n== {name} | thr@F1(VAL)={thr:.2f} ==")
    res_val  = metrics(y_val,  oof_cal,  thr)
    res_test = metrics(y_test, test_cal, thr)
    print("[VAL]",  res_val)
    print("[TEST]", res_test)
    return name, thr, res_val, res_test

results = []
results.append(report_block("LR|Platt",     lr_oof_cal_sig,  lr_test_cal_sig))
results.append(report_block("LR|Isotonic",  lr_oof_cal_iso,  lr_test_cal_iso))
results.append(report_block("HGB|Platt",    hgb_oof_cal_sig, hgb_test_cal_sig))
results.append(report_block("HGB|Isotonic", hgb_oof_cal_iso, hgb_test_cal_iso))

# === 5) Guardado en Drive ===
OUT = Path("/content/drive/MyDrive/CognitivaAI/p22_meta_ablation")
OUT.mkdir(parents=True, exist_ok=True)

# VAL / TEST por método
pd.DataFrame({
    "patient_id": pid_val, "y_true": y_val,
    "LR_platt": lr_oof_cal_sig, "LR_iso": lr_oof_cal_iso,
    "HGB_platt": hgb_oof_cal_sig, "HGB_iso": hgb_oof_cal_iso
}).to_csv(OUT / "p22_val_calibrations.csv", index=False)

pd.DataFrame({
    "patient_id": pid_test, "y_true": y_test,
    "LR_platt": lr_test_cal_sig, "LR_iso": lr_test_cal_iso,
    "HGB_platt": hgb_test_cal_sig, "HGB_iso": hgb_test_cal_iso
}).to_csv(OUT / "p22_test_calibrations.csv", index=False)

# Resumen JSON
import json
json.dump(
    [
        {"name": n, "thr": t, "VAL": v, "TEST": te}
        for (n, t, v, te) in results
    ],
    open(OUT / "p22_calibration_summary.json", "w"),
    indent=2
)

print("\n💾 Guardados en:", str(OUT))
print("• p22_val_calibrations.csv")
print("• p22_test_calibrations.csv")
print("• p22_calibration_summary.json")



VAL: (69, 36) | TEST: (70, 36) | feats=36
Calibrando LR (Platt/Isotonic)...
Calibrando HGB (Platt/Isotonic)...

== LR|Platt | thr@F1(VAL)=0.35 ==
[VAL] {'AUC': 0.7321731748726655, 'PRAUC': 0.7145407614908541, 'Acc': 0.6086956521739131, 'P': 0.5370370370370371, 'R': 0.9354838709677419, 'F1': 0.6823529411764706, 'Brier': 0.20752393411010403, 'thr': 0.35, 'n': 69}
[TEST] {'AUC': 0.6681743421052632, 'PRAUC': 0.6464341487486046, 'Acc': 0.6142857142857143, 'P': 0.5454545454545454, 'R': 0.9375, 'F1': 0.6896551724137931, 'Brier': 0.21893862322613, 'thr': 0.35, 'n': 70}

== LR|Isotonic | thr@F1(VAL)=0.35 ==
[VAL] {'AUC': 0.8590831918505942, 'PRAUC': 0.8251318361887122, 'Acc': 0.7536231884057971, 'P': 0.6842105263157895, 'R': 0.8387096774193549, 'F1': 0.7536231884057971, 'Brier': 0.14533011272141705, 'thr': 0.35, 'n': 69}
[TEST] {'AUC': 0.6669407894736842, 'PRAUC': 0.6178296133776486, 'Acc': 0.6, 'P': 0.5416666666666666, 'R': 0.8125, 'F1': 0.65, 'Brier': 0.23112782563176681, 'thr': 0.35, 'n': 70

In [7]:
# E) Consolidación y guardado de calibraciones (compatible con variables de la Celda D actual)

from pathlib import Path
import numpy as np, pandas as pd, json

# --- 1) Verificaciones mínimas
required_vars = [
    'pid_val','y_val','pid_test','y_test',
    'lr_oof_cal_sig','lr_test_cal_sig','lr_oof_cal_iso','lr_test_cal_iso',
    'hgb_oof_cal_sig','hgb_test_cal_sig','hgb_oof_cal_iso','hgb_test_cal_iso'
]
missing = [v for v in required_vars if v not in globals()]
assert not missing, f"Faltan variables de la Celda D: {missing}"

# --- 2) Helpers de métrica (idénticos a D, por consistencia)
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score, f1_score, brier_score_loss

def metrics(y_true, y_score, thr=0.5):
    y_pred = (y_score >= thr).astype(int)
    return {
        'AUC': float(roc_auc_score(y_true, y_score)) if len(np.unique(y_true))==2 else np.nan,
        'PRAUC': float(average_precision_score(y_true, y_score)),
        'Acc': float(accuracy_score(y_true, y_pred)),
        'P': float(precision_score(y_true, y_pred, zero_division=0)),
        'R': float(recall_score(y_true, y_pred, zero_division=0)),
        'F1': float(f1_score(y_true, y_pred, zero_division=0)),
        'Brier': float(brier_score_loss(y_true, y_score)),
        'thr': float(thr),
        'n': int(len(y_true))
    }

def best_f1_threshold(y_true, y_score):
    grid = np.linspace(0.05, 0.95, 19)
    f1s = [f1_score(y_true, (y_score>=t).astype(int), zero_division=0) for t in grid]
    return float(grid[int(np.argmax(f1s))])

# --- 3) Construye DataFrames VAL/TEST con todas las columnas calibradas
VAL_df = pd.DataFrame({
    "patient_id": pid_val, "y_true": y_val,
    "LR_platt":  lr_oof_cal_sig, "LR_iso":  lr_oof_cal_iso,
    "HGB_platt": hgb_oof_cal_sig, "HGB_iso": hgb_oof_cal_iso
})
TEST_df = pd.DataFrame({
    "patient_id": pid_test, "y_true": y_test,
    "LR_platt":  lr_test_cal_sig, "LR_iso":  lr_test_cal_iso,
    "HGB_platt": hgb_test_cal_sig, "HGB_iso": hgb_test_cal_iso
})

# --- 4) Métricas resumen usando umbral óptimo por configuración (en VAL) y aplicándolo en TEST
def eval_pair(name, yv, sv, yt, st):
    thr = best_f1_threshold(yv, sv)
    return {
        "name": name,
        "thr": thr,
        "VAL": metrics(yv, sv, thr),
        "TEST": metrics(yt, st, thr)
    }

summary = [
    eval_pair("LR|Platt",     y_val, VAL_df["LR_platt"].values,  y_test, TEST_df["LR_platt"].values),
    eval_pair("LR|Isotonic",  y_val, VAL_df["LR_iso"].values,    y_test, TEST_df["LR_iso"].values),
    eval_pair("HGB|Platt",    y_val, VAL_df["HGB_platt"].values, y_test, TEST_df["HGB_platt"].values),
    eval_pair("HGB|Isotonic", y_val, VAL_df["HGB_iso"].values,   y_test, TEST_df["HGB_iso"].values),
]

# --- 5) Guardado
OUT = Path("/content/drive/MyDrive/CognitivaAI/p22_meta_ablation")
OUT.mkdir(parents=True, exist_ok=True)
VAL_df.to_csv(OUT / "p22_val_calibrations.csv", index=False)
TEST_df.to_csv(OUT / "p22_test_calibrations.csv", index=False)
with open(OUT / "p22_calibration_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print("💾 Guardados en:", OUT)
print(" - p22_val_calibrations.csv")
print(" - p22_test_calibrations.csv")
print(" - p22_calibration_summary.json")

print("\nResumen rápido:")
for row in summary:
    print(f"{row['name']} | thr={row['thr']:.2f} | "
          f"VAL F1={row['VAL']['F1']:.3f} AUC={row['VAL']['AUC']:.3f} | "
          f"TEST F1={row['TEST']['F1']:.3f} AUC={row['TEST']['AUC']:.3f}")


💾 Guardados en: /content/drive/MyDrive/CognitivaAI/p22_meta_ablation
 - p22_val_calibrations.csv
 - p22_test_calibrations.csv
 - p22_calibration_summary.json

Resumen rápido:
LR|Platt | thr=0.35 | VAL F1=0.682 AUC=0.732 | TEST F1=0.690 AUC=0.668
LR|Isotonic | thr=0.35 | VAL F1=0.754 AUC=0.859 | TEST F1=0.650 AUC=0.667
HGB|Platt | thr=0.30 | VAL F1=0.753 AUC=0.817 | TEST F1=0.633 AUC=0.702
HGB|Isotonic | thr=0.30 | VAL F1=0.773 AUC=0.887 | TEST F1=0.641 AUC=0.666


In [9]:
# F) Evaluación y reporte final (usa variables en memoria o carga desde CSV)
# - No depende de nombres previos; es robusto
# - Calcula umbral F1 en VAL por método y evalúa en TEST
# - Opcional: añade un blend simple LR_iso/HGB_iso
from pathlib import Path
import numpy as np, pandas as pd
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score, f1_score, brier_score_loss

# ==== 1) Helpers ====
def metrics(y_true, y_score, thr=0.5):
    y_pred = (y_score >= thr).astype(int)
    return {
        "AUC": float(roc_auc_score(y_true, y_score)) if len(np.unique(y_true))==2 else np.nan,
        "PRAUC": float(average_precision_score(y_true, y_score)),
        "Acc": float(accuracy_score(y_true, y_pred)),
        "P": float(precision_score(y_true, y_pred, zero_division=0)),
        "R": float(recall_score(y_true, y_pred, zero_division=0)),
        "F1": float(f1_score(y_true, y_pred, zero_division=0)),
        "Brier": float(brier_score_loss(y_true, y_score)),
        "thr": float(thr),
        "n": int(len(y_true))
    }

def best_f1_threshold(y_true, y_score):
    grid = np.linspace(0.05, 0.95, 19)
    f1s = [f1_score(y_true, (y_score>=t).astype(int), zero_division=0) for t in grid]
    return float(grid[int(np.argmax(f1s))])

# ==== 2) Detectar datos VAL/TEST y/o cargar CSV de D ====
OUT = Path("/content/drive/MyDrive/CognitivaAI/p22_meta_ablation")
val_csv  = OUT/"p22_val_calibrations.csv"
test_csv = OUT/"p22_test_calibrations.csv"

# y_true / patient_id
if 'VAL' in globals() and 'TEST' in globals():
    y_val   = VAL['y_true'].astype(int).values
    y_test  = TEST['y_true'].astype(int).values
    pid_val = VAL['patient_id'].astype(str).values
    pid_test= TEST['patient_id'].astype(str).values
    coh_val = (VAL['cohort'] if 'cohort' in VAL.columns else VAL['patient_id'].astype(str).str.startswith('OAS2').map({True:'OAS2',False:'OAS1'})).values
    coh_test= (TEST['cohort'] if 'cohort' in TEST.columns else TEST['patient_id'].astype(str).str.startswith('OAS2').map({True:'OAS2',False:'OAS1'})).values
else:
    # si no están en memoria, los recuperamos de los CSV generados en D
    assert val_csv.exists() and test_csv.exists(), "No encuentro VAL/TEST ni los CSV de calibración. Ejecuta la celda D primero."
    _v = pd.read_csv(val_csv)
    _t = pd.read_csv(test_csv)
    y_val   = _v['y_true'].astype(int).values
    y_test  = _t['y_true'].astype(int).values
    pid_val = _v['patient_id'].astype(str).values
    pid_test= _t['patient_id'].astype(str).values
    # inferimos cohort a partir del prefijo del ID
    coh_val = np.where(_v['patient_id'].astype(str).str.startswith('OAS2'), 'OAS2', 'OAS1')
    coh_test= np.where(_t['patient_id'].astype(str).str.startswith('OAS2'), 'OAS2', 'OAS1')

# ==== 3) Recolectar scores calibrados (desde memoria o CSV) ====
def get_vec(varname, fallback_df=None, fallback_col=None, n=None):
    if varname in globals():
        return globals()[varname]
    if fallback_df is not None and fallback_col is not None:
        return fallback_df[fallback_col].values
    raise KeyError(f"No se encontró '{varname}' ni columna '{fallback_col}' para cargar.")

# Cargar CSV si hace falta
_vdf = pd.read_csv(val_csv)  if val_csv.exists()  else None
_tdf = pd.read_csv(test_csv) if test_csv.exists() else None

scores = {}

# LR Platt / Iso
scores['LR_platt'] = (
    get_vec('lr_oof_cal_sig', _vdf, 'LR_platt', len(y_val)),
    get_vec('lr_test_cal_sig', _tdf, 'LR_platt', len(y_test))
)
scores['LR_iso'] = (
    get_vec('lr_oof_cal_iso', _vdf, 'LR_iso', len(y_val)),
    get_vec('lr_test_cal_iso', _tdf, 'LR_iso', len(y_test))
)

# HGB Platt / Iso
scores['HGB_platt'] = (
    get_vec('hgb_oof_cal_sig', _vdf, 'HGB_platt', len(y_val)),
    get_vec('hgb_test_cal_sig', _tdf, 'HGB_platt', len(y_test))
)
scores['HGB_iso'] = (
    get_vec('hgb_oof_cal_iso', _vdf, 'HGB_iso', len(y_val)),
    get_vec('hgb_test_cal_iso', _tdf, 'HGB_iso', len(y_test))
)

# Blend simple (por si queremos comparativa adicional)
scores['BLEND_iso'] = (
    0.5 * scores['LR_iso'][0] + 0.5 * scores['HGB_iso'][0],
    0.5 * scores['LR_iso'][1] + 0.5 * scores['HGB_iso'][1]
)

# ==== 4) Métricas global y por cohortes ====
def eval_block(name, val_scores, test_scores):
    thr = best_f1_threshold(y_val, val_scores)
    res_val  = metrics(y_val,  val_scores,  thr)
    res_test = metrics(y_test, test_scores, thr)

    # por cohortes
    out = {"name": name, "thr": thr, "VAL": res_val, "TEST": res_test, "COHORTS": {}}
    for cohort, mask in {"OAS1": coh_val=="OAS1", "OAS2": coh_val=="OAS2"}.items():
        if mask.sum() > 0:
            out["COHORTS"][f"VAL_{cohort}"] = metrics(y_val[mask],  val_scores[mask], thr)
    for cohort, mask in {"OAS1": coh_test=="OAS1", "OAS2": coh_test=="OAS2"}.items():
        if mask.sum() > 0:
            out["COHORTS"][f"TEST_{cohort}"] = metrics(y_test[mask], test_scores[mask], thr)
    return out

summary = []
for name, (v,t) in scores.items():
    print(f"\n== {name} ==")
    block = eval_block(name, v, t)
    print("[VAL]",  block["VAL"])
    print("[TEST]", block["TEST"])
    summary.append(block)

# ==== 5) Guardados ====
OUT.mkdir(parents=True, exist_ok=True)
pd.DataFrame({
    "patient_id": pid_val, "y_true": y_val,
    **{k: scores[k][0] for k in scores}
}).to_csv(OUT/"p22_val_all_methods.csv", index=False)

pd.DataFrame({
    "patient_id": pid_test, "y_true": y_test,
    **{k: scores[k][1] for k in scores}
}).to_csv(OUT/"p22_test_all_methods.csv", index=False)

import json
with open(OUT/"p22_final_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print("\n💾 Guardado en:", OUT)
print(" - p22_val_all_methods.csv")
print(" - p22_test_all_methods.csv")
print(" - p22_final_summary.json")




== LR_platt ==
[VAL] {'AUC': 0.7321731748726655, 'PRAUC': 0.7145407614908541, 'Acc': 0.6086956521739131, 'P': 0.5370370370370371, 'R': 0.9354838709677419, 'F1': 0.6823529411764706, 'Brier': 0.20752393411010403, 'thr': 0.35, 'n': 69}
[TEST] {'AUC': 0.6681743421052632, 'PRAUC': 0.6464341487486046, 'Acc': 0.6142857142857143, 'P': 0.5454545454545454, 'R': 0.9375, 'F1': 0.6896551724137931, 'Brier': 0.21893862322613, 'thr': 0.35, 'n': 70}

== LR_iso ==
[VAL] {'AUC': 0.8590831918505942, 'PRAUC': 0.8251318361887122, 'Acc': 0.7536231884057971, 'P': 0.6842105263157895, 'R': 0.8387096774193549, 'F1': 0.7536231884057971, 'Brier': 0.14533011272141705, 'thr': 0.35, 'n': 69}
[TEST] {'AUC': 0.6669407894736842, 'PRAUC': 0.6178296133776486, 'Acc': 0.6, 'P': 0.5416666666666666, 'R': 0.8125, 'F1': 0.65, 'Brier': 0.23112782563176681, 'thr': 0.35, 'n': 70}

== HGB_platt ==
[VAL] {'AUC': 0.8174872665534805, 'PRAUC': 0.7816516728423423, 'Acc': 0.7246376811594203, 'P': 0.6304347826086957, 'R': 0.9354838709677