In [2]:
# A) Descubrimiento de predicciones (slice y paciente) en varias carpetas

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

from pathlib import Path
import pandas as pd
import numpy as np
import re, json

BASE = Path("/content/drive/MyDrive/CognitivaAI")
OUT_DIR = BASE / "p11_alt_backbones"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Carpetas con modelos previos que mencionaste
CANDIDATE_DIRS = [
    OUT_DIR,  # p11
    BASE / "ft_effb3_stable_colab_plus",
    BASE / "ft_effb3_stable_colab",
    BASE / "ft_effb3_improved_colab",
    BASE / "ft_effb3_colab",
    BASE / "oas1_resnet18_linearprobe",
    BASE / "embeddings_new",
]

def find_csvs(d):
    patterns = [
        "*val*slice*.csv", "*val*_slices*.csv", "*val*png*pred*.csv", "*val*pred*.csv",
        "*test*slice*.csv","*test*_slices*.csv","*test*png*pred*.csv","*test*pred*.csv",
        "val_*slice*.csv", "test_*slice*.csv",
        "val_*patient*pred*.csv", "test_*patient*pred*.csv",
        "val_patient_preds*.csv", "test_patient_preds*.csv",
    ]
    all_csv = []
    for pat in patterns:
        all_csv += list(d.rglob(pat))
    # Filtrado básico para no coger métricas/plots
    all_csv = [p for p in all_csv if p.is_file() and 'history' not in p.name.lower() and 'metrics' not in p.name.lower()]
    return sorted(set(all_csv))

catalog = {}
for root in CANDIDATE_DIRS:
    if not root.exists():
        continue
    csvs = find_csvs(root)
    if not csvs:
        continue
    # Pairing por nombre para val/test
    by_key = {}
    for p in csvs:
        low = p.name.lower()
        key = None
        if "val" in low:
            key = ("VAL", p)
        elif "test" in low:
            key = ("TEST", p)
        if key:
            base_key = re.sub(r"(val|test)", "", low)
            by_key.setdefault(base_key, {"VAL": None, "TEST": None})
            by_key[base_key][key[0]] = p

    for _, pair in by_key.items():
        if pair["VAL"] is None or pair["TEST"] is None:
            continue
        # Etiqueta backbone a partir de carpeta/archivo
        tag = pair["VAL"].parent.name
        if tag.lower() in ["p11_alt_backbones", "ft_effb3_stable_colab_plus", "ft_effb3_stable_colab",
                           "ft_effb3_improved_colab", "ft_effb3_colab", "oas1_resnet18_linearprobe",
                           "embeddings_new"]:
            # usa archivo para nombrar si la carpeta es poco informativa
            tag = pair["VAL"].stem.replace("val_", "").replace("_val", "")
        catalog[tag] = {"VAL": pair["VAL"], "TEST": pair["TEST"]}

print("📚 Catálogo detectado (parejas VAL+TEST):")
for k,v in catalog.items():
    print(f" - {k}\n    VAL : {v['VAL'].relative_to(BASE)}\n    TEST: {v['TEST'].relative_to(BASE)}")

with open(OUT_DIR / "p11_backbone_catalog.json", "w") as f:
    json.dump({k: {"VAL": str(v["VAL"]), "TEST": str(v["TEST"])} for k,v in catalog.items()}, f, indent=2)

print(f"\n✅ Guardado catálogo en: {OUT_DIR/'p11_backbone_catalog.json'}")




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
📚 Catálogo detectado (parejas VAL+TEST):
 - SwinTiny
    VAL : p11_alt_backbones/SwinTiny/val_png_preds_swin.csv
    TEST: p11_alt_backbones/SwinTiny/test_png_preds_swin.csv
 - convnext_tiny.in12k_ft_in1k_slices
    VAL : p11_alt_backbones/convnext_tiny.in12k_ft_in1k_val_slices.csv
    TEST: p11_alt_backbones/convnext_tiny.in12k_ft_in1k_test_slices.csv
 - png_preds_d121
    VAL : p11_alt_backbones/val_png_preds_d121.csv
    TEST: p11_alt_backbones/test_png_preds_d121.csv
 - patient_preds
    VAL : ft_effb3_colab/val_patient_preds.csv
    TEST: ft_effb3_colab/test_patient_preds.csv
 - patient_preds_ensemble
    VAL : ft_effb3_stable_colab_plus/val_patient_preds_ensemble.csv
    TEST: ft_effb3_stable_colab_plus/test_patient_preds_ensemble.csv
 - patient_preds_plus
    VAL : ft_effb3_stable_colab_plus/val_patient_preds_plus.csv
    TEST: ft_effb3_stable_colab_pl

In [4]:
# ==== B-fix: lector robusto para estandarizar columnas y evitar KeyError ====
import json, re, numpy as np, pandas as pd
from pathlib import Path

BASE_DIR = Path("/content/drive/MyDrive/CognitivaAI")
OUT_DIR  = BASE_DIR / "p11_alt_backbones"
CATALOG_JSON = OUT_DIR / "p11_backbone_catalog.json"
CONFIG_JSON  = OUT_DIR / "p11_config.json"   # creado al inicio de P11

def safe_sigmoid(z):
    z = np.clip(z, -50, 50)
    return 1.0/(1.0+np.exp(-z))

# Cargar config para mapas (si existen)
png2pid_map = {}
if CONFIG_JSON.exists():
    cfg = json.loads(CONFIG_JSON.read_text())
    try:
        val_map_path  = Path(cfg["VAL_MAP"])
        test_map_path = Path(cfg["TEST_MAP"])
        if val_map_path.exists():
            dfv = pd.read_csv(val_map_path)
            png2pid_map.update(dict(zip(dfv["png_path"], dfv["patient_id"])))
        if test_map_path.exists():
            dft = pd.read_csv(test_map_path)
            png2pid_map.update(dict(zip(dft["png_path"], dft["patient_id"])))
        print(f"🔗 Mapas cargados: {len(png2pid_map):,} rutas → patient_id")
    except Exception as e:
        print("⚠️ No pude cargar mapas desde config:", e)

_pid_regex = re.compile(r"(OAS1_\d{4})")

def derive_patient_id_from_path(p):
    if p in png2pid_map:
        return png2pid_map[p]
    m = _pid_regex.search(str(p))
    return m.group(1) if m else None

def standardize_preds(csv_path):
    df = pd.read_csv(csv_path)
    # normalizar nombres
    df.columns = [c.strip() for c in df.columns]
    lower = {c.lower(): c for c in df.columns}

    # localizar id
    id_col = None
    for cand in ["patient_id","png_path","path","img_path","image_path","fname","file"]:
        if cand in lower:
            id_col = lower[cand]; break
    if id_col is None:
        raise KeyError(f"❌ No encuentro columna de id en {csv_path.name}. Vienen: {list(df.columns)}")

    # localizar y_true
    ytrue_col = None
    for cand in ["y_true","target","label","y","gt","truth"]:
        if cand in lower:
            ytrue_col = lower[cand]; break
    if ytrue_col is None and "y_pred" in lower:
        ytrue_col = lower["y_pred"]  # si solo hay 0/1 predicho
    if ytrue_col is None:
        raise KeyError(f"❌ No encuentro y_true/target en {csv_path.name}. Vienen: {list(df.columns)}")

    # localizar score o logit
    yscore_col = None
    for cand in ["y_score","score","y_prob","prob","proba","p","prediction","pred","sigmoid"]:
        if cand in lower:
            yscore_col = lower[cand]; break

    if yscore_col is None:
        # intentar con logit
        logit_col = None
        for cand in ["logit","logits","y_logit"]:
            if cand in lower:
                logit_col = lower[cand]; break
        if logit_col is None:
            # último recurso: usar y_pred como prob
            if "y_pred" in lower:
                yscore = df[lower["y_pred"]].astype(float).clip(0,1).values
                src = "y_pred"
            else:
                raise KeyError(f"❌ No encuentro y_score/score/prob ni logit en {csv_path.name}. Vienen: {list(df.columns)}")
        else:
            yscore = safe_sigmoid(df[logit_col].astype(float).values)
            src = f"sigmoid({logit_col})"
    else:
        yscore = df[yscore_col].astype(float).values
        src = yscore_col

    # construir DataFrame estándar
    out = pd.DataFrame({
        "src_path": csv_path.as_posix(),
        "id_raw": df[id_col].astype(str),
        "y_true": pd.to_numeric(df[ytrue_col], errors="coerce").fillna(0).astype(int),
        "y_score": yscore
    })

    # asegurar patient_id
    if id_col.lower() == "patient_id":
        out["patient_id"] = out["id_raw"]
    else:
        out["patient_id"] = out["id_raw"].map(derive_patient_id_from_path)

    missing = out["patient_id"].isna().sum()
    if missing:
        print(f"⚠️ {missing} filas sin patient_id derivable en {csv_path.name}. Intentaré continuar; esas filas se omitirán en pooling por paciente.")
        out = out.dropna(subset=["patient_id"])

    print(f"✅ {csv_path.name}: cols OK (id='{id_col}', y_true='{ytrue_col}', score='{src}'), filas={len(out)}")
    return out[["patient_id","y_true","y_score"]].reset_index(drop=True)

# --- Cargar catálogo y leer TODOS los pares ---
catalog = json.loads(Path(CATALOG_JSON).read_text())
feature_tables = {}
for tag, paths in catalog.items():
    try:
        val_df  = standardize_preds(BASE_DIR / paths["VAL"])
        test_df = standardize_preds(BASE_DIR / paths["TEST"])
        feature_tables[tag] = {"VAL": val_df, "TEST": test_df}
    except Exception as e:
        print(f"⛔ Saltando '{tag}' por error: {e}")

print("\n🧾 Fuentes cargadas:", list(feature_tables.keys()))
assert feature_tables, "No se cargó ninguna fuente válida. Revisa mensajes arriba."

# (opcional) ejemplo de inspección
for tag, splits in feature_tables.items():
    for sp, df in splits.items():
        print(f"• {tag} [{sp}] -> {df.shape}, cols={list(df.columns)[:3]}")
    break  # muestra solo la primera para no saturar salida

# Deja 'feature_tables' en memoria para la Celda C (pooling y ensembles).



🔗 Mapas cargados: 1,880 rutas → patient_id
✅ val_png_preds_swin.csv: cols OK (id='patient_id', y_true='y_true', score='sigmoid(logit)'), filas=940
✅ test_png_preds_swin.csv: cols OK (id='patient_id', y_true='y_true', score='sigmoid(logit)'), filas=940
✅ convnext_tiny.in12k_ft_in1k_val_slices.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=940
✅ convnext_tiny.in12k_ft_in1k_test_slices.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=940
✅ val_png_preds_d121.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=940
✅ test_png_preds_d121.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=940
✅ val_patient_preds.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=10
✅ test_patient_preds.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=47
✅ val_patient_preds_ensemble.csv: cols OK (id='patient_id', y_true='y_true', score='y_score'), filas=10
✅ test_patient_preds_en

In [6]:
# === Celda C-fix: Generación de features paciente a partir de predicciones slice-level ===
from pathlib import Path
import json, numpy as np, pandas as pd

# Montaje Drive (idempotente)
from google.colab import drive
try:
    drive.mount('/content/drive')
except Exception as e:
    print("Drive ya montado.")

BASE = Path("/content/drive/MyDrive/CognitivaAI")
OUT_DIR = BASE / "p11_alt_backbones"
CAT_PATH = OUT_DIR / "p11_backbone_catalog.json"

assert CAT_PATH.exists(), f"No encuentro catálogo: {CAT_PATH}"
with open(CAT_PATH, "r") as f:
    catalog = json.load(f)

def safe_sigmoid(z):
    z = np.clip(z, -50, 50)
    return 1.0/(1.0 + np.exp(-z))

def read_and_normalize(csv_path: Path):
    df = pd.read_csv(csv_path)
    cols = {c.lower(): c for c in df.columns}
    # Intentar mapear a ['patient_id','y_true','y_score']
    if 'patient_id' not in cols and 'id' in cols:  # por si acaso
        cols['patient_id'] = cols.pop('id')
    pid_col = cols.get('patient_id', None)
    # verdad terreno
    ytrue_col = cols.get('y_true', cols.get('target', None))
    # score: preferir y_score/pred, o derivar de logit/logits
    if 'y_score' in cols:
        score_col = cols['y_score']
        score = df[score_col].astype(float)
    elif 'pred' in cols:
        score_col = cols['pred']
        score = df[score_col].astype(float)
    elif 'logit' in cols:
        score = safe_sigmoid(df[cols['logit']].astype(float))
    elif 'logits' in cols:
        score = safe_sigmoid(df[cols['logits']].astype(float))
    elif 'sigmoid(logit)' in cols:
        score = df[cols['sigmoid(logit)']].astype(float)
    elif 'sigmoid(logits)' in cols:
        score = df[cols['sigmoid(logits)']].astype(float)
    else:
        raise ValueError(f"No localizo columna de score en {csv_path.name}. Columnas={df.columns.tolist()}")

    assert pid_col is not None and ytrue_col is not None, f"Faltan columnas clave en {csv_path.name}"
    df_norm = pd.DataFrame({
        'patient_id': df[pid_col].astype(str),
        'y_true': df[ytrue_col].astype(int),
        'y_score': score.astype(float)
    })
    return df_norm

def trimmed_mean(values, trim=0.2):
    x = np.sort(np.asarray(values, float))
    n = len(x)
    k = int(np.floor(trim * n))
    if 2*k >= n:
        return float(x.mean())
    return float(x[k:n-k].mean())

def topk_mean(values, k=7):
    x = np.sort(np.asarray(values, float))
    k = int(min(k, len(x)))
    if k <= 0:
        return float(np.mean(x)) if len(x) else np.nan
    return float(np.mean(x[-k:]))

def power_mean(values, p=2.0):
    x = np.asarray(values, float)
    if len(x) == 0:
        return np.nan
    x = np.clip(x, 1e-8, 1 - 1e-8)  # evitar extremos degenerados
    if p == 0:
        return float(np.exp(np.mean(np.log(x))))
    return float((np.mean(x**p))**(1.0/p))

def build_patient_features(df_norm: pd.DataFrame, tag: str):
    # agrega por paciente
    g = df_norm.groupby(['patient_id', 'y_true'])['y_score'].apply(list).reset_index()
    feats = []
    for _, row in g.iterrows():
        scores = row['y_score']
        feats.append({
            'patient_id': row['patient_id'],
            'y_true': int(row['y_true']),
            f'{tag}_mean': float(np.mean(scores)),
            f'{tag}_trimmed20': trimmed_mean(scores, trim=0.2),
            f'{tag}_top7': topk_mean(scores, k=7),
            f'{tag}_p2': power_mean(scores, p=2.0),
        })
    return pd.DataFrame(feats)

VAL_parts, TEST_parts = [], []

# Releer cada par VAL/TEST del catálogo y construir features
for tag, paths in catalog.items():
    val_path = BASE / paths['VAL']
    test_path = BASE / paths['TEST']
    # solo incluir fuentes slice-level (evitar ya-agregados a nivel paciente si no son comparables)
    # Heurística: admitir siempre; si ya vienen por paciente (n≈47) igual se ignoran en merge
    val_df = read_and_normalize(val_path)
    test_df = read_and_normalize(test_path)

    # Solo continuar si parecen slice-level (>>47 filas) o si son 47 pero con per-slice ya agregados por patient
    val_feats = build_patient_features(val_df, tag=tag)
    test_feats = build_patient_features(test_df, tag=tag)
    VAL_parts.append(val_feats)
    TEST_parts.append(test_feats)
    print(f"✅ {tag}: VAL {val_feats.shape} | TEST {test_feats.shape}")

# Merge incremental por patient_id + y_true
def merge_all(parts):
    out = parts[0].copy()
    for t in parts[1:]:
        out = out.merge(t, on=['patient_id','y_true'], how='inner')
    return out

VAL = merge_all(VAL_parts)
TEST = merge_all(TEST_parts)

# Guardar
val_out = OUT_DIR / "val_patient_features_backbones.csv"
test_out = OUT_DIR / "test_patient_features_backbones.csv"
VAL.to_csv(val_out, index=False)
TEST.to_csv(test_out, index=False)

print(f"\n💾 Guardado:\n- {val_out}\n- {test_out}")
print("VAL cols:", VAL.columns.tolist())
print("TEST cols:", TEST.columns.tolist())
print("VAL shape:", VAL.shape, "| TEST shape:", TEST.shape)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ SwinTiny: VAL (47, 6) | TEST (47, 6)
✅ convnext_tiny.in12k_ft_in1k_slices: VAL (47, 6) | TEST (47, 6)
✅ png_preds_d121: VAL (47, 6) | TEST (47, 6)
✅ patient_preds: VAL (10, 6) | TEST (47, 6)
✅ patient_preds_ensemble: VAL (10, 6) | TEST (47, 6)
✅ patient_preds_plus: VAL (47, 6) | TEST (47, 6)
✅ png_preds: VAL (47, 6) | TEST (47, 6)
✅ slice_preds_plus: VAL (47, 6) | TEST (47, 6)
✅ slice_preds_seedENS: VAL (47, 6) | TEST (47, 6)
✅ slices_preds: VAL (10, 6) | TEST (47, 6)
✅ slice_preds: VAL (47, 6) | TEST (47, 6)
✅ patient_eval_colab: VAL (47, 6) | TEST (47, 6)

💾 Guardado:
- /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/val_patient_features_backbones.csv
- /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/test_patient_features_backbones.csv
VAL cols: ['patient_id', 'y_true', 'SwinTiny_mean', 'SwinTiny_trimmed20', 'SwinTiny_top7', 'SwinTiny_p2', 'conv

In [7]:
# === Celda D: Ensemble de backbones (stacking LR + baseline promedio) ===
import numpy as np, pandas as pd
from pathlib import Path
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression

BASE = Path("/content/drive/MyDrive/CognitivaAI")
OUT_DIR = BASE / "p11_alt_backbones"

VAL = pd.read_csv(OUT_DIR/"val_patient_features_backbones.csv")
TEST = pd.read_csv(OUT_DIR/"test_patient_features_backbones.csv")

# Separar X,y (excluimos identificadores)
id_cols = ['patient_id','y_true']
feat_cols = [c for c in VAL.columns if c not in id_cols]

X_val, y_val = VAL[feat_cols].values, VAL['y_true'].values
X_tst, y_tst = TEST[feat_cols].values, TEST['y_true'].values

def sweep_best_f1(y_true, scores):
    best = (0.0, 0.0)  # f1, thr
    for thr in np.linspace(0,1,2001):
        y_hat = (scores >= thr).astype(int)
        P, R, F1, _ = precision_recall_fscore_support(y_true, y_hat, average='binary', zero_division=0)
        if F1 > best[0]:
            best = (F1, thr, P, R)
    return {'F1':best[0], 'thr':best[1], 'P':best[2], 'R':best[3]}

def metrics_at(y_true, scores, thr):
    y_hat = (scores >= thr).astype(int)
    P, R, F1, _ = precision_recall_fscore_support(y_true, y_hat, average='binary', zero_division=0)
    Acc = np.mean(y_hat == y_true)
    return dict(
        AUC=float(roc_auc_score(y_true, scores)),
        PRAUC=float(average_precision_score(y_true, scores)),
        Acc=float(Acc), P=float(P), R=float(R), thr=float(thr), n=int(len(y_true))
    )

def thr_for_recall(y_true, scores, target=1.0):
    # umbral mínimo que alcanza recall>=target (si no se alcanza, retorna 0.0)
    best_thr = 0.0
    grid = np.linspace(0,1,2001)
    for thr in grid:
        y_hat = (scores >= thr).astype(int)
        P, R, _, _ = precision_recall_fscore_support(y_true, y_hat, average='binary', zero_division=0)
        if R >= target:
            best_thr = thr
            break
    return float(best_thr)

def summarize(tag, yv, sv, yt, st):
    f1v = sweep_best_f1(yv, sv)
    mv  = metrics_at(yv, sv, f1v['thr'])
    mt  = metrics_at(yt, st, f1v['thr'])
    rec90_thr = thr_for_recall(yv, sv, target=0.90)
    rec100_thr= thr_for_recall(yv, sv, target=1.00)
    mv90 = metrics_at(yv, sv, rec90_thr); mt90 = metrics_at(yt, st, rec90_thr)
    mv100= metrics_at(yv, sv, rec100_thr); mt100= metrics_at(yt, st, rec100_thr)
    print(f"\n[{tag}]")
    print("VAL F1-opt:", mv)
    print("TEST F1-opt:", mt)
    print("VAL@REC90:", mv90, "\nTEST@REC90:", mt90)
    print("VAL@REC100:", mv100, "\nTEST@REC100:", mt100)
    return {
        'variant': tag,
        'VAL_F1': mv, 'TEST_F1': mt,
        'VAL_REC90': mv90, 'TEST_REC90': mt90,
        'VAL_REC100': mv100, 'TEST_REC100': mt100
    }

# --- Baseline: promedio uniforme de backbones usando solo las columnas *_mean
mean_cols = [c for c in feat_cols if c.endswith('_mean')]
if len(mean_cols) >= 2:
    s_val_mean = VAL[mean_cols].mean(axis=1).values
    s_tst_mean = TEST[mean_cols].mean(axis=1).values
    res_avg = summarize(f'AVG[{",".join(mean_cols)}]', y_val, s_val_mean, y_tst, s_tst_mean)
else:
    print("⚠️ No hay suficientes columnas *_mean para baseline promedio.")

# --- Stacking con LogisticRegression sobre todas las features construidas
clf = LogisticRegression(max_iter=2000, class_weight='balanced', solver='liblinear')
clf.fit(X_val, y_val)
s_val = clf.predict_proba(X_val)[:,1]
s_tst = clf.predict_proba(X_tst)[:,1]
res_stack = summarize('STACK_LR(all_features)', y_val, s_val, y_tst, s_tst)

# Guardar resumen en OUT_DIR
summary_path = OUT_DIR / "backbone_ensemble_summary.json"
import json
with open(summary_path, "w") as f:
    json.dump({'features': feat_cols,
               'stack_coef': getattr(clf, 'coef_', None).tolist(),
               'stack_intercept': getattr(clf, 'intercept_', None).tolist(),
               'results': {'avg': res_avg if len(mean_cols)>=2 else None,
                           'stack': res_stack}}, f, indent=2)
print(f"\n💾 Resumen guardado en: {summary_path}")



[AVG[SwinTiny_mean,convnext_tiny.in12k_ft_in1k_slices_mean,png_preds_d121_mean,patient_preds_mean,patient_preds_ensemble_mean,patient_preds_plus_mean,png_preds_mean,slice_preds_plus_mean,slice_preds_seedENS_mean,slices_preds_mean,slice_preds_mean,patient_eval_colab_mean]]
VAL F1-opt: {'AUC': 0.4761904761904762, 'PRAUC': 0.38888888888888884, 'Acc': 0.4, 'P': 0.3333333333333333, 'R': 1.0, 'thr': 0.3525, 'n': 10}
TEST F1-opt: {'AUC': 0.712962962962963, 'PRAUC': 0.7242325642885107, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.3525, 'n': 47}
VAL@REC90: {'AUC': 0.4761904761904762, 'PRAUC': 0.38888888888888884, 'Acc': 0.3, 'P': 0.3, 'R': 1.0, 'thr': 0.0, 'n': 10} 
TEST@REC90: {'AUC': 0.712962962962963, 'PRAUC': 0.7242325642885107, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.0, 'n': 47}
VAL@REC100: {'AUC': 0.4761904761904762, 'PRAUC': 0.38888888888888884, 'Acc': 0.3, 'P': 0.3, 'R': 1.0, 'thr': 0.0, 'n': 10} 
TEST@REC100: {'AUC': 0.7129629629629

In [8]:
# --- Celda E1: features con cobertura completa ---
from pathlib import Path
import pandas as pd

BASE = Path("/content/drive/MyDrive/CognitivaAI")
OUT  = BASE/"p11_alt_backbones"

VAL = pd.read_csv(OUT/"val_patient_features_backbones.csv")
TEST= pd.read_csv(OUT/"test_patient_features_backbones.csv")

def full47(df):
    # columnas por fuente terminan en _mean/_trimmed20/_top7/_p2
    # detectamos fuentes por el prefijo antes del sufijo
    suf = ["_mean","_trimmed20","_top7","_p2"]
    feats = [c for c in df.columns if any(c.endswith(s) for s in suf)]
    # mapeo fuente -> sus columnas
    src2cols = {}
    for c in feats:
        src = c.rsplit("_",1)[0]  # quita último sufijo
        src2cols.setdefault(src, []).append(c)
    # nos quedamos con fuentes que tengan sus 4 columnas y sin NaN, y con 47 filas en VAL
    keep_src = []
    for src, cols in src2cols.items():
        if len(cols) < 3:  # al menos mean/top7/trimmed o similar
            continue
        if VAL[cols].shape[0]==47 and not VAL[cols].isna().any().any():
            keep_src.append(src)

    print("Fuentes con cobertura completa (47 VAL):", keep_src)
    # Construimos nuevos DataFrames minimalistas
    base_cols = ["patient_id","y_true"]
    VAL_full  = VAL[base_cols + sum([src2cols[s] for s in keep_src], [])].copy()
    TEST_full = TEST[base_cols + sum([src2cols[s] for s in keep_src], [])].copy()
    print("VAL_full:", VAL_full.shape, "| TEST_full:", TEST_full.shape)

    VAL_full.to_csv(OUT/"val_features_full47.csv", index=False)
    TEST_full.to_csv(OUT/"test_features_full47.csv", index=False)


In [10]:
# === E2-prep: alias de archivos esperados por E2 ===
from pathlib import Path
import pandas as pd

OUT = Path("/content/drive/MyDrive/CognitivaAI/p11_alt_backbones")
src_val  = OUT / "val_patient_features_backbones.csv"
src_test = OUT / "test_patient_features_backbones.csv"
dst_val  = OUT / "val_features_full47.csv"
dst_test = OUT / "test_features_full47.csv"

assert src_val.exists(),  f"No existe {src_val}"
assert src_test.exists(), f"No existe {src_test}"

VAL  = pd.read_csv(src_val)
TEST = pd.read_csv(src_test)

# Sanidad rápida
print("VAL shape:", VAL.shape, "| TEST shape:", TEST.shape)
assert "patient_id" in VAL.columns and "y_true" in VAL.columns, "Faltan columnas clave en VAL"
assert "patient_id" in TEST.columns and "y_true" in TEST.columns, "Faltan columnas clave en TEST"
assert len(VAL) in (47, 10),  "VAL debería tener 47 (o 10 si subset); revisa el origen"
assert len(TEST) == 47,       "TEST debería tener 47 pacientes"

# Guardar con los nombres que E2 espera
VAL.to_csv(dst_val, index=False)
TEST.to_csv(dst_test, index=False)
print("✅ Archivos preparados para E2:")
print(" -", dst_val)
print(" -", dst_test)


VAL shape: (10, 50) | TEST shape: (47, 50)
✅ Archivos preparados para E2:
 - /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/val_features_full47.csv
 - /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/test_features_full47.csv


In [13]:
# === E2: Random search de ensembles de BACKBONES (fix comparaciones) ===
import json, math, random
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import (
    roc_auc_score, average_precision_score,
    precision_recall_curve, roc_curve, accuracy_score,
    precision_score, recall_score, f1_score
)

BASE = Path("/content/drive/MyDrive/CognitivaAI")
OUT  = BASE / "p11_alt_backbones"

# --- Carga de features de pacientes (creados en CFix/D) ---
VAL  = pd.read_csv(OUT / "val_patient_features_backbones.csv")
TEST = pd.read_csv(OUT / "test_patient_features_backbones.csv")

assert set(["patient_id","y_true"]).issubset(VAL.columns), "VAL sin columnas base"
assert set(["patient_id","y_true"]).issubset(TEST.columns), "TEST sin columnas base"

# --- Selección de columnas de BACKBONES (solo variantes mean) ---
# Puedes ampliarlo si quieres incluir trimmed/top7/p2
FEATURES = [
    "SwinTiny_mean",
    "convnext_tiny.in12k_ft_in1k_slices_mean",
    "png_preds_d121_mean",
]
for c in FEATURES:
    if c not in VAL.columns:
        raise KeyError(f"Columna {c} no encontrada en VAL. Columnas disponibles: {list(VAL.columns)[:10]} ...")
    if c not in TEST.columns:
        raise KeyError(f"Columna {c} no encontrada en TEST. Columnas disponibles: {list(TEST.columns)[:10]} ...")

Xv = VAL[FEATURES].values.astype(float)
Xt = TEST[FEATURES].values.astype(float)
yv = VAL["y_true"].values.astype(int)
yt = TEST["y_true"].values.astype(int)

# --- Helpers de evaluación ---
def eval_at_threshold(y_true, scores, thr):
    y_pred = (scores >= thr).astype(int)
    return {
        "Acc": float(accuracy_score(y_true, y_pred)),
        "P":   float(precision_score(y_true, y_pred, zero_division=0)),
        "R":   float(recall_score(y_true, y_pred)),
        "thr": float(thr),
        "n":   int(len(y_true)),
    }

def eval_pack(y_true, scores):
    # AUC / PR-AUC
    try:
        auc  = float(roc_auc_score(y_true, scores))
    except Exception:
        auc = float("nan")
    try:
        prauc = float(average_precision_score(y_true, scores))
    except Exception:
        prauc = float("nan")

    # F1-óptimo sobre rejilla + puntos reales de PR
    precision, recall, thr_pr = precision_recall_curve(y_true, scores)  # thr_pr shape = n-1
    # Evitar problemas si list está vacío
    grid = np.linspace(0, 1, 501)
    thr_all = np.unique(np.concatenate([grid, thr_pr[:-1] if len(thr_pr) > 1 else np.array([0.5])]))
    best_f1, best_thr = -1.0, 0.0
    for thr in thr_all:
        m = eval_at_threshold(y_true, scores, thr)
        f1 = 0.0
        if (m["P"] + m["R"]) > 0:
            f1 = 2 * m["P"] * m["R"] / (m["P"] + m["R"])
        if f1 > best_f1:
            best_f1, best_thr = float(f1), float(thr)
    mF1 = eval_at_threshold(y_true, scores, best_thr)
    mF1["AUC"] = auc
    mF1["PRAUC"] = prauc

    # Youden (max TPR - FPR)
    fpr, tpr, thr_roc = roc_curve(y_true, scores)
    youden_idx = np.argmax(tpr - fpr)
    thr_youden = float(thr_roc[youden_idx])
    mY = eval_at_threshold(y_true, scores, thr_youden)
    mY["AUC"] = auc
    mY["PRAUC"] = prauc

    # Recall-target (90% y 100%) -> seguimos tu convención: thr=0 fuerza R≈1.0 con scores ∈ [0,1]
    mR90 = eval_at_threshold(y_true, scores, 0.0); mR90["AUC"] = auc; mR90["PRAUC"] = prauc
    mR100 = eval_at_threshold(y_true, scores, 0.0); mR100["AUC"] = auc; mR100["PRAUC"] = prauc

    return {"F1": mF1, "Youden": mY, "REC90": mR90, "REC100": mR100}

def weighted_scores(X, w):
    w = np.asarray(w, dtype=float)
    w = w / (w.sum() + 1e-12)
    return (X * w).sum(axis=1)

# --- Random search de pesos (Dirichlet) ---
N_SAMPLES = 800   # puedes subir/bajar
SEED      = 42
rng = np.random.default_rng(SEED)

best = None  # guardará tupla SOLO con escalares: (PRAUC_VAL, AUC_VAL, w_tuple, metrics_dict)
topk = []    # almacenar top-10 por PRAUC_VAL

for i in range(N_SAMPLES):
    # Dirichlet genera pesos no-negativos que suman 1
    w = rng.dirichlet(alpha=np.ones(len(FEATURES)))
    pv = weighted_scores(Xv, w)
    mv = eval_pack(yv, pv)   # dict con F1/Youden/etc

    # --- Tupla de comparación SOLO con escalares para evitar el error ---
    cand = (float(mv["F1"]["PRAUC"]), float(mv["F1"]["AUC"]), tuple(map(float, w)), mv)

    if (best is None) or (cand > best):
        best = cand

    # mantener top-10
    topk.append(cand)
    if len(topk) > 10:
        topk = sorted(topk, reverse=True)[:10]

# --- Mejor configuración en VAL ---
best_prauc, best_auc, best_w_tuple, best_metrics_val = best
best_w = np.array(best_w_tuple, dtype=float)
pt = weighted_scores(Xt, best_w)
mt = eval_pack(yt, pt)

summary = {
    "variant": "BackboneEnsemble_DIRICHLET_means",
    "FEATURES": FEATURES,
    "N_SAMPLES": N_SAMPLES,
    "SEED": SEED,
    "weights": {FEATURES[i]: float(best_w[i]) for i in range(len(FEATURES))},
    "VAL_F1": best_metrics_val["F1"],
    "TEST_F1": mt["F1"],
    "VAL_Youden": best_metrics_val["Youden"],
    "TEST_Youden": mt["Youden"],
    "VAL_REC90": best_metrics_val["REC90"],
    "TEST_REC90": mt["REC90"],
    "VAL_REC100": best_metrics_val["REC100"],
    "TEST_REC100": mt["REC100"],
    "top10_VAL_F1": [
        {
            "F1_VAL": float(t[3]["F1"]["P"] * t[3]["F1"]["R"] * 2 / (t[3]["F1"]["P"] + t[3]["F1"]["R"]) if (t[3]["F1"]["P"]+t[3]["F1"]["R"])>0 else 0.0),
            "weights": {FEATURES[j]: float(t[2][j]) for j in range(len(FEATURES))}
        }
        for t in sorted(topk, reverse=True)
    ]
}

# --- Salida ---
print(json.dumps(summary, indent=2))

# Guardados
with open(OUT / "backbone_randomsearch_dirichlet_means.json", "w") as f:
    json.dump(summary, f, indent=2)

row = {
    "variant": "BKB-ENS(Dirichlet,means)",
    "VAL_AUC": summary["VAL_F1"]["AUC"],
    "VAL_PRAUC": summary["VAL_F1"]["PRAUC"],
    "TEST_AUC": summary["TEST_F1"]["AUC"],
    "TEST_PRAUC": summary["TEST_F1"]["PRAUC"],
    "TEST_Acc": summary["TEST_F1"]["Acc"],
    "TEST_Recall": summary["TEST_F1"]["R"],
    "TEST_Precision": summary["TEST_F1"]["P"],
    "thr(F1)": summary["TEST_F1"]["thr"],
    "n": summary["TEST_F1"]["n"],
}
cmp_path = OUT / "comparison_backbones_eval.csv"
try:
    cmp_df = pd.read_csv(cmp_path)
except Exception:
    cmp_df = pd.DataFrame()
cmp_df = pd.concat([cmp_df, pd.DataFrame([row])], ignore_index=True)
cmp_df.to_csv(cmp_path, index=False)
print(f"📁 Actualizado: {cmp_path}")



{
  "variant": "BackboneEnsemble_DIRICHLET_means",
  "FEATURES": [
    "SwinTiny_mean",
    "convnext_tiny.in12k_ft_in1k_slices_mean",
    "png_preds_d121_mean"
  ],
  "N_SAMPLES": 800,
  "SEED": 42,
  "weights": {
    "SwinTiny_mean": 0.9719824672875522,
    "convnext_tiny.in12k_ft_in1k_slices_mean": 0.004054230018887773,
    "png_preds_d121_mean": 0.023963302693559934
  },
  "VAL_F1": {
    "Acc": 0.7,
    "P": 0.5,
    "R": 1.0,
    "thr": 0.47400000000000003,
    "n": 10,
    "AUC": 0.7142857142857142,
    "PRAUC": 0.6333333333333333
  },
  "TEST_F1": {
    "Acc": 0.46808510638297873,
    "P": 0.4444444444444444,
    "R": 1.0,
    "thr": 0.43518113616705306,
    "n": 47,
    "AUC": 0.5203703703703704,
    "PRAUC": 0.5229506017798011
  },
  "VAL_Youden": {
    "Acc": 0.7,
    "P": 0.5,
    "R": 1.0,
    "thr": 0.474212409535214,
    "n": 10,
    "AUC": 0.7142857142857142,
    "PRAUC": 0.6333333333333333
  },
  "TEST_Youden": {
    "Acc": 0.6170212765957447,
    "P": 0.66666666666666

In [14]:
# E3 — Dirichlet ampliado en simplex con más señales útiles
import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

OUT = Path("/content/drive/MyDrive/CognitivaAI/p11_alt_backbones")
VAL = pd.read_csv(OUT/"val_patient_features_backbones.csv")
TEST= pd.read_csv(OUT/"test_patient_features_backbones.csv")

# Subconjunto de features más informativas (backbones + EffNet P10-ext)
FEATS = [
    # SwinTiny (mejor: top7)
    "SwinTiny_top7", "SwinTiny_trimmed20", "SwinTiny_mean",
    # ConvNeXt
    "convnext_tiny.in12k_ft_in1k_slices_top7", "convnext_tiny.in12k_ft_in1k_slices_trimmed20", "convnext_tiny.in12k_ft_in1k_slices_mean",
    # DenseNet
    "png_preds_d121_top7", "png_preds_d121_trimmed20", "png_preds_d121_mean",
    # EffNet P10-ext (proxies del proyecto base)
    "patient_preds_plus_mean", "slice_preds_plus_mean", "slice_preds_seedENS_mean"
]

# Filtro por columnas presentes (por si alguna falta)
FEATS = [c for c in FEATS if c in VAL.columns and c in TEST.columns]

yV = VAL["y_true"].values.astype(float)
yT = TEST["y_true"].values.astype(float)
XV = VAL[FEATS].values.astype(float)
XT = TEST[FEATS].values.astype(float)

def eval_split(y, p):
    # métrica F1-opt + AUC/PR-AUC y umbral óptimo en VAL
    thr_grid = np.linspace(0,1,401)
    f1s = [f1_score(y, p>=t) for t in thr_grid]
    i = int(np.argmax(f1s))
    thr = float(thr_grid[i])
    return {
        "AUC": float(roc_auc_score(y, p)),
        "PRAUC": float(average_precision_score(y, p)),
        "Acc": float(((p>=thr)==y).mean()),
        "P": float(((p>=thr) & (y==1)).sum() / max((p>=thr).sum(),1)),
        "R": float(((p>=thr) & (y==1)).sum() / (y==1).sum()),
        "thr": thr,
        "n": int(len(y)),
    }

rng = np.random.default_rng(42)
N = 2000   # rápido; puedes subir si quieres
best = None

for _ in range(N):
    w = rng.dirichlet(np.ones(len(FEATS)))
    pv = (XV @ w)
    mv = eval_split(yV, pv)
    # criterio: maximizar PR-AUC y luego AUC en VAL
    cand = (mv["PRAUC"], mv["AUC"])
    if (best is None) or (cand > best[0]):
        # guarda también test con el mismo umbral de VAL
        thr = mv["thr"]
        pt = (XT @ w)
        mt = {
            "AUC": float(roc_auc_score(yT, pt)),
            "PRAUC": float(average_precision_score(yT, pt)),
            "Acc": float(((pt>=thr)==yT).mean()),
            "P": float(((pt>=thr) & (yT==1)).sum() / max((pt>=thr).sum(),1)),
            "R": float(((pt>=thr) & (yT==1)).sum() / (yT==1).sum()),
            "thr": thr, "n": int(len(yT))
        }
        best = ((mv["PRAUC"], mv["AUC"]), {
            "weights": {f: float(w[i]) for i,f in enumerate(FEATS)},
            "VAL_F1": mv, "TEST_F1": mt
        })

res = {
    "variant": "BackboneEnsemble_DIRICHLET_EXT",
    "FEATURES": FEATS,
    **best[1],
}

print(json.dumps(res, indent=2))
# append a comparison row (para tu CSV comparativo de p11)
row = {
    "variant": "DIRICHLET_EXT("+",".join([f.split('_')[0] for f in FEATS[:4]])+"...)",
    "VAL_AUC": res["VAL_F1"]["AUC"], "VAL_PRAUC": res["VAL_F1"]["PRAUC"],
    "TEST_AUC": res["TEST_F1"]["AUC"], "TEST_PRAUC": res["TEST_F1"]["PRAUC"],
    "TEST_Acc": res["TEST_F1"]["Acc"], "TEST_Recall": res["TEST_F1"]["R"], "TEST_Precision": res["TEST_F1"]["P"],
    "thr(F1)": res["TEST_F1"]["thr"], "n": res["TEST_F1"]["n"]
}
comp_path = OUT/"comparison_backbones_eval.csv"
try:
    comp = pd.read_csv(comp_path)
except Exception:
    comp = pd.DataFrame()
comp = pd.concat([comp, pd.DataFrame([row])], ignore_index=True)
comp.to_csv(comp_path, index=False)
print("📁 Actualizado:", comp_path)


{
  "variant": "BackboneEnsemble_DIRICHLET_EXT",
  "FEATURES": [
    "SwinTiny_top7",
    "SwinTiny_trimmed20",
    "SwinTiny_mean",
    "convnext_tiny.in12k_ft_in1k_slices_top7",
    "convnext_tiny.in12k_ft_in1k_slices_trimmed20",
    "convnext_tiny.in12k_ft_in1k_slices_mean",
    "png_preds_d121_top7",
    "png_preds_d121_trimmed20",
    "png_preds_d121_mean",
    "patient_preds_plus_mean",
    "slice_preds_plus_mean",
    "slice_preds_seedENS_mean"
  ],
  "weights": {
    "SwinTiny_top7": 0.062062403503960833,
    "SwinTiny_trimmed20": 0.08353838871471263,
    "SwinTiny_mean": 0.07255728353673895,
    "convnext_tiny.in12k_ft_in1k_slices_top7": 0.25426684217260964,
    "convnext_tiny.in12k_ft_in1k_slices_trimmed20": 0.10858588786135814,
    "convnext_tiny.in12k_ft_in1k_slices_mean": 0.023409202916122442,
    "png_preds_d121_top7": 0.2671135273008145,
    "png_preds_d121_trimmed20": 0.06085751693107517,
    "png_preds_d121_mean": 0.014791899134586303,
    "patient_preds_plus_mean": 0.

In [15]:
# E4 — Stacking con L1 fuerte sobre señales "fuertes"
import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

OUT = Path("/content/drive/MyDrive/CognitivaAI/p11_alt_backbones")
VAL = pd.read_csv(OUT/"val_patient_features_backbones.csv")
TEST= pd.read_csv(OUT/"test_patient_features_backbones.csv")

FEATS = [
    "SwinTiny_top7",
    "convnext_tiny.in12k_ft_in1k_slices_top7",
    "png_preds_d121_trimmed20",
    "patient_preds_plus_mean", "slice_preds_plus_mean", "slice_preds_seedENS_mean"
]
FEATS = [f for f in FEATS if f in VAL.columns]

yV = VAL["y_true"].values.astype(float)
yT = TEST["y_true"].values.astype(float)
XV = VAL[FEATS].values.astype(float)
XT = TEST[FEATS].values.astype(float)

# L1 fuerte y C bajo para evitar sobreajuste en 10 casos
clf = LogisticRegression(penalty="l1", C=0.25, solver="liblinear", max_iter=2000)
clf.fit(XV, yV)
pv = clf.predict_proba(XV)[:,1]

# umbral F1 óptimo en VAL
ths = np.linspace(0,1,401)
f1s = [f1_score(yV, pv>=t) for t in ths]
thr = float(ths[int(np.argmax(f1s))])

def metr(y, p, thr):
    return {
        "AUC": float(roc_auc_score(y, p)),
        "PRAUC": float(average_precision_score(y, p)),
        "Acc": float(((p>=thr)==y).mean()),
        "P": float(((p>=thr) & (y==1)).sum() / max((p>=thr).sum(),1)),
        "R": float(((p>=thr) & (y==1)).sum() / (y==1).sum()),
        "thr": thr, "n": int(len(y))
    }

mv = metr(yV, pv, thr)
pt = clf.predict_proba(XT)[:,1]
mt = metr(yT, pt, thr)

res = {
  "variant": "STACK_L1_STRONG",
  "FEATURES": FEATS,
  "coef": {f: float(c) for f,c in zip(FEATS, clf.coef_[0])},
  "intercept": float(clf.intercept_[0]),
  "VAL_F1": mv, "TEST_F1": mt
}
print(json.dumps(res, indent=2))

# add to comparison
row = {
    "variant": "STACK_L1_STRONG",
    "VAL_AUC": mv["AUC"], "VAL_PRAUC": mv["PRAUC"],
    "TEST_AUC": mt["AUC"], "TEST_PRAUC": mt["PRAUC"],
    "TEST_Acc": mt["Acc"], "TEST_Recall": mt["R"], "TEST_Precision": mt["P"],
    "thr(F1)": mt["thr"], "n": mt["n"]
}
comp_path = OUT/"comparison_backbones_eval.csv"
try:
    comp = pd.read_csv(comp_path)
except Exception:
    comp = pd.DataFrame()
comp = pd.concat([comp, pd.DataFrame([row])], ignore_index=True)
comp.to_csv(comp_path, index=False)
print("📁 Actualizado:", comp_path)


{
  "variant": "STACK_L1_STRONG",
  "FEATURES": [
    "SwinTiny_top7",
    "convnext_tiny.in12k_ft_in1k_slices_top7",
    "png_preds_d121_trimmed20",
    "patient_preds_plus_mean",
    "slice_preds_plus_mean",
    "slice_preds_seedENS_mean"
  ],
  "coef": {
    "SwinTiny_top7": 0.0,
    "convnext_tiny.in12k_ft_in1k_slices_top7": 0.0,
    "png_preds_d121_trimmed20": 0.0,
    "patient_preds_plus_mean": 0.0,
    "slice_preds_plus_mean": 0.0,
    "slice_preds_seedENS_mean": 0.0
  },
  "intercept": 0.0,
  "VAL_F1": {
    "AUC": 0.5,
    "PRAUC": 0.3,
    "Acc": 0.3,
    "P": 0.3,
    "R": 1.0,
    "thr": 0.0,
    "n": 10
  },
  "TEST_F1": {
    "AUC": 0.5,
    "PRAUC": 0.425531914893617,
    "Acc": 0.425531914893617,
    "P": 0.425531914893617,
    "R": 1.0,
    "thr": 0.0,
    "n": 47
  }
}
📁 Actualizado: /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/comparison_backbones_eval.csv


In [16]:
# E5 — Calibración isotónica sobre la mejor señal unitaria (SwinTiny_top7)
import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

OUT = Path("/content/drive/MyDrive/CognitivaAI/p11_alt_backbones")
VAL = pd.read_csv(OUT/"val_patient_features_backbones.csv")
TEST= pd.read_csv(OUT/"test_patient_features_backbones.csv")

feat = "SwinTiny_top7" if "SwinTiny_top7" in VAL.columns else "SwinTiny_mean"
yV = VAL["y_true"].values.astype(float); pV = VAL[feat].values.astype(float)
yT = TEST["y_true"].values.astype(float); pT = TEST[feat].values.astype(float)

iso = IsotonicRegression(out_of_bounds="clip")
pV_cal = iso.fit_transform(pV, yV)
pT_cal = iso.transform(pT)

def f1opt(y,p):
    ths = np.linspace(0,1,401)
    f1s = [f1_score(y, p>=t) for t in ths]
    i = int(np.argmax(f1s)); thr = float(ths[i])
    return thr

thr = f1opt(yV, pV_cal)

def metr(y, p, thr):
    return {
        "AUC": float(roc_auc_score(y, p)),
        "PRAUC": float(average_precision_score(y, p)),
        "Acc": float(((p>=thr)==y).mean()),
        "P": float(((p>=thr) & (y==1)).sum() / max((p>=thr).sum(),1)),
        "R": float(((p>=thr) & (y==1)).sum() / (y==1).sum()),
        "thr": thr, "n": int(len(y))
    }

res = {
  "variant": f"{feat}_ISOTONIC",
  "VAL_F1": metr(yV, pV_cal, thr),
  "TEST_F1": metr(yT, pT_cal, thr)
}
print(json.dumps(res, indent=2))

# add to comparison
row = {
    "variant": f"{feat}_ISOTONIC",
    "VAL_AUC": res["VAL_F1"]["AUC"], "VAL_PRAUC": res["VAL_F1"]["PRAUC"],
    "TEST_AUC": res["TEST_F1"]["AUC"], "TEST_PRAUC": res["TEST_F1"]["PRAUC"],
    "TEST_Acc": res["TEST_F1"]["Acc"], "TEST_Recall": res["TEST_F1"]["R"], "TEST_Precision": res["TEST_F1"]["P"],
    "thr(F1)": res["TEST_F1"]["thr"], "n": res["TEST_F1"]["n"]
}
comp_path = OUT/"comparison_backbones_eval.csv"
try:
    comp = pd.read_csv(comp_path)
except Exception:
    comp = pd.DataFrame()
comp = pd.concat([comp, pd.DataFrame([row])], ignore_index=True)
comp.to_csv(comp_path, index=False)
print("📁 Actualizado:", comp_path)


{
  "variant": "SwinTiny_top7_ISOTONIC",
  "VAL_F1": {
    "AUC": 0.7142857142857143,
    "PRAUC": 0.5555555555555556,
    "Acc": 0.4,
    "P": 0.3333333333333333,
    "R": 1.0,
    "thr": 0.0025,
    "n": 10
  },
  "TEST_F1": {
    "AUC": 0.5657407407407408,
    "PRAUC": 0.45797826494635,
    "Acc": 0.5531914893617021,
    "P": 0.48717948717948717,
    "R": 0.95,
    "thr": 0.0025,
    "n": 47
  }
}
📁 Actualizado: /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/comparison_backbones_eval.csv
