In [1]:
# === Celda A–auto-rescate: localiza VAL/TEST en todo MyDrive y fija rutas ===
from pathlib import Path
import pandas as pd, json, os

# 1) Montaje idempotente
try:
    from google.colab import drive
    if not (Path("/content/drive").exists() and os.listdir("/content/drive")):
        drive.mount('/content/drive')
    else:
        print("✅ Drive ya montado.")
except Exception as e:
    print("⚠️ Aviso montaje:", e)

MYDRIVE = Path("/content/drive/MyDrive")

# 2) Buscar los maps por nombre en TODO MyDrive (robusto ante carpetas movidas/renombradas)
def find_one(pattern):
    hits = list(MYDRIVE.rglob(pattern))
    return hits[0] if hits else None

val_map = find_one("oas1_val_colab_mapped.csv")
test_map = find_one("oas1_test_colab_mapped.csv")

print("🔎 VAL_MAP:", val_map if val_map else "⛔ no encontrado")
print("🔎 TEST_MAP:", test_map if test_map else "⛔ no encontrado")

if not val_map or not test_map:
    # Ayuda de depuración: lista posibles carpetas candidatas llamadas 'CognitivaAI*' y 'oas1_*'
    print("\n📂 Sondeo de carpetas candidatas:")
    cand = [p for p in MYDRIVE.glob("**/CognitivaAI*") if p.is_dir()]
    for p in sorted(cand)[:15]:
        print("  -", p)
    cand2 = [p for p in MYDRIVE.glob("**/oas1_*") if p.is_dir()]
    for p in sorted(cand2)[:15]:
        print("  -", p)
    raise FileNotFoundError("No localicé VAL/TEST maps. Revisa el listado anterior para la ruta real.")

# 3) DATA_DIR: padre común si coincide, si no, usa el padre de VAL_MAP
DATA_DIR = val_map.parent if val_map.parent == test_map.parent else val_map.parent

# 4) OUT_DIR para P11, bajo la carpeta CognitivaAI más cercana a DATA_DIR
#    (sube hasta encontrar 'CognitivaAI', si no, usa MyDrive/CognitivaAI)
def find_ancestor_named(path: Path, name: str):
    p = path
    for _ in range(10):
        if p.name == name:
            return p
        if p.parent == p:
            break
        p = p.parent
    return None

proj_root = find_ancestor_named(DATA_DIR, "CognitivaAI") or (MYDRIVE / "CognitivaAI")
OUT_DIR = proj_root / "p11_alt_backbones"
GRAPHS_DIR = OUT_DIR / "graphs_from_metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True); GRAPHS_DIR.mkdir(parents=True, exist_ok=True)

print("\n📁 PROJECT_DIR:", proj_root)
print("📁 DATA_DIR   :", DATA_DIR)
print("📁 OUT_DIR    :", OUT_DIR)

# 5) Validación de columnas mínimas
val_df  = pd.read_csv(val_map)
test_df = pd.read_csv(test_map)
need = {"png_path","target","patient_id"}
assert need.issubset(val_df.columns),  f"VAL_MAP debe contener {sorted(need)}; tiene {list(val_df.columns)}"
assert need.issubset(test_df.columns), f"TEST_MAP debe contener {sorted(need)}; tiene {list(test_df.columns)}"
print("✅ Columnas OK:", sorted(need), "| VAL cols:", list(val_df.columns))

# 6) Guardar configuración para el resto del pipeline
cfg = {
  "PROJECT_DIR": str(proj_root),
  "DATA_DIR": str(DATA_DIR),
  "OUT_DIR": str(OUT_DIR),
  "VAL_MAP": str(val_map),
  "TEST_MAP": str(test_map),
  "TRAIN_MAP": None  # opcional; se puede localizar igual si lo necesitas
}
with open(OUT_DIR/"p11_config.json","w") as f:
    json.dump(cfg, f, indent=2)
print("💾 Config guardada en:", OUT_DIR/"p11_config.json")

# 7) (opcional) muestra filas para verificar rutas absolutas
display(val_df.head(2)); display(test_df.head(2))





Mounted at /content/drive
🔎 VAL_MAP: /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_val_colab_mapped.csv
🔎 TEST_MAP: /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_test_colab_mapped.csv

📁 PROJECT_DIR: /content/drive/MyDrive/CognitivaAI
📁 DATA_DIR   : /content/drive/MyDrive/CognitivaAI/oas1_data
📁 OUT_DIR    : /content/drive/MyDrive/CognitivaAI/p11_alt_backbones
✅ Columnas OK: ['patient_id', 'png_path', 'target'] | VAL cols: ['png_path', 'target', 'patient_id', 'scan_id', 'source_hdr', 'has_mask']
💾 Config guardada en: /content/drive/MyDrive/CognitivaAI/p11_alt_backbones/p11_config.json


Unnamed: 0,png_path,target,patient_id,scan_id,source_hdr,has_mask
0,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0003,OAS1_0003_MR1,DATA\OAS1_RAW\OAS1_0003_MR1\RAW\OAS1_0003_MR1_...,1
1,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0003,OAS1_0003_MR1,DATA\OAS1_RAW\OAS1_0003_MR1\RAW\OAS1_0003_MR1_...,1


Unnamed: 0,png_path,target,patient_id,scan_id,source_hdr,has_mask
0,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,0,OAS1_0002,OAS1_0002_MR1,DATA\OAS1_RAW\OAS1_0002_MR1\RAW\OAS1_0002_MR1_...,1
1,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,0,OAS1_0002,OAS1_0002_MR1,DATA\OAS1_RAW\OAS1_0002_MR1\RAW\OAS1_0002_MR1_...,1


In [3]:
# --- Backbone trainer (timm) con fine-tune corto ---
!pip -q install timm==1.0.9

import torch, timm, numpy as np, pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split

# Candidatos TRAIN_MAP
TRAIN_CANDIDATES = [
    DATA_DIR / "oas1_train_colab_mapped.csv",
    DATA_DIR / "oas1_train_mapped.csv",
    DATA_DIR / "oas1_train.csv",
]
TRAIN_CANDIDATES = [p for p in TRAIN_CANDIDATES if p and Path(p).exists()]
TRAIN_MAP = TRAIN_CANDIDATES[0] if TRAIN_CANDIDATES else None

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BACKBONE = "convnext_tiny.in12k_ft_in1k"  # alternativas: "resnet50", "tf_efficientnet_b4_ns"
EPOCHS = 4
BATCH  = 32
LR     = 2e-4
NUM_WORKERS = 2

print("🔧 Backbone:", BACKBONE)

# Dataset
class PNGSet(Dataset):
  def __init__(self, df, tfm):
    self.df = df.reset_index(drop=True)
    self.tfm = tfm
  def __len__(self): return len(self.df)
  def __getitem__(self, i):
    r = self.df.iloc[i]
    img = Image.open(r["png_path"]).convert("RGB")
    if self.tfm: img = self.tfm(img)
    y = int(r["target"])
    pid = r["patient_id"]
    return img, y, pid

tf_train = transforms.Compose([
  transforms.Resize((224,224)),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor()
])
tf_eval = transforms.Compose([
  transforms.Resize((224,224)),
  transforms.ToTensor()
])

# Particiones
if TRAIN_MAP:
  train_df = pd.read_csv(TRAIN_MAP)
  for col in ["png_path","target","patient_id"]: assert col in train_df.columns
  tr_df, va_df = train_df, val_df
  print(f"🧪 Modo TRAIN_MAP: train={len(tr_df)} | val={len(va_df)} | test={len(test_df)}")
else:
  # split interno en VAL para poder ajustar algo
  tr_df, va_df = train_df, _ = train_test_split(val_df, test_size=0.2, random_state=42, stratify=val_df["target"])
  print(f"🧪 Modo sin TRAIN_MAP: train={len(tr_df)} | val(interna)={len(va_df)} | TEST={len(test_df)}")

dl_tr = DataLoader(PNGSet(tr_df, tf_train), batch_size=BATCH, shuffle=True,  num_workers=NUM_WORKERS)
dl_va = DataLoader(PNGSet(va_df, tf_eval),  batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS)

# Modelo
m = timm.create_model(BACKBONE, pretrained=True, num_classes=1)
m = m.to(device)
crit = nn.BCEWithLogitsLoss()
opt  = torch.optim.AdamW(m.parameters(), lr=LR)

def run_epoch(model, loader, train=True):
  model.train(train)
  tot, n = 0.0, 0
  for x,y,_ in loader:
    x,y = x.to(device), y.float().to(device)
    with torch.set_grad_enabled(train):
      z = model(x).squeeze(1)
      loss = crit(z, y)
      if train:
        opt.zero_grad(); loss.backward(); opt.step()
    tot += loss.item()*len(x); n+=len(x)
  return tot/n

best = (1e9, None)
for ep in range(1, EPOCHS+1):
  tr_loss = run_epoch(m, dl_tr, True)
  va_loss = run_epoch(m, dl_va, False)
  print(f"Epoch {ep}/{EPOCHS} | train {tr_loss:.4f} | val {va_loss:.4f}")
  if va_loss < best[0]:
    best = (va_loss, OUT_DIR/f"{BACKBONE.replace('/','_')}_best.pth")
    torch.save(m.state_dict(), best[1])
print("💾 Mejor checkpoint:", best[1].name)


🔧 Backbone: convnext_tiny.in12k_ft_in1k
🧪 Modo TRAIN_MAP: train=2820 | val=940 | test=940


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

Epoch 1/4 | train 0.7567 | val 0.6886
Epoch 2/4 | train 0.6888 | val 0.6839
Epoch 3/4 | train 0.6886 | val 0.6934
Epoch 4/4 | train 0.6883 | val 0.6887
💾 Mejor checkpoint: convnext_tiny.in12k_ft_in1k_best.pth


In [4]:
# --- Inferencia a nivel slice para VAL y TEST ---
from torch.utils.data import DataLoader
import numpy as np, torch

ckpt = OUT_DIR/f"{BACKBONE.replace('/','_')}_best.pth"
assert ckpt.exists(), "No se encontró el checkpoint entrenado"
m.load_state_dict(torch.load(ckpt, map_location=device))
m.eval()

def infer_to_csv(df, name, tfm):
  ds = PNGSet(df, tfm)
  dl = DataLoader(ds, batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS)
  rows=[]
  with torch.no_grad():
    for x,y,pid in dl:
      x = x.to(device)
      z = m(x).squeeze(1).cpu().numpy()   # logits
      rows += list(zip(pid, y.numpy().astype(int), z))
  out = pd.DataFrame(rows, columns=["patient_id","y_true","logit"])
  out["y_score"] = 1/(1+np.exp(-out["logit"].values))
  out.to_csv(OUT_DIR/f"{name}.csv", index=False)
  print("💾 Guardado:", (OUT_DIR/f"{name}.csv").name, "| rows:", len(out))
  return out

val_slices = infer_to_csv(val_df,  f"{BACKBONE}_val_slices",  tf_eval)
test_slices= infer_to_csv(test_df, f"{BACKBONE}_test_slices", tf_eval)


💾 Guardado: convnext_tiny.in12k_ft_in1k_val_slices.csv | rows: 940
💾 Guardado: convnext_tiny.in12k_ft_in1k_test_slices.csv | rows: 940


In [6]:
# --- Aggregación patient-level + métricas y comparación ---
import numpy as np, pandas as pd, json
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, precision_score, recall_score, confusion_matrix

CMP = proj_root / "ft_effb3_stable_colab_plus" / "comparison_patient_level_eval.csv"  # reutilizamos el mismo dashboard

def agg_patient(df, how="mean"):
  g = df.groupby("patient_id")
  if how=="mean":   s = g["y_score"].mean()
  elif how=="trimmed20":
    s = g["y_score"].apply(lambda v: np.mean(np.sort(v)[int(0.2*len(v)): int(0.8*len(v))] if len(v)>5 else np.mean(v)))
  elif how=="top7":
    s = g["y_score"].apply(lambda v: np.mean(np.sort(v)[-min(7,len(v)):]))
  else:
    raise ValueError("how no soportado")
  y = g["y_true"].max()  # todos iguales por paciente
  return y.values.astype(int), s.values

def thr_f1(y,p):
  th = np.linspace(0,1,2001)
  f1 = [f1_score(y,(p>=t).astype(int),zero_division=0) for t in th]
  return float(th[int(np.argmax(f1))])

def thr_youden(y,p):
  ths=np.linspace(0,1,2001); best=-1; best_t=0.5
  for t in ths:
    yhat=(p>=t).astype(int)
    tn,fp,fn,tp = confusion_matrix(y,yhat).ravel()
    sens=tp/(tp+fn) if (tp+fn)>0 else 0
    spec=tn/(tn+fp) if (tn+fp)>0 else 0
    J=sens+spec-1
    if J>best: best, best_t = J, float(t)
  return best_t

def metrics(y,p,thr):
  yhat=(p>=thr).astype(int)
  return dict(
    AUC=float(roc_auc_score(y,p)),
    PRAUC=float(average_precision_score(y,p)),
    Acc=float((yhat==y).mean()),
    P=float(precision_score(y,yhat,zero_division=0)),
    R=float(recall_score(y,yhat,zero_division=0)),
    thr=float(thr),
    n=int(len(y))
  )

rows=[]
for variant in ["mean","trimmed20","top7"]:
  vy, pv = agg_patient(val_slices,  variant)
  ty, pt = agg_patient(test_slices, variant)
  thrF = thr_f1(vy,pv); thrY = thr_youden(vy,pv)
  name = f"P11-{BACKBONE}-{variant}"
  rows += [
    dict(variant=name, split="VAL",  scheme="F1",     **metrics(vy,pv,thrF)),
    dict(variant=name, split="TEST", scheme="F1",     **metrics(ty,pt,thrF)),
    dict(variant=name, split="VAL",  scheme="YOUDEN", **metrics(vy,pv,thrY)),
    dict(variant=name, split="TEST", scheme="YOUDEN", **metrics(ty,pt,thrY)),
  ]

cmp = pd.read_csv(CMP) if CMP.exists() else pd.DataFrame(columns=rows[0].keys())
cmp = pd.concat([cmp, pd.DataFrame(rows)], ignore_index=True)
cmp.to_csv(CMP, index=False)
print("📁 Actualizado:", CMP)
pd.DataFrame(rows)


📁 Actualizado: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/comparison_patient_level_eval.csv


Unnamed: 0,variant,split,scheme,AUC,PRAUC,Acc,P,R,thr,n
0,P11-convnext_tiny.in12k_ft_in1k-mean,VAL,F1,0.55463,0.542833,0.425532,0.425532,1.0,0.0,47
1,P11-convnext_tiny.in12k_ft_in1k-mean,TEST,F1,0.510185,0.479049,0.425532,0.425532,1.0,0.0,47
2,P11-convnext_tiny.in12k_ft_in1k-mean,VAL,YOUDEN,0.55463,0.542833,0.425532,0.425532,1.0,0.0,47
3,P11-convnext_tiny.in12k_ft_in1k-mean,TEST,YOUDEN,0.510185,0.479049,0.425532,0.425532,1.0,0.0,47
4,P11-convnext_tiny.in12k_ft_in1k-trimmed20,VAL,F1,0.547222,0.534767,0.425532,0.425532,1.0,0.0,47
5,P11-convnext_tiny.in12k_ft_in1k-trimmed20,TEST,F1,0.500926,0.472317,0.425532,0.425532,1.0,0.0,47
6,P11-convnext_tiny.in12k_ft_in1k-trimmed20,VAL,YOUDEN,0.547222,0.534767,0.425532,0.425532,1.0,0.0,47
7,P11-convnext_tiny.in12k_ft_in1k-trimmed20,TEST,YOUDEN,0.500926,0.472317,0.425532,0.425532,1.0,0.0,47
8,P11-convnext_tiny.in12k_ft_in1k-top7,VAL,F1,0.542593,0.515774,0.425532,0.425532,1.0,0.0,47
9,P11-convnext_tiny.in12k_ft_in1k-top7,TEST,F1,0.511111,0.464268,0.425532,0.425532,1.0,0.0,47
