In [1]:
# Celda 0: Dependencias y configuración
!pip -q install timm==1.0.9 --no-deps

import os, math, time, json, random
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms

import timm
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, accuracy_score, precision_score, recall_score

from google.colab import drive
drive.mount('/content/drive')

SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# Rutas
DATA_DIR = "/content/drive/MyDrive/CognitivaAI/oas1_data"
CSV_TRAIN = f"{DATA_DIR}/oas1_train_colab_mapped.csv"
CSV_VAL   = f"{DATA_DIR}/oas1_val_colab_mapped.csv"
CSV_TEST  = f"{DATA_DIR}/oas1_test_colab_mapped.csv"
OUT_DIR   = "/content/drive/MyDrive/CognitivaAI/ft_effb3_colab"
os.makedirs(OUT_DIR, exist_ok=True)

# Hiperparámetros base
IMG_SIZE   = 300            # recomendado para EfficientNet-B3
BATCH_SIZE = 32             # T4 friendly (ajusta a 24-40 si falta memoria)
NUM_WORKERS= 2
EPOCHS     = 12             # entrenamiento corto con early stopping
BASE_LR    = 3e-4
WD         = 1e-4
PATIENCE   = 4              # early stopping


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.3/2.3 MB[0m [31m132.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Device: cuda


In [2]:
# Celda 1: Dataset MRI slices y DataLoaders

class MRISliceDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        df = pd.read_csv(csv_path)
        assert {'png_path','target','patient_id','scan_id'}.issubset(df.columns), "CSV con columnas requeridas"
        self.paths = df['png_path'].astype(str).tolist()
        self.labels = df['target'].astype(int).to_numpy()
        self.pids = df['patient_id'].astype(str).to_numpy()
        self.sids = df['scan_id'].astype(str).to_numpy()
        self.transform = transform

    def __len__(self): return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        img = Image.open(path).convert('L')  # imágenes axiales en escala de grises
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
        img = np.array(img, dtype=np.float32) / 255.0
        img = np.stack([img, img, img], axis=0)  # 1->3 canales
        if self.transform:
            # transform de torchvision espera PIL o tensor HWC; convertimos
            img_t = transforms.functional.to_pil_image(img.transpose(1,2,0))
            img_t = self.transform(img_t)
        else:
            img_t = torch.from_numpy(img)
        y = self.labels[idx]
        return img_t, y, self.pids[idx], self.sids[idx], path

# Transforms
mean_std = ([0.485,0.456,0.406],[0.229,0.224,0.225])
train_tfms = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.85,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=7),
    transforms.ToTensor(),
    transforms.Normalize(*mean_std),
])
eval_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(*mean_std),
])

ds_tr = MRISliceDataset(CSV_TRAIN, transform=train_tfms)
ds_va = MRISliceDataset(CSV_VAL,   transform=eval_tfms)
ds_te = MRISliceDataset(CSV_TEST,  transform=eval_tfms)

# Sampler balanceado por clase (opcional pero útil)
class_counts = np.bincount(ds_tr.labels, minlength=2)
w_neg, w_pos = 1.0/class_counts[0], 1.0/class_counts[1]
sample_weights = np.where(ds_tr.labels==1, w_pos, w_neg)
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

dl_tr = DataLoader(ds_tr, batch_size=BATCH_SIZE, sampler=sampler,  num_workers=NUM_WORKERS, pin_memory=True)
dl_va = DataLoader(ds_va, batch_size=BATCH_SIZE, shuffle=False,     num_workers=NUM_WORKERS, pin_memory=True)
dl_te = DataLoader(ds_te, batch_size=BATCH_SIZE, shuffle=False,     num_workers=NUM_WORKERS, pin_memory=True)

print("TRAIN slices:", len(ds_tr), "| VAL:", len(ds_va), "| TEST:", len(ds_te))
print("Class counts train:", class_counts, "→ pos_weight≈", round(class_counts[0]/max(1,class_counts[1]),3))


TRAIN slices: 2820 | VAL: 940 | TEST: 940
Class counts train: [1620 1200] → pos_weight≈ 1.35


In [3]:
# Celda 2: Modelo EfficientNet-B3 con fine-tuning parcial

BACKBONE = "tf_efficientnet_b3_ns"  # timm
model = timm.create_model(BACKBONE, pretrained=True, num_classes=0, in_chans=3)
feat_dim = model.num_features

# Head ligera
head = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(feat_dim, 1)
)

net = nn.Sequential(model, head).to(DEVICE)

# Congelar todo menos el último bloque del backbone + head
for p in model.parameters():
    p.requires_grad = False

# Descongelar el último bloque de EfficientNet-B3
# Identificamos módulos finales típicos en timm
for name, module in model.named_modules():
    last_block = ('blocks.6', 'blocks.7')  # por si la variante incluye más
    if any(name.startswith(lb) for lb in last_block):
        for p in module.parameters():
            p.requires_grad = True

# Head entrenable
for p in head.parameters():
    p.requires_grad = True

# Optimizador
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=BASE_LR, weight_decay=WD)
# Cosine schedule
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Pérdida con pos_weight
pos_weight = torch.tensor([class_counts[0]/max(1,class_counts[1])], device=DEVICE, dtype=torch.float32)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

print("Trainable params:", sum(p.numel() for p in net.parameters() if p.requires_grad))


  model = create_fn(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

Trainable params: 3285755


In [4]:
# Celda 3: Entrenamiento

def run_epoch(dataloader, train=True):
    net.train(train)
    total_loss = 0.0
    logits_all, y_all = [], []
    scaler = torch.cuda.amp.GradScaler(enabled=True)
    for xb, yb, *_ in dataloader:
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.float().to(DEVICE)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(True):
            logits = net(xb).squeeze(1)
            loss = criterion(logits, yb)
        if train:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        total_loss += loss.item() * xb.size(0)
        logits_all.append(logits.detach().float().cpu().numpy())
        y_all.append(yb.detach().float().cpu().numpy())
    if not train:
        with torch.no_grad():
            pass
    if train:
        scheduler.step()
    y_all = np.concatenate(y_all)
    logits_all = np.concatenate(logits_all)
    probs_all = 1/(1+np.exp(-logits_all))
    # Métricas slice-level
    auc = roc_auc_score(y_all, probs_all) if len(np.unique(y_all))>1 else np.nan
    pr  = average_precision_score(y_all, probs_all) if len(np.unique(y_all))>1 else np.nan
    # Thr 0.5
    yhat = (probs_all >= 0.5).astype(int)
    acc = accuracy_score(y_all, yhat)
    pre = precision_score(y_all, yhat, zero_division=0)
    rec = recall_score(y_all, yhat, zero_division=0)
    brier = np.mean((probs_all - y_all)**2)
    return {
        "loss": float(total_loss/len(dataloader.dataset)),
        "auc": float(auc) if not np.isnan(auc) else None,
        "pr": float(pr) if not np.isnan(pr) else None,
        "acc": float(acc),
        "pre": float(pre),
        "rec": float(rec),
        "brier": float(brier),
        "probs": probs_all.tolist(), # Convert numpy array to list
        "logits": logits_all.tolist(), # Convert numpy array to list
        "y": y_all.tolist() # Convert numpy array to list
    }

best_val = -np.inf
pat = 0
hist = []

for epoch in range(1, EPOCHS+1):
    tr = run_epoch(dl_tr, train=True)
    va = run_epoch(dl_va, train=False)
    # criterio: PR-AUC slice en VAL (más sensible a clase positiva)
    score = va["pr"] if va["pr"] is not None else va["auc"]
    hist.append({"epoch":epoch, "train":tr, "val":va})
    print(f"[{epoch:02d}] TR loss={tr['loss']:.4f} | VAL AUC={va['auc']:.3f} PR-AUC={va['pr']:.3f} Brier={va['brier']:.3f}")
    if score is not None and score > best_val:
        best_val = score
        pat = 0
        torch.save(net.state_dict(), f"{OUT_DIR}/best_ft_effb3.pth")
    else:
        pat += 1
        if pat >= PATIENCE:
            print("→ Early stopping.")
            break

# Guardar historial
with open(f"{OUT_DIR}/train_history.json","w") as f:
    json.dump(hist, f)
print("Entrenamiento finalizado. Mejor PR-AUC VAL:", round(best_val,4))

  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[01] TR loss=0.7466 | VAL AUC=0.674 PR-AUC=0.558 Brier=0.237


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[02] TR loss=0.6407 | VAL AUC=0.652 PR-AUC=0.563 Brier=0.237


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[03] TR loss=0.5669 | VAL AUC=0.666 PR-AUC=0.568 Brier=0.246


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[04] TR loss=0.5075 | VAL AUC=0.671 PR-AUC=0.583 Brier=0.242


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[05] TR loss=0.4643 | VAL AUC=0.665 PR-AUC=0.583 Brier=0.249


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[06] TR loss=0.4370 | VAL AUC=0.655 PR-AUC=0.577 Brier=0.257


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[07] TR loss=0.3698 | VAL AUC=0.667 PR-AUC=0.573 Brier=0.260


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[08] TR loss=0.3477 | VAL AUC=0.666 PR-AUC=0.572 Brier=0.277


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[09] TR loss=0.3292 | VAL AUC=0.661 PR-AUC=0.568 Brier=0.278
→ Early stopping.
Entrenamiento finalizado. Mejor PR-AUC VAL: 0.5833


In [5]:
# Celda 4: Inferencia + pooling paciente (mean y attention)

# Cargar mejor modelo
net.load_state_dict(torch.load(f"{OUT_DIR}/best_ft_effb3.pth", map_location=DEVICE))
net.eval()

@torch.no_grad()
def infer(dataloader):
    logits_all, y_all, pids_all = [], [], []
    for xb, yb, pids, *_ in dataloader:
        xb = xb.to(DEVICE)
        lg = net(xb).squeeze(1)
        logits_all.append(lg.float().cpu().numpy())
        y_all.append(yb.numpy())
        pids_all += list(pids)
    logits = np.concatenate(logits_all)
    y = np.concatenate(y_all)
    probs = 1/(1+np.exp(-logits))
    return logits, probs, y, np.array(pids_all)

log_tr, pr_tr, y_tr, pid_tr = infer(dl_tr)
log_va, pr_va, y_va, pid_va = infer(dl_va)
log_te, pr_te, y_te, pid_te = infer(dl_te)

def patient_pool_mean(probs, labels, pids):
    df = pd.DataFrame({"pid":pids, "y":labels, "p":probs})
    g = df.groupby("pid")
    p_pool = g["p"].mean().values
    y_pool = g["y"].mean().round().astype(int).values
    return y_pool, p_pool, g.size().values

def patient_pool_attention(logits, labels, pids, temp=1.0):
    # Atención softmax sobre |logits| como importancias (simple, estable)
    df = pd.DataFrame({"pid":pids, "y":labels, "z":logits})
    outs = []
    for pid, grp in df.groupby("pid"):
        z = grp["z"].values
        # pesos ~ softmax(|z|/T) para resaltar slices informativos
        w = np.exp(np.abs(z)/temp); w = w / (w.sum()+1e-8)
        p = 1/(1+np.exp(-z))
        p_att = (w*p).sum()
        y = int(round(grp["y"].mean()))
        outs.append((pid, y, p_att, len(grp)))
    outs = pd.DataFrame(outs, columns=["pid","y","p","n"])
    return outs["y"].values, outs["p"].values, outs["n"].values

def eval_patient(y, p, thr=0.5):
    auc = roc_auc_score(y, p) if len(np.unique(y))>1 else np.nan
    pr  = average_precision_score(y, p) if len(np.unique(y))>1 else np.nan
    yhat = (p>=thr).astype(int)
    acc = accuracy_score(y, yhat)
    pre = precision_score(y, yhat, zero_division=0)
    rec = recall_score(y, yhat, zero_division=0)
    return {"AUC":auc,"PR-AUC":pr,"Acc":acc,"P":pre,"R":rec,"thr":thr,"n":len(y)}

# Mean pooling
yV_m, pV_m, _ = patient_pool_mean(pr_va, y_va, pid_va)
yT_m, pT_m, _ = patient_pool_mean(pr_te, y_te, pid_te)

# Attention pooling
yV_a, pV_a, _ = patient_pool_attention(log_va, y_va, pid_va, temp=1.0)
yT_a, pT_a, _ = patient_pool_attention(log_te, y_te, pid_te, temp=1.0)

print("VAL (mean@0.5):", eval_patient(yV_m, pV_m, 0.5))
print("TEST(mean@0.5):", eval_patient(yT_m, pT_m, 0.5))
print("VAL (attn@0.5):", eval_patient(yV_a, pV_a, 0.5))
print("TEST(attn@0.5):", eval_patient(yT_a, pT_a, 0.5))


VAL (mean@0.5): {'AUC': np.float64(0.7388888888888889), 'PR-AUC': np.float64(0.6587843825001534), 'Acc': 0.6382978723404256, 'P': 0.6, 'R': 0.45, 'thr': 0.5, 'n': 47}
TEST(mean@0.5): {'AUC': np.float64(0.875925925925926), 'PR-AUC': np.float64(0.7626011139703089), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}
VAL (attn@0.5): {'AUC': np.float64(0.7611111111111111), 'PR-AUC': np.float64(0.6851116491294511), 'Acc': 0.6382978723404256, 'P': 0.6153846153846154, 'R': 0.4, 'thr': 0.5, 'n': 47}
TEST(attn@0.5): {'AUC': np.float64(0.8722222222222222), 'PR-AUC': np.float64(0.764498046830885), 'Acc': 0.7659574468085106, 'P': 0.8461538461538461, 'R': 0.55, 'thr': 0.5, 'n': 47}


In [6]:
# Celda 5: Temperature scaling (ajuste en VAL) y evaluación paciente

class TemperatureScaler(nn.Module):
    def __init__(self):
        super().__init__()
        self.logT = nn.Parameter(torch.zeros(1))  # T = exp(logT) >= 1

    def forward(self, logits):
        T = torch.exp(self.logT) + 1e-6
        return logits / T

def fit_temperature(logits_val, y_val, max_iter=2000, lr=0.01):
    y = torch.tensor(y_val, dtype=torch.float32, device=DEVICE)
    z = torch.tensor(logits_val, dtype=torch.float32, device=DEVICE)
    ts = TemperatureScaler().to(DEVICE)
    opt = torch.optim.LBFGS(ts.parameters(), lr=lr, max_iter=50, line_search_fn="strong_wolfe")

    bce = nn.BCEWithLogitsLoss()
    def closure():
        opt.zero_grad(set_to_none=True)
        zT = ts(z)
        loss = bce(zT, y)
        loss.backward()
        return loss

    last = 1e9
    for _ in range(30):
        loss = opt.step(closure)
        if abs(loss.item()-last) < 1e-7:
            break
        last = loss.item()
    with torch.no_grad():
        T = torch.exp(ts.logT).item() + 1e-6
    return ts, T

ts, T_val = fit_temperature(log_va, y_va)
print("Temperatura ajustada (VAL):", round(T_val,4))

def apply_T(logits, T):
    return logits / (T + 1e-6)

# Aplicar T
pV_m_T = 1/(1+np.exp(-apply_T(log_va, T_val)))
pT_m_T = 1/(1+np.exp(-apply_T(log_te, T_val)))

# Recalcular pooling
yV_mean, pV_mean, _ = patient_pool_mean(pV_m_T, y_va, pid_va)
yT_mean, pT_mean, _ = patient_pool_mean(pT_m_T, y_te, pid_te)

yV_attn, pV_attn, _ = patient_pool_attention(apply_T(log_va, T_val), y_va, pid_va, temp=1.0)
yT_attn, pT_attn, _ = patient_pool_attention(apply_T(log_te, T_val), y_te, pid_te, temp=1.0)

print("VAL mean (temp):", eval_patient(yV_mean, pV_mean, 0.5))
print("TEST mean(temp):", eval_patient(yT_mean, pT_mean, 0.5))
print("VAL attn (temp):", eval_patient(yV_attn, pV_attn, 0.5))
print("TEST attn(temp):", eval_patient(yT_attn, pT_attn, 0.5))


Temperatura ajustada (VAL): 2.6732
VAL mean (temp): {'AUC': np.float64(0.7481481481481482), 'PR-AUC': np.float64(0.664989747813566), 'Acc': 0.6382978723404256, 'P': 0.6, 'R': 0.45, 'thr': 0.5, 'n': 47}
TEST mean(temp): {'AUC': np.float64(0.8759259259259259), 'PR-AUC': np.float64(0.7620865452057403), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}
VAL attn (temp): {'AUC': np.float64(0.75), 'PR-AUC': np.float64(0.660088903151692), 'Acc': 0.6382978723404256, 'P': 0.6153846153846154, 'R': 0.4, 'thr': 0.5, 'n': 47}
TEST attn(temp): {'AUC': np.float64(0.8777777777777778), 'PR-AUC': np.float64(0.7617757509275546), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}


In [7]:
# Celda 6: Umbral clínico (VAL recall≥0.90) y evaluación en TEST

def pick_threshold_for_recall(y, p, min_recall=0.90):
    prec, rec, thr = precision_recall_curve(y, p)
    # precision_recall_curve devuelve thr para todos menos el primer punto
    thr = np.append(thr, 1.0)  # para igualar longitudes
    # buscamos el primer punto con recall >= min_recall que maximice precisión
    mask = (rec >= min_recall)
    if mask.any():
        idx = np.argmax(prec[mask])
        thr_sel = thr[mask][idx]
        return float(thr_sel), float(prec[mask][idx]), float(rec[mask][idx])
    else:
        # si no hay, devolvemos el que más recall tenga
        idx = np.argmax(rec)
        return float(thr[idx]), float(prec[idx]), float(rec[idx])

# Elegimos el *mejor pooling* en VAL (entre mean y attn tras temperature scaling) por PR-AUC
def pr_auc(y,p):
    return average_precision_score(y,p) if len(np.unique(y))>1 else np.nan

pr_val_mean = pr_auc(yV_mean, pV_mean)
pr_val_attn = pr_auc(yV_attn, pV_attn)
use_attn = (pr_val_attn > pr_val_mean)
print(f"Comparativa VAL PR-AUC: mean={pr_val_mean:.3f} | attn={pr_val_attn:.3f} → usar {'ATTN' if use_attn else 'MEAN'}")

if use_attn:
    thr, prec, rec = pick_threshold_for_recall(yV_attn, pV_attn, min_recall=0.90)
    val_metrics  = eval_patient(yV_attn, pV_attn, thr)
    test_metrics = eval_patient(yT_attn, pT_attn, thr)
else:
    thr, prec, rec = pick_threshold_for_recall(yV_mean, pV_mean, min_recall=0.90)
    val_metrics  = eval_patient(yV_mean, pV_mean, thr)
    test_metrics = eval_patient(yT_mean, pT_mean, thr)

print(f"→ Umbral clínico (VAL recall≥0.90): thr={thr:.4f} | precision={prec:.3f} | recall={rec:.3f}")
print("[VAL-final]", val_metrics)
print("[TEST-final]", test_metrics)

# Guardar resumen
res = {
    "pooling_used": "attention" if use_attn else "mean",
    "temperature": T_val,
    "threshold": thr,
    "val_metrics": val_metrics,
    "test_metrics": test_metrics
}
with open(f"{OUT_DIR}/ft_effb3_patient_eval.json","w") as f:
    json.dump(res, f, indent=2)
print("Resumen guardado en:", f"{OUT_DIR}/ft_effb3_patient_eval.json")


Comparativa VAL PR-AUC: mean=0.665 | attn=0.660 → usar MEAN
→ Umbral clínico (VAL recall≥0.90): thr=0.3651 | precision=0.588 | recall=1.000
[VAL-final] {'AUC': np.float64(0.7481481481481482), 'PR-AUC': np.float64(0.664989747813566), 'Acc': 0.7021276595744681, 'P': 0.5882352941176471, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
[TEST-final] {'AUC': np.float64(0.8759259259259259), 'PR-AUC': np.float64(0.7620865452057403), 'Acc': 0.7446808510638298, 'P': 0.625, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
Resumen guardado en: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json


In [8]:
# ===============================================
# 🔒 Celda robusta: lee ft_effb3_colab/*.json → genera ROC/PR/Calibración/Confusión → descarga
# No depende de variables del notebook. Solo de los archivos que tu propio pipeline guarda.
# ===============================================
import os, json, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score, brier_score_loss, confusion_matrix

BASE = "ft_effb3_colab"
EVAL_JSON = os.path.join(BASE, "ft_effb3_patient_eval.json")
HIST_JSON = os.path.join(BASE, "train_history.json")
OUTDIR = os.path.join(BASE, "graphs")
os.makedirs(OUTDIR, exist_ok=True)

# 0) Comprobaciones mínimas
assert os.path.exists(EVAL_JSON), f"No existe {EVAL_JSON}. Vuelve a ejecutar las celdas del notebook que lo generan."

# 1) Cargar JSON de evaluación y detectar campos
with open(EVAL_JSON, "r", encoding="utf-8") as f:
    d = json.load(f)

def _extract_patient_arrays(d):
    # Formato A: {"patients":[{"patient_id":..,"y_true":..,"y_score"/"logits":..}, ...], "threshold":..., "calibration":{"T":...}}
    if isinstance(d.get("patients"), list) and d["patients"]:
        ids = [str(it.get("patient_id", it.get("id", i))) for i,it in enumerate(d["patients"])]
        y   = [int(it.get("y_true", it.get("label", 0))) for it in d["patients"]]
        p   = []
        lg  = []
        for it in d["patients"]:
            if "y_score" in it: p.append(float(it["y_score"]))
            elif "prob" in it:  p.append(float(it["prob"]))
            elif "probs" in it: p.append(float(it["probs"]))
            elif "logits" in it: lg.append(float(it["logits"]))
        return np.array(ids), np.array(y), (np.array(p) if p else None), (np.array(lg) if lg else None)

    # Formato B: claves sueltas
    if "patient_id" in d and "y_true" in d:
        ids = np.array([str(x) for x in d["patient_id"]])
        y   = np.array(d["y_true"]).astype(int)
        p   = d.get("y_score") or d.get("prob") or d.get("probs")
        lg  = d.get("logits")
        p   = (np.array(p) if p is not None else None)
        lg  = (np.array(lg) if lg is not None else None)
        return ids, y, p, lg
    return None, None, None, None

ids, y, p, lg = _extract_patient_arrays(d)
assert ids is not None, "No se pudieron extraer arrays de pacientes del JSON. Imprime d y revisa sus claves."

# Calibración por T si solo hay logits
T = None
if isinstance(d.get("calibration"), dict) and "T" in d["calibration"]:
    T = float(d["calibration"]["T"])
for k in ("T","temperature"):
    if k in d and T is None:
        T = float(d[k])
if p is None and lg is not None:
    T = 1.0 if T is None else T
    p = 1.0/(1.0+np.exp(-lg/T))
p = np.asarray(p).astype(float)
y = np.asarray(y).astype(int)

# Umbral clínico
thr = None
for k in ("threshold","thr","best_thr","clinical_threshold"):
    if k in d:
        thr = float(d[k]); break
if thr is None: thr = 0.3651

# 2) Guardar CSV a nivel paciente
csv_out = os.path.join(BASE, "finetuning_patient_predictions.csv")
pd.DataFrame({"patient_id":ids, "y_true":y, "y_score":p}).to_csv(csv_out, index=False)

# 3) Gráficas y métricas (sin estilos ni colores fijos)
def expected_calibration_error(y_true, y_prob, n_bins=10):
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        m = (y_prob >= bins[i]) & (y_prob < bins[i+1])
        if not np.any(m): continue
        ece += m.mean() * abs(y_true[m].mean() - y_prob[m].mean())
    return float(ece)

def plot_roc(y_true, y_score, outpath):
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)
    plt.figure(); plt.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
    plt.plot([0,1],[0,1],'--'); plt.xlabel("1 - Especificidad (FPR)"); plt.ylabel("Sensibilidad (TPR)")
    plt.title("ROC — Fine-Tuning EfficientNet-B3 (Paciente)"); plt.legend(loc="lower right"); plt.tight_layout()
    plt.savefig(outpath, dpi=180); plt.close()
    return float(roc_auc)

def plot_pr(y_true, y_score, outpath):
    prec, rec, _ = precision_recall_curve(y_true, y_score)
    ap = average_precision_score(y_true, y_score)
    plt.figure(); plt.plot(rec, prec, label=f"PR-AUC={ap:.3f}")
    plt.xlabel("Recall"); plt.ylabel("Precision"); plt.title("PR — Fine-Tuning EfficientNet-B3 (Paciente)")
    plt.legend(loc="lower left"); plt.tight_layout(); plt.savefig(outpath, dpi=180); plt.close()
    return float(ap)

def plot_calibration(y_true, y_score, outpath, n_bins=10):
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    accs, confs = [], []
    for i in range(n_bins):
        m = (y_score >= bins[i]) & (y_score < bins[i+1])
        if not np.any(m): continue
        accs.append(float(y_true[m].mean())); confs.append(float(y_score[m].mean()))
    plt.figure(); plt.plot([0,1],[0,1],'--'); plt.plot(confs, accs, "o-")
    plt.xlabel("Confianza media"); plt.ylabel("Frecuencia empírica")
    plt.title("Curva de Calibración — Paciente"); plt.tight_layout(); plt.savefig(outpath, dpi=180); plt.close()

def plot_confusion(y_true, y_score, thr, outpath):
    y_pred = (y_score >= thr).astype(int)
    cm = confusion_matrix(y_true, y_pred, labels=[0,1])
    plt.figure(); plt.imshow(cm)
    plt.xticks([0,1],["Pred 0","Pred 1"]); plt.yticks([0,1],["True 0","True 1"])
    for (i,j),v in np.ndenumerate(cm): plt.text(j,i,str(v),ha="center",va="center")
    plt.title(f"Matriz de confusión (thr={thr})"); plt.tight_layout(); plt.savefig(outpath, dpi=180); plt.close()
    return cm

roc_auc = plot_roc(y, p, os.path.join(OUTDIR, "ft_b3_patient_roc.png"))
pr_auc  = plot_pr(y, p, os.path.join(OUTDIR, "ft_b3_patient_pr.png"))
brier   = brier_score_loss(y, p)
ece     = expected_calibration_error(y, p, n_bins=10)
cm      = plot_confusion(y, p, thr, os.path.join(OUTDIR, f"ft_b3_patient_confusion_thr{str(thr).replace('.','')}.png"))
plot_calibration(y, p, os.path.join(OUTDIR, "ft_b3_patient_calibration.png"))

with open(os.path.join(OUTDIR, "ft_b3_patient_metrics.txt"), "w", encoding="utf-8") as f:
    f.write(f"ROC-AUC: {roc_auc:.3f}\nPR-AUC: {pr_auc:.3f}\nBrier: {brier:.3f}\nECE(10): {ece:.3f}\n")
    f.write(f"TP={int(cm[1,1])} FP={int(cm[0,1])} TN={int(cm[0,0])} FN={int(cm[1,0])} (thr={thr})\n")

print("✅ Listo. CSV y gráficas en:", BASE)

# 4) Descarga directa si estás en Colab
try:
    from google.colab import files
    files.download(os.path.join(BASE, "finetuning_patient_predictions.csv"))
    for fn in os.listdir(OUTDIR):
        files.download(os.path.join(OUTDIR, fn))
except Exception:
    pass


AssertionError: No existe ft_effb3_colab/ft_effb3_patient_eval.json. Vuelve a ejecutar las celdas del notebook que lo generan.