In [1]:
# Celda 0: Dependencias y configuración
!pip -q install timm==1.0.9 --no-deps

import os, math, time, json, random
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms

import timm
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, accuracy_score, precision_score, recall_score

from google.colab import drive
drive.mount('/content/drive')

SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# Rutas
DATA_DIR = "/content/drive/MyDrive/CognitivaAI/oas1_data"
CSV_TRAIN = f"{DATA_DIR}/oas1_train_colab_mapped.csv"
CSV_VAL   = f"{DATA_DIR}/oas1_val_colab_mapped.csv"
CSV_TEST  = f"{DATA_DIR}/oas1_test_colab_mapped.csv"
OUT_DIR   = "/content/drive/MyDrive/CognitivaAI/ft_effb3_colab"
os.makedirs(OUT_DIR, exist_ok=True)

# Hiperparámetros base
IMG_SIZE   = 300            # recomendado para EfficientNet-B3
BATCH_SIZE = 32             # T4 friendly (ajusta a 24-40 si falta memoria)
NUM_WORKERS= 2
EPOCHS     = 12             # entrenamiento corto con early stopping
BASE_LR    = 3e-4
WD         = 1e-4
PATIENCE   = 4              # early stopping


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.3/2.3 MB[0m [31m132.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Device: cuda


In [2]:
# Celda 1: Dataset MRI slices y DataLoaders

class MRISliceDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        df = pd.read_csv(csv_path)
        assert {'png_path','target','patient_id','scan_id'}.issubset(df.columns), "CSV con columnas requeridas"
        self.paths = df['png_path'].astype(str).tolist()
        self.labels = df['target'].astype(int).to_numpy()
        self.pids = df['patient_id'].astype(str).to_numpy()
        self.sids = df['scan_id'].astype(str).to_numpy()
        self.transform = transform

    def __len__(self): return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        img = Image.open(path).convert('L')  # imágenes axiales en escala de grises
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
        img = np.array(img, dtype=np.float32) / 255.0
        img = np.stack([img, img, img], axis=0)  # 1->3 canales
        if self.transform:
            # transform de torchvision espera PIL o tensor HWC; convertimos
            img_t = transforms.functional.to_pil_image(img.transpose(1,2,0))
            img_t = self.transform(img_t)
        else:
            img_t = torch.from_numpy(img)
        y = self.labels[idx]
        return img_t, y, self.pids[idx], self.sids[idx], path

# Transforms
mean_std = ([0.485,0.456,0.406],[0.229,0.224,0.225])
train_tfms = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.85,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=7),
    transforms.ToTensor(),
    transforms.Normalize(*mean_std),
])
eval_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(*mean_std),
])

ds_tr = MRISliceDataset(CSV_TRAIN, transform=train_tfms)
ds_va = MRISliceDataset(CSV_VAL,   transform=eval_tfms)
ds_te = MRISliceDataset(CSV_TEST,  transform=eval_tfms)

# Sampler balanceado por clase (opcional pero útil)
class_counts = np.bincount(ds_tr.labels, minlength=2)
w_neg, w_pos = 1.0/class_counts[0], 1.0/class_counts[1]
sample_weights = np.where(ds_tr.labels==1, w_pos, w_neg)
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

dl_tr = DataLoader(ds_tr, batch_size=BATCH_SIZE, sampler=sampler,  num_workers=NUM_WORKERS, pin_memory=True)
dl_va = DataLoader(ds_va, batch_size=BATCH_SIZE, shuffle=False,     num_workers=NUM_WORKERS, pin_memory=True)
dl_te = DataLoader(ds_te, batch_size=BATCH_SIZE, shuffle=False,     num_workers=NUM_WORKERS, pin_memory=True)

print("TRAIN slices:", len(ds_tr), "| VAL:", len(ds_va), "| TEST:", len(ds_te))
print("Class counts train:", class_counts, "→ pos_weight≈", round(class_counts[0]/max(1,class_counts[1]),3))


TRAIN slices: 2820 | VAL: 940 | TEST: 940
Class counts train: [1620 1200] → pos_weight≈ 1.35


In [3]:
# Celda 2: Modelo EfficientNet-B3 con fine-tuning parcial

BACKBONE = "tf_efficientnet_b3_ns"  # timm
model = timm.create_model(BACKBONE, pretrained=True, num_classes=0, in_chans=3)
feat_dim = model.num_features

# Head ligera
head = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(feat_dim, 1)
)

net = nn.Sequential(model, head).to(DEVICE)

# Congelar todo menos el último bloque del backbone + head
for p in model.parameters():
    p.requires_grad = False

# Descongelar el último bloque de EfficientNet-B3
# Identificamos módulos finales típicos en timm
for name, module in model.named_modules():
    last_block = ('blocks.6', 'blocks.7')  # por si la variante incluye más
    if any(name.startswith(lb) for lb in last_block):
        for p in module.parameters():
            p.requires_grad = True

# Head entrenable
for p in head.parameters():
    p.requires_grad = True

# Optimizador
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=BASE_LR, weight_decay=WD)
# Cosine schedule
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Pérdida con pos_weight
pos_weight = torch.tensor([class_counts[0]/max(1,class_counts[1])], device=DEVICE, dtype=torch.float32)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

print("Trainable params:", sum(p.numel() for p in net.parameters() if p.requires_grad))


  model = create_fn(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

Trainable params: 3285755


In [4]:
# Celda 3: Entrenamiento

def run_epoch(dataloader, train=True):
    net.train(train)
    total_loss = 0.0
    logits_all, y_all = [], []
    scaler = torch.cuda.amp.GradScaler(enabled=True)
    for xb, yb, *_ in dataloader:
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.float().to(DEVICE)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(True):
            logits = net(xb).squeeze(1)
            loss = criterion(logits, yb)
        if train:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        total_loss += loss.item() * xb.size(0)
        logits_all.append(logits.detach().float().cpu().numpy())
        y_all.append(yb.detach().float().cpu().numpy())
    if not train:
        with torch.no_grad():
            pass
    if train:
        scheduler.step()
    y_all = np.concatenate(y_all)
    logits_all = np.concatenate(logits_all)
    probs_all = 1/(1+np.exp(-logits_all))
    # Métricas slice-level
    auc = roc_auc_score(y_all, probs_all) if len(np.unique(y_all))>1 else np.nan
    pr  = average_precision_score(y_all, probs_all) if len(np.unique(y_all))>1 else np.nan
    # Thr 0.5
    yhat = (probs_all >= 0.5).astype(int)
    acc = accuracy_score(y_all, yhat)
    pre = precision_score(y_all, yhat, zero_division=0)
    rec = recall_score(y_all, yhat, zero_division=0)
    brier = np.mean((probs_all - y_all)**2)
    return {
        "loss": float(total_loss/len(dataloader.dataset)),
        "auc": float(auc) if not np.isnan(auc) else None,
        "pr": float(pr) if not np.isnan(pr) else None,
        "acc": float(acc),
        "pre": float(pre),
        "rec": float(rec),
        "brier": float(brier),
        "probs": probs_all.tolist(), # Convert numpy array to list
        "logits": logits_all.tolist(), # Convert numpy array to list
        "y": y_all.tolist() # Convert numpy array to list
    }

best_val = -np.inf
pat = 0
hist = []

for epoch in range(1, EPOCHS+1):
    tr = run_epoch(dl_tr, train=True)
    va = run_epoch(dl_va, train=False)
    # criterio: PR-AUC slice en VAL (más sensible a clase positiva)
    score = va["pr"] if va["pr"] is not None else va["auc"]
    hist.append({"epoch":epoch, "train":tr, "val":va})
    print(f"[{epoch:02d}] TR loss={tr['loss']:.4f} | VAL AUC={va['auc']:.3f} PR-AUC={va['pr']:.3f} Brier={va['brier']:.3f}")
    if score is not None and score > best_val:
        best_val = score
        pat = 0
        torch.save(net.state_dict(), f"{OUT_DIR}/best_ft_effb3.pth")
    else:
        pat += 1
        if pat >= PATIENCE:
            print("→ Early stopping.")
            break

# Guardar historial
with open(f"{OUT_DIR}/train_history.json","w") as f:
    json.dump(hist, f)
print("Entrenamiento finalizado. Mejor PR-AUC VAL:", round(best_val,4))

  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[01] TR loss=0.7466 | VAL AUC=0.674 PR-AUC=0.558 Brier=0.237


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[02] TR loss=0.6407 | VAL AUC=0.652 PR-AUC=0.563 Brier=0.237


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[03] TR loss=0.5669 | VAL AUC=0.666 PR-AUC=0.568 Brier=0.246


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[04] TR loss=0.5075 | VAL AUC=0.671 PR-AUC=0.583 Brier=0.242


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[05] TR loss=0.4643 | VAL AUC=0.665 PR-AUC=0.583 Brier=0.249


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[06] TR loss=0.4370 | VAL AUC=0.655 PR-AUC=0.577 Brier=0.257


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[07] TR loss=0.3698 | VAL AUC=0.667 PR-AUC=0.573 Brier=0.260


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[08] TR loss=0.3477 | VAL AUC=0.666 PR-AUC=0.572 Brier=0.277


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):
  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(True):


[09] TR loss=0.3292 | VAL AUC=0.661 PR-AUC=0.568 Brier=0.278
→ Early stopping.
Entrenamiento finalizado. Mejor PR-AUC VAL: 0.5833


In [5]:
# Celda 4: Inferencia + pooling paciente (mean y attention)

# Cargar mejor modelo
net.load_state_dict(torch.load(f"{OUT_DIR}/best_ft_effb3.pth", map_location=DEVICE))
net.eval()

@torch.no_grad()
def infer(dataloader):
    logits_all, y_all, pids_all = [], [], []
    for xb, yb, pids, *_ in dataloader:
        xb = xb.to(DEVICE)
        lg = net(xb).squeeze(1)
        logits_all.append(lg.float().cpu().numpy())
        y_all.append(yb.numpy())
        pids_all += list(pids)
    logits = np.concatenate(logits_all)
    y = np.concatenate(y_all)
    probs = 1/(1+np.exp(-logits))
    return logits, probs, y, np.array(pids_all)

log_tr, pr_tr, y_tr, pid_tr = infer(dl_tr)
log_va, pr_va, y_va, pid_va = infer(dl_va)
log_te, pr_te, y_te, pid_te = infer(dl_te)

def patient_pool_mean(probs, labels, pids):
    df = pd.DataFrame({"pid":pids, "y":labels, "p":probs})
    g = df.groupby("pid")
    p_pool = g["p"].mean().values
    y_pool = g["y"].mean().round().astype(int).values
    return y_pool, p_pool, g.size().values

def patient_pool_attention(logits, labels, pids, temp=1.0):
    # Atención softmax sobre |logits| como importancias (simple, estable)
    df = pd.DataFrame({"pid":pids, "y":labels, "z":logits})
    outs = []
    for pid, grp in df.groupby("pid"):
        z = grp["z"].values
        # pesos ~ softmax(|z|/T) para resaltar slices informativos
        w = np.exp(np.abs(z)/temp); w = w / (w.sum()+1e-8)
        p = 1/(1+np.exp(-z))
        p_att = (w*p).sum()
        y = int(round(grp["y"].mean()))
        outs.append((pid, y, p_att, len(grp)))
    outs = pd.DataFrame(outs, columns=["pid","y","p","n"])
    return outs["y"].values, outs["p"].values, outs["n"].values

def eval_patient(y, p, thr=0.5):
    auc = roc_auc_score(y, p) if len(np.unique(y))>1 else np.nan
    pr  = average_precision_score(y, p) if len(np.unique(y))>1 else np.nan
    yhat = (p>=thr).astype(int)
    acc = accuracy_score(y, yhat)
    pre = precision_score(y, yhat, zero_division=0)
    rec = recall_score(y, yhat, zero_division=0)
    return {"AUC":auc,"PR-AUC":pr,"Acc":acc,"P":pre,"R":rec,"thr":thr,"n":len(y)}

# Mean pooling
yV_m, pV_m, _ = patient_pool_mean(pr_va, y_va, pid_va)
yT_m, pT_m, _ = patient_pool_mean(pr_te, y_te, pid_te)

# Attention pooling
yV_a, pV_a, _ = patient_pool_attention(log_va, y_va, pid_va, temp=1.0)
yT_a, pT_a, _ = patient_pool_attention(log_te, y_te, pid_te, temp=1.0)

print("VAL (mean@0.5):", eval_patient(yV_m, pV_m, 0.5))
print("TEST(mean@0.5):", eval_patient(yT_m, pT_m, 0.5))
print("VAL (attn@0.5):", eval_patient(yV_a, pV_a, 0.5))
print("TEST(attn@0.5):", eval_patient(yT_a, pT_a, 0.5))


VAL (mean@0.5): {'AUC': np.float64(0.7388888888888889), 'PR-AUC': np.float64(0.6587843825001534), 'Acc': 0.6382978723404256, 'P': 0.6, 'R': 0.45, 'thr': 0.5, 'n': 47}
TEST(mean@0.5): {'AUC': np.float64(0.875925925925926), 'PR-AUC': np.float64(0.7626011139703089), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}
VAL (attn@0.5): {'AUC': np.float64(0.7611111111111111), 'PR-AUC': np.float64(0.6851116491294511), 'Acc': 0.6382978723404256, 'P': 0.6153846153846154, 'R': 0.4, 'thr': 0.5, 'n': 47}
TEST(attn@0.5): {'AUC': np.float64(0.8722222222222222), 'PR-AUC': np.float64(0.764498046830885), 'Acc': 0.7659574468085106, 'P': 0.8461538461538461, 'R': 0.55, 'thr': 0.5, 'n': 47}


In [6]:
# Celda 5: Temperature scaling (ajuste en VAL) y evaluación paciente

class TemperatureScaler(nn.Module):
    def __init__(self):
        super().__init__()
        self.logT = nn.Parameter(torch.zeros(1))  # T = exp(logT) >= 1

    def forward(self, logits):
        T = torch.exp(self.logT) + 1e-6
        return logits / T

def fit_temperature(logits_val, y_val, max_iter=2000, lr=0.01):
    y = torch.tensor(y_val, dtype=torch.float32, device=DEVICE)
    z = torch.tensor(logits_val, dtype=torch.float32, device=DEVICE)
    ts = TemperatureScaler().to(DEVICE)
    opt = torch.optim.LBFGS(ts.parameters(), lr=lr, max_iter=50, line_search_fn="strong_wolfe")

    bce = nn.BCEWithLogitsLoss()
    def closure():
        opt.zero_grad(set_to_none=True)
        zT = ts(z)
        loss = bce(zT, y)
        loss.backward()
        return loss

    last = 1e9
    for _ in range(30):
        loss = opt.step(closure)
        if abs(loss.item()-last) < 1e-7:
            break
        last = loss.item()
    with torch.no_grad():
        T = torch.exp(ts.logT).item() + 1e-6
    return ts, T

ts, T_val = fit_temperature(log_va, y_va)
print("Temperatura ajustada (VAL):", round(T_val,4))

def apply_T(logits, T):
    return logits / (T + 1e-6)

# Aplicar T
pV_m_T = 1/(1+np.exp(-apply_T(log_va, T_val)))
pT_m_T = 1/(1+np.exp(-apply_T(log_te, T_val)))

# Recalcular pooling
yV_mean, pV_mean, _ = patient_pool_mean(pV_m_T, y_va, pid_va)
yT_mean, pT_mean, _ = patient_pool_mean(pT_m_T, y_te, pid_te)

yV_attn, pV_attn, _ = patient_pool_attention(apply_T(log_va, T_val), y_va, pid_va, temp=1.0)
yT_attn, pT_attn, _ = patient_pool_attention(apply_T(log_te, T_val), y_te, pid_te, temp=1.0)

print("VAL mean (temp):", eval_patient(yV_mean, pV_mean, 0.5))
print("TEST mean(temp):", eval_patient(yT_mean, pT_mean, 0.5))
print("VAL attn (temp):", eval_patient(yV_attn, pV_attn, 0.5))
print("TEST attn(temp):", eval_patient(yT_attn, pT_attn, 0.5))


Temperatura ajustada (VAL): 2.6732
VAL mean (temp): {'AUC': np.float64(0.7481481481481482), 'PR-AUC': np.float64(0.664989747813566), 'Acc': 0.6382978723404256, 'P': 0.6, 'R': 0.45, 'thr': 0.5, 'n': 47}
TEST mean(temp): {'AUC': np.float64(0.8759259259259259), 'PR-AUC': np.float64(0.7620865452057403), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}
VAL attn (temp): {'AUC': np.float64(0.75), 'PR-AUC': np.float64(0.660088903151692), 'Acc': 0.6382978723404256, 'P': 0.6153846153846154, 'R': 0.4, 'thr': 0.5, 'n': 47}
TEST attn(temp): {'AUC': np.float64(0.8777777777777778), 'PR-AUC': np.float64(0.7617757509275546), 'Acc': 0.723404255319149, 'P': 0.7333333333333333, 'R': 0.55, 'thr': 0.5, 'n': 47}


In [7]:
# Celda 6: Umbral clínico (VAL recall≥0.90) y evaluación en TEST

def pick_threshold_for_recall(y, p, min_recall=0.90):
    prec, rec, thr = precision_recall_curve(y, p)
    # precision_recall_curve devuelve thr para todos menos el primer punto
    thr = np.append(thr, 1.0)  # para igualar longitudes
    # buscamos el primer punto con recall >= min_recall que maximice precisión
    mask = (rec >= min_recall)
    if mask.any():
        idx = np.argmax(prec[mask])
        thr_sel = thr[mask][idx]
        return float(thr_sel), float(prec[mask][idx]), float(rec[mask][idx])
    else:
        # si no hay, devolvemos el que más recall tenga
        idx = np.argmax(rec)
        return float(thr[idx]), float(prec[idx]), float(rec[idx])

# Elegimos el *mejor pooling* en VAL (entre mean y attn tras temperature scaling) por PR-AUC
def pr_auc(y,p):
    return average_precision_score(y,p) if len(np.unique(y))>1 else np.nan

pr_val_mean = pr_auc(yV_mean, pV_mean)
pr_val_attn = pr_auc(yV_attn, pV_attn)
use_attn = (pr_val_attn > pr_val_mean)
print(f"Comparativa VAL PR-AUC: mean={pr_val_mean:.3f} | attn={pr_val_attn:.3f} → usar {'ATTN' if use_attn else 'MEAN'}")

if use_attn:
    thr, prec, rec = pick_threshold_for_recall(yV_attn, pV_attn, min_recall=0.90)
    val_metrics  = eval_patient(yV_attn, pV_attn, thr)
    test_metrics = eval_patient(yT_attn, pT_attn, thr)
else:
    thr, prec, rec = pick_threshold_for_recall(yV_mean, pV_mean, min_recall=0.90)
    val_metrics  = eval_patient(yV_mean, pV_mean, thr)
    test_metrics = eval_patient(yT_mean, pT_mean, thr)

print(f"→ Umbral clínico (VAL recall≥0.90): thr={thr:.4f} | precision={prec:.3f} | recall={rec:.3f}")
print("[VAL-final]", val_metrics)
print("[TEST-final]", test_metrics)

# Guardar resumen
res = {
    "pooling_used": "attention" if use_attn else "mean",
    "temperature": T_val,
    "threshold": thr,
    "val_metrics": val_metrics,
    "test_metrics": test_metrics
}
with open(f"{OUT_DIR}/ft_effb3_patient_eval.json","w") as f:
    json.dump(res, f, indent=2)
print("Resumen guardado en:", f"{OUT_DIR}/ft_effb3_patient_eval.json")


Comparativa VAL PR-AUC: mean=0.665 | attn=0.660 → usar MEAN
→ Umbral clínico (VAL recall≥0.90): thr=0.3651 | precision=0.588 | recall=1.000
[VAL-final] {'AUC': np.float64(0.7481481481481482), 'PR-AUC': np.float64(0.664989747813566), 'Acc': 0.7021276595744681, 'P': 0.5882352941176471, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
[TEST-final] {'AUC': np.float64(0.8759259259259259), 'PR-AUC': np.float64(0.7620865452057403), 'Acc': 0.7446808510638298, 'P': 0.625, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
Resumen guardado en: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json


In [10]:
# ===============================================
# ✅ CELDA FINAL (sin predicciones): lee métricas del JSON → gráficos y reportes
#    - Busca ft_effb3_patient_eval.json en todo el workspace.
#    - Usa solo métricas agregadas (VAL/TEST) y umbral/temperature.
#    - Reconstruye la matriz de confusión (TEST) a partir de Acc, P, R, n.
#    - Genera: confusion.png, pr_point.png, bars_auc_prauc.png, metrics.txt, metrics.csv
# ===============================================
import os, json, glob, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---- Localizar el JSON en cualquier ruta
CANDIDATES = [
    "ft_effb3_colab/ft_effb3_patient_eval.json",
    "ft_effb3_patient_eval.json"
]
json_path = None
for c in CANDIDATES:
    if os.path.exists(c):
        json_path = c
        break
if json_path is None:
    hits = glob.glob("**/ft_effb3_patient_eval.json", recursive=True)
    if hits:
        json_path = hits[0]

assert json_path is not None, "No se encontró 'ft_effb3_patient_eval.json' en el workspace."
print("📄 Usando JSON:", json_path)

# ---- Cargar
with open(json_path, "r", encoding="utf-8") as f:
    d = json.load(f)

# ---- Extraer parámetros
temperature = d.get("temperature") or (d.get("calibration", {}) if isinstance(d.get("calibration"), dict) else {}).get("T")
threshold   = d.get("threshold") or d.get("thr") or d.get("best_thr") or (d.get("val_metrics", {}) or {}).get("thr") or (d.get("test_metrics", {}) or {}).get("thr")

val = d.get("val_metrics", {})
tes = d.get("test_metrics", {})

def _get(m, key, default=None):
    return m.get(key, default)

# ---- Construir DataFrame de métricas (VAL/TEST)
rows = []
if val:
    rows.append({"split":"VAL",
                 "AUC":_get(val,"AUC"),
                 "PR-AUC":_get(val,"PR-AUC"),
                 "Acc":_get(val,"Acc"),
                 "P":_get(val,"P"),
                 "R":_get(val,"R"),
                 "thr":_get(val,"thr", threshold),
                 "n":_get(val,"n")})
if tes:
    rows.append({"split":"TEST",
                 "AUC":_get(tes,"AUC"),
                 "PR-AUC":_get(tes,"PR-AUC"),
                 "Acc":_get(tes,"Acc"),
                 "P":_get(tes,"P"),
                 "R":_get(tes,"R"),
                 "thr":_get(tes,"thr", threshold),
                 "n":_get(tes,"n")})

metrics_df = pd.DataFrame(rows)
OUTDIR = os.path.join(os.path.dirname(json_path) or ".", "graphs_from_metrics")
os.makedirs(OUTDIR, exist_ok=True)
metrics_csv = os.path.join(OUTDIR, "ft_b3_summary_metrics.csv")
metrics_txt = os.path.join(OUTDIR, "ft_b3_summary_metrics.txt")
metrics_df.to_csv(metrics_csv, index=False)

# ---- Reconstruir matriz de confusión (TEST) con Acc, P, R, n
# Fórmulas:
#   Precision = TP / (TP + FP)  -> FP = TP * (1-P)/P
#   Recall    = TP / (TP + FN)  -> FN = TP * (1-R)/R
#   Accuracy  = (TP + TN) / n   -> TN = Acc*n - TP
#   Suma      = TP + FP + TN + FN = n
# Sustituyendo ->  TP * ((1-P)/P) + (Acc*n - TP) + TP * ((1-R)/R) + TP = n
# Se despeja TP (real) y luego se redondea al entero más cercano y se ajusta TN = Acc*n - TP.
cm_png = os.path.join(OUTDIR, "ft_b3_patient_confusion_from_metrics.png")
pr_point_png = os.path.join(OUTDIR, "ft_b3_pr_point.png")
bars_png = os.path.join(OUTDIR, "ft_b3_bars_auc_prauc.png")

if tes and all(k in tes for k in ["Acc","P","R","n"]):
    Acc = float(tes["Acc"])
    P   = float(tes["P"])
    R   = float(tes["R"])
    n   = int(tes["n"])

    # Manejo de R=1 o P=1 para evitar divisiones por cero
    eps = 1e-12
    denom = ( (1.0 - P)/(P + eps) ) + ( (1.0 - R)/(R + eps) ) + 1.0  # coeficiente de TP tras agrupar términos
    # Derivación compacta basada en: TP*((1-P)/P) + (Acc*n - TP) + TP*((1-R)/R) + TP = n  -> TP*denom + Acc*n - TP = n
    #                                -> TP*(denom) = n*(1-Acc)
    TP_real = n * (1.0 - Acc) / max(( (1.0 - P)/(P + eps) + (1.0 - R)/(R + eps) + 1.0 ), eps)

    TP = int(round(TP_real))
    # Recalcular con fórmulas discretas
    FP = int(round(TP * (1.0 - P) / max(P, eps)))
    FN = int(round(TP * (1.0 - R) / max(R, eps)))
    TN = int(round(Acc * n)) - TP
    # Ajuste simple por si el redondeo rompe la suma
    delta = (TP + FP + TN + FN) - n
    if delta != 0:
        # Corrige en TN si hace falta
        TN = TN - delta

    print(f"✅ Confusion TEST reconstruida: TP={TP}, FP={FP}, TN={TN}, FN={FN}  (n={n}, thr={threshold})")

    # --- Graficar matriz (sin estilos fijos)
    import matplotlib.pyplot as plt
    import numpy as np
    cm = np.array([[TN, FP],
                   [FN, TP]])
    plt.figure()
    plt.imshow(cm)
    plt.xticks([0,1], ["Pred 0","Pred 1"])
    plt.yticks([0,1], ["True 0","True 1"])
    for (i,j),v in np.ndenumerate(cm):
        plt.text(j, i, str(int(v)), ha="center", va="center")
    plt.title(f"Matriz de confusión (TEST) — thr={threshold}")
    plt.tight_layout()
    plt.savefig(cm_png, dpi=180); plt.close()
else:
    print("⚠️ No hay campos suficientes en TEST para reconstruir la matriz de confusión (se necesitan Acc,P,R,n).")

# ---- PR point (solo el punto, no la curva)
if tes and ("P" in tes) and ("R" in tes):
    Pp = float(tes["P"]); Rr = float(tes["R"])
    plt.figure()
    plt.plot([0,1],[Pp,Pp], linestyle="--")  # línea horizontal para referencia
    plt.plot([Rr], [Pp], marker="o")
    plt.xlim(0,1); plt.ylim(0,1)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Punto Precision–Recall (TEST)")
    plt.tight_layout()
    plt.savefig(pr_point_png, dpi=180); plt.close()

# ---- Barras de AUC y PR-AUC (VAL/TEST)
labels = []
auc_vals = []
prauc_vals = []
if val:
    labels.append("VAL"); auc_vals.append(float(val.get("AUC", np.nan))); prauc_vals.append(float(val.get("PR-AUC", np.nan)))
if tes:
    labels.append("TEST"); auc_vals.append(float(tes.get("AUC", np.nan))); prauc_vals.append(float(tes.get("PR-AUC", np.nan)))

if labels:
    # AUC
    plt.figure()
    plt.bar(labels, auc_vals)
    plt.ylim(0,1)
    plt.ylabel("AUC")
    plt.title("AUC (VAL/TEST)")
    plt.tight_layout()
    plt.savefig(bars_png.replace("auc_prauc","auc"), dpi=180); plt.close()

    # PR-AUC
    plt.figure()
    plt.bar(labels, prauc_vals)
    plt.ylim(0,1)
    plt.ylabel("PR-AUC")
    plt.title("PR-AUC (VAL/TEST)")
    plt.tight_layout()
    plt.savefig(bars_png.replace("auc_prauc","prauc"), dpi=180); plt.close()

# ---- Guardar resumen en .txt
with open(metrics_txt, "w", encoding="utf-8") as f:
    f.write(f"Archivo: {json_path}\n")
    if temperature is not None:
        f.write(f"Temperature: {temperature}\n")
    if threshold is not None:
        f.write(f"Threshold: {threshold}\n")
    f.write("\n== VAL ==\n")
    for k in ["AUC","PR-AUC","Acc","P","R","thr","n"]:
        if k in val: f.write(f"{k}: {val[k]}\n")
    f.write("\n== TEST ==\n")
    for k in ["AUC","PR-AUC","Acc","P","R","thr","n"]:
        if k in tes: f.write(f"{k}: {tes[k]}\n")
    if tes and all(k in tes for k in ["Acc","P","R","n"]):
        f.write("\nMatriz de confusión (TEST) reconstruida a partir de Acc,P,R,n:\n")
        f.write(f"TP={TP} FP={FP} TN={TN} FN={FN}\n")

print("📁 Resultados guardados en:", OUTDIR)

# ---- Descarga directa (si estás en Colab)
try:
    from google.colab import files
    for fn in [metrics_csv, metrics_txt, cm_png, pr_point_png,
               bars_png.replace("auc_prauc","auc"),
               bars_png.replace("auc_prauc","prauc")]:
        if os.path.exists(fn):
            files.download(fn)
except Exception:
    pass




📄 Usando JSON: drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json
✅ Confusion TEST reconstruida: TP=8, FP=5, TN=34, FN=0  (n=47, thr=0.3651449978351593)
📁 Resultados guardados en: drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
# --- MONTAJE ROBUSTO DEL DRIVE ---
from google.colab import drive
import os, shutil

MOUNT = '/content/drive'

def safe_mount():
    try:
        # Si la carpeta existe y tiene contenido, borrar solo enlaces residuales del colab
        if os.path.islink(MOUNT):
            os.unlink(MOUNT)
        if os.path.isdir(MOUNT) and os.listdir(MOUNT):
            # Si falla por "ya contiene archivos", forzamos remount
            drive.mount(MOUNT, force_remount=True)
        else:
            drive.mount(MOUNT)
    except Exception as e:
        print("⚠️ Problema montando Drive. Fuerzo remount…", e)
        drive.mount(MOUNT, force_remount=True)

safe_mount()
!ls -lah /content/drive/MyDrive | sed -n '1,120p'




Mounted at /content/drive
total 301M
-rw------- 1 root root  189 Dec  9  2023 1354098917.gdoc
-rw------- 1 root root 4.6M Dec  4  2023 1354098917.pdf
-rw------- 1 root root  189 Aug 27  2023 202307010CV-FRM-TIC-EN.gdoc
-rw------- 1 root root  189 Oct 15  2023 20230710CV-FRM-TIC.gdoc
-rw------- 1 root root  189 Sep 14  2023 20230914CV-FRM-ADECCO.gdoc
-rw------- 1 root root 220K May 21  2024 20240521DARDE.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS1.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS2.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS3.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS4.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS5.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS6.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS7.pdf
-rw------- 1 root root 181K Dec 18  2024 24-165+REMISION+COPIAS8.pdf
-rw------- 1 root root  40K Apr  7  2

In [7]:
# --- LOCALIZADOR DE RUTAS: muestra dónde están tus ficheros ---
import glob, os
from pathlib import Path

def find(patterns, roots=('/content', '/content/drive/MyDrive')):
    hits = []
    for root in roots:
        for pat in patterns:
            hits.extend(glob.glob(os.path.join(root, '**', pat), recursive=True))
    return sorted(set(hits))

candidates = {
    "checkpoint (best_ft_effb3.pth)": find(['best_ft_effb3.pth','effb3_finetuned.pth','best_model.pth']),
    "eval json (ft_effb3_patient_eval.json)": find(['ft_effb3_patient_eval.json']),
    # CSVs que comentaste:
    "oas1_val_colab_mapped.csv": find(['oas1_val_colab_mapped.csv']),
    "oas1_test_colab_mapped.csv": find(['oas1_test_colab_mapped.csv']),
    # por si están con otros nombres:
    "oas1_val.csv": find(['oas1_val.csv']),
    "oas1_test.csv": find(['oas1_test.csv']),
    "val_mapped.csv": find(['val_mapped.csv','val.csv']),
    "test_mapped.csv": find(['test_mapped.csv','test.csv']),
}

for k,v in candidates.items():
    print(f"\n{k}:")
    if v:
        for p in v[:6]:
            print("  -", p)
        if len(v) > 6:
            print(f"  (+{len(v)-6} más)")
    else:
        print("  (no encontrado)")



checkpoint (best_ft_effb3.pth):
  - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/best_ft_effb3.pth

eval json (ft_effb3_patient_eval.json):
  - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json

oas1_val_colab_mapped.csv:
  - /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_val_colab_mapped.csv

oas1_test_colab_mapped.csv:
  - /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_test_colab_mapped.csv

oas1_val.csv:
  - /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_val.csv

oas1_test.csv:
  - /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_test.csv

val_mapped.csv:
  (no encontrado)

test_mapped.csv:
  (no encontrado)


In [9]:
from pathlib import Path
import time

def sanity_check_dataset(df, sample=256):
    n = len(df)
    print(f"Slices totales: {n}")
    from PIL import Image
    import numpy as np
    from tqdm import trange

    t0 = time.time()
    for i in trange(min(sample, n)):
        _ = Image.open(df["img_path"].iloc[i]).convert("RGB")
    dt = time.time() - t0
    ips = min(sample,n)/max(dt,1e-6)
    print(f"≈ {ips:.1f} imágenes/s solo lectura Drive → ETA lectura para {n} = {n/ips/60:.1f} min (aprox)")

# Llama a sanity_check_dataset(df_val); sanity_check_dataset(df_test)



In [10]:
sanity_check_dataset(df_val)
sanity_check_dataset(df_test)

Slices totales: 940


100%|██████████| 256/256 [00:00<00:00, 276.46it/s]


≈ 275.5 imágenes/s solo lectura Drive → ETA lectura para 940 = 0.1 min (aprox)
Slices totales: 940


100%|██████████| 256/256 [00:57<00:00,  4.45it/s]

≈ 4.5 imágenes/s solo lectura Drive → ETA lectura para 940 = 3.5 min (aprox)





In [11]:
# === Cacheo sólido a SSD local + DataLoader rápido (Colab) ===
import os, shutil, hashlib, io, time
from pathlib import Path
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

LOCAL_ROOT = Path("/content/mri_cache"); LOCAL_ROOT.mkdir(parents=True, exist_ok=True)

def _fingerprint(p: Path):
    try:
        st = p.stat()
        return f"{st.st_size}-{int(st.st_mtime)}"
    except Exception:
        return None

def _dst_for(src: Path):
    # nombre estable por hash para evitar colisiones si hay mismos nombres en carpetas distintas
    h = hashlib.md5(str(src).encode("utf-8")).hexdigest()[:10]
    return LOCAL_ROOT / f"{h}__{src.name}"

def _copy_file(src: Path, dst: Path, bufsize=1024*1024):
    # copia con buffer grande (más rápido que shutil.copy2 en Drive)
    with open(src, "rb") as fi, open(dst, "wb") as fo:
        shutil.copyfileobj(fi, fo, length=bufsize)
    try:
        os.utime(dst, (src.stat().st_atime, src.stat().st_mtime))
    except Exception:
        pass

def cache_to_local(df, path_col="img_path", max_workers=8):
    paths = [Path(p) for p in df[path_col].tolist()]
    todo = []
    for src in paths:
        dst = _dst_for(src)
        if not dst.exists():
            todo.append((src, dst))
    print(f"→ Preparando copia: {len(todo)} ficheros nuevos de {len(paths)} (cache en {LOCAL_ROOT})")
    t0 = time.time()
    errors = []
    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        futs = {ex.submit(_copy_file, s, d): (s, d) for s, d in todo}
        for fut in as_completed(futs):
            s, d = futs[fut]
            try:
                fut.result()
            except Exception as e:
                errors.append((str(s), repr(e)))
    dt = time.time() - t0
    if todo:
        print(f"✅ Copiados {len(todo)-len(errors)} / {len(todo)} en {dt:.1f}s | { (len(todo)-len(errors))/max(dt,1):.1f} f/s")
    if errors:
        print("⚠️ Errores de copia (mostrando 5):")
        for e in errors[:5]: print("   ", e)
    # reescribir columna
    df = df.copy()
    df[path_col] = [str(_dst_for(Path(p))) for p in df[path_col]]
    return df

# USO: tras construir df_val y df_test (con tus rutas reales)
df_val  = cache_to_local(df_val)     # reescribe a /content/mri_cache/...
df_test = cache_to_local(df_test)

# Opcional: pequeño “termómetro” post‑cache
def quick_probe(df, sample=256):
    from PIL import Image
    import numpy as np, time
    n = min(sample, len(df))
    t0 = time.time()
    for i in range(n):
        _ = Image.open(df["img_path"].iloc[i]).convert("RGB")
    dt = time.time() - t0
    print(f"SSD local: {n/max(dt,1e-6):.1f} imágenes/s (muestra {n})")
quick_probe(df_val, 256)


→ Preparando copia: 940 ficheros nuevos de 940 (cache en /content/mri_cache)
✅ Copiados 940 / 940 en 17.6s | 53.5 f/s
→ Preparando copia: 940 ficheros nuevos de 940 (cache en /content/mri_cache)
✅ Copiados 940 / 940 en 18.3s | 51.3 f/s
SSD local: 695.3 imágenes/s (muestra 256)


In [15]:
# === DataLoader estable para Colab (2 workers) + inferencia rápida ===
import torch
torch.backends.cudnn.benchmark = True
try:
    torch.set_float32_matmul_precision("high")
except Exception:
    pass

from torch.utils.data import DataLoader

# Asumiendo que ya tienes df_val, df_test y la clase SliceDS definida
val_ds  = SliceDS(df_val)
test_ds = SliceDS(df_test)

# Ajustes conservadores y estables para Colab
val_dl  = DataLoader(
    val_ds, batch_size=128, shuffle=False,
    num_workers=2, pin_memory=True, prefetch_factor=2,
    persistent_workers=True  # si diera problemas, ponlo en False
)
test_dl = DataLoader(
    test_ds, batch_size=128, shuffle=False,
    num_workers=2, pin_memory=True, prefetch_factor=2,
    persistent_workers=True
)

model = model.to(device).eval().to(memory_format=torch.channels_last)

@torch.no_grad()
def infer_fast(dl):
    import numpy as np, time
    from tqdm import tqdm
    probs, ys, pids = [], [], []
    t0 = time.time()
    for x, y, pid in tqdm(dl, total=len(dl)):
        x = x.to(device, non_blocking=True).to(memory_format=torch.channels_last)
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
          logits = model(x).squeeze(1)
        if T_temperature is not None:
            logits = logits / float(T_temperature)
        p = torch.sigmoid(logits).float().cpu().numpy()
        probs.append(p); ys.append(y.numpy()); pids += list(pid)
    dt = time.time() - t0
    nimgs = len(dl.dataset)
    print(f"Throughput: {nimgs/dt:.1f} img/s  (~{dt/60:.1f} min para {nimgs} imgs)")
    return np.concatenate(probs), np.concatenate(ys), np.array(pids)

# Ejecuta:
# p_val, y_val, pid_val   = infer_fast(val_dl)
# p_test, y_test, pid_test = infer_fast(test_dl)


In [16]:
p_val, y_val, pid_val   = infer_fast(val_dl)
p_test, y_test, pid_test = infer_fast(test_dl)

100%|██████████| 8/8 [00:04<00:00,  1.69it/s]


Throughput: 198.5 img/s  (~0.1 min para 940 imgs)


100%|██████████| 8/8 [00:06<00:00,  1.28it/s]

Throughput: 150.4 img/s  (~0.1 min para 940 imgs)





In [17]:
# ==== Celda final: reproducibilidad + inferencia + guardado métrico/plots ====
import os, json, time, random, math, pathlib
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, roc_curve, confusion_matrix

# --- 0) Rutas (ajústalas si fuese necesario) ---
ROOT = Path("/content/drive/MyDrive/CognitivaAI")
OUT  = ROOT / "ft_effb3_colab"
OUT.mkdir(parents=True, exist_ok=True)
GRAPHS = OUT / "graphs_from_metrics"
GRAPHS.mkdir(parents=True, exist_ok=True)

# Si existe el JSON de evaluación anterior (temperatura/umbral), lo reaprovechamos
EVAL_JSON = OUT / "ft_effb3_patient_eval.json"

# DataFrames VAL/TEST ya usados en este notebook (ajusta nombres si difieren)
# Se espera que tengas DataLoaders `val_dl` y `test_dl` y que entreguen (x, y, pid)
# y también DataFrames df_val_map / df_test_map con columnas: patient_id, y_true, ...
# Si no tienes esos DF, no pasa nada: reconstruimos por paciente con los pids de los DL.

# --- 1) Semillas + backend ---
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
try: torch.set_float32_matmul_precision("high")
except: pass

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval(); model.to(device).to(memory_format=torch.channels_last)

def run_infer(dl, T=None):
    """Inferencia rápida con medición de throughput y retorno de arrays (y_true, y_score, patient_id)."""
    ys, ps, pids = [], [], []
    # warm-up: 1 batch para estabilizar
    with torch.no_grad():
        for i, (x, y, pid) in enumerate(dl):
            x = x.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
                logits = model(x).squeeze(1)
            if T is not None:
                logits = logits / float(T)
            p = torch.sigmoid(logits).float().cpu().numpy()
            ys.append(y.numpy()); ps.append(p); pids += list(pid)
            break
    # medición real
    torch.cuda.synchronize() if torch.cuda.is_available() else None
    t0 = time.time()
    with torch.no_grad():
        for i, (x, y, pid) in enumerate(dl):
            x = x.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
                logits = model(x).squeeze(1)
            if T is not None:
                logits = logits / float(T)
            p = torch.sigmoid(logits).float().cpu().numpy()
            ys.append(y.numpy()); ps.append(p); pids += list(pid)
    torch.cuda.synchronize() if torch.cuda.is_available() else None
    dt = time.time() - t0
    n_imgs = sum(len(b) for b in ys)  # nº total de muestras
    thrpt = n_imgs / max(dt, 1e-9)
    return np.concatenate(ys), np.concatenate(ps), np.array(pids), thrpt

def aggregate_patient(pids, ys, ps, pooling="mean"):
    """Agrupa por patient_id con media (o max) de probabilidades y mayoritario en y_true."""
    df = pd.DataFrame({"patient_id": pids, "y_true": ys, "y_score": ps})
    agg = (df.groupby("patient_id")
             .agg(y_true=("y_true", lambda v: int(np.round(np.mean(v)))) ,
                  y_score=("y_score", np.mean if pooling=="mean" else np.max))
             .reset_index())
    return agg

def eval_patient(df_pat, thr=None):
    y = df_pat["y_true"].values.astype(int)
    s = df_pat["y_score"].values.astype(float)
    metrics = {
        "AUC": float(roc_auc_score(y, s)) if len(np.unique(y))>1 else np.nan,
        "PR-AUC": float(average_precision_score(y, s)),
    }
    if thr is None:
        # umbral por Youden en ROC
        fpr, tpr, t = roc_curve(y, s)
        j = tpr - fpr
        thr = float(t[np.argmax(j)])
    yhat = (s >= thr).astype(int)
    tn, fp, fn, tp = confusion_matrix(y, yhat, labels=[0,1]).ravel()
    P = float(tp / max(tp+fp, 1)); R = float(tp / max(tp+fn, 1))
    acc = float((tp+tn)/max(len(y),1))
    metrics.update({"Acc":acc,"P":P,"R":R,"thr":thr,"n":int(len(y))})
    return metrics, thr

# --- 2) Recuperar T y umbral previos si existen ---
T_prev, thr_prev = None, None
if EVAL_JSON.exists():
    with open(EVAL_JSON, "r", encoding="utf-8") as f:
        j = json.load(f)
    T_prev = float(j.get("temperature")) if "temperature" in j else None
    thr_prev = float(j.get("test_metrics",{}).get("thr") or j.get("val_metrics",{}).get("thr") or j.get("threshold", np.nan))
    if math.isnan(thr_prev): thr_prev=None
    print(f"🧪 Reutilizando T={T_prev} y thr≈{thr_prev} de {EVAL_JSON.name}")

# --- 3) Inferencia en VAL/TEST con medición de throughput ---
# Asumimos que tienes val_dl y test_dl en memoria:
ys_v, ps_v, pids_v, thrpt_v = run_infer(val_dl, T=T_prev)
print(f"[VAL] Throughput: {thrpt_v:.1f} img/s")
ys_t, ps_t, pids_t, thrpt_t = run_infer(test_dl, T=T_prev)
print(f"[TEST] Throughput: {thrpt_t:.1f} img/s")

# --- 4) Guardar CSV por slice y por paciente ---
val_slice = pd.DataFrame({"patient_id":pids_v, "y_true":ys_v, "y_score":ps_v})
test_slice= pd.DataFrame({"patient_id":pids_t, "y_true":ys_t, "y_score":ps_t})
val_slice.to_csv(OUT/"val_slice_preds.csv", index=False)
test_slice.to_csv(OUT/"test_slice_preds.csv", index=False)

val_pat = aggregate_patient(pids_v, ys_v, ps_v, pooling="mean")
test_pat= aggregate_patient(pids_t, ys_t, ps_t, pooling="mean")
val_pat.to_csv(OUT/"val_patient_preds.csv", index=False)
test_pat.to_csv(OUT/"test_patient_preds.csv", index=False)
print("✅ CSV guardados en", OUT)

# --- 5) Métricas a nivel paciente (respetando umbral previo si lo había) ---
val_m, thr_v = eval_patient(val_pat, thr=thr_prev)
test_m, thr_t = eval_patient(test_pat, thr=thr_prev)
print("VAL:", val_m); print("TEST:", test_m)

# Persistimos JSON de evaluación (temperatura reutilizada si había)
EVAL = {
    "pooling_used": "mean",
    "temperature": T_prev,
    "threshold": thr_t,
    "val_metrics": val_m,
    "test_metrics": test_m
}
with open(OUT/"ft_effb3_patient_eval.json", "w", encoding="utf-8") as f:
    json.dump(EVAL, f, indent=2)
print("📝 Eval JSON actualizado →", OUT/"ft_effb3_patient_eval.json")

# --- 6) Gráficos (ROC/PR, hist, confusión) ---
def plot_and_save_curves(df_pat, split):
    y = df_pat["y_true"].values.astype(int)
    s = df_pat["y_score"].values.astype(float)

    fpr, tpr, _ = roc_curve(y, s)
    prec, rec, _ = precision_recall_curve(y, s)

    plt.figure(); plt.plot(fpr, tpr); plt.plot([0,1],[0,1],'--')
    plt.xlabel("FPR"); plt.ylabel("TPR"); plt.title(f"ROC – {split}")
    plt.grid(True, alpha=.3)
    plt.savefig(GRAPHS/f"roc_{split.lower()}.png", dpi=150, bbox_inches="tight"); plt.close()

    plt.figure(); plt.plot(rec, prec)
    plt.xlabel("Recall"); plt.ylabel("Precision"); plt.title(f"PR – {split}")
    plt.grid(True, alpha=.3)
    plt.savefig(GRAPHS/f"pr_{split.lower()}.png", dpi=150, bbox_inches="tight"); plt.close()

    plt.figure(); plt.hist(s[y==0], bins=25, alpha=.7, label="y=0"); plt.hist(s[y==1], bins=25, alpha=.7, label="y=1")
    plt.legend(); plt.title(f"Distribución probas – {split}")
    plt.savefig(GRAPHS/f"histo_{split.lower()}.png", dpi=150, bbox_inches="tight"); plt.close()

    # Matriz de confusión con umbral óptimo del propio split
    m, thr = eval_patient(df_pat, thr=None)
    yhat = (s>=m["thr"]).astype(int)
    tn, fp, fn, tp = confusion_matrix(y, yhat, labels=[0,1]).ravel()

    plt.figure()
    plt.imshow(np.array([[tn, fp],[fn, tp]]), cmap="Blues")
    for (i,j),v in np.ndenumerate(np.array([[tn, fp],[fn, tp]])):
        plt.text(j, i, str(v), ha="center", va="center")
    plt.xticks([0,1], ["Pred 0","Pred 1"]); plt.yticks([0,1], ["True 0","True 1"])
    plt.title(f"Confusión – {split} (thr={m['thr']:.3f})")
    plt.savefig(GRAPHS/f"cm_{split.lower()}.png", dpi=150, bbox_inches="tight"); plt.close()

plot_and_save_curves(val_pat, "VAL")
plot_and_save_curves(test_pat,"TEST")
print("📈 Gráficos guardados en:", GRAPHS)


🧪 Reutilizando T=2.6731656060943605 y thr≈0.3651449978351593 de ft_effb3_patient_eval.json
[VAL] Throughput: 176.7 img/s
[TEST] Throughput: 140.0 img/s
✅ CSV guardados en /content/drive/MyDrive/CognitivaAI/ft_effb3_colab
VAL: {'AUC': 0.4398148148148148, 'PR-AUC': 0.39758997019708764, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
TEST: {'AUC': 0.5601851851851852, 'PR-AUC': 0.45730928905033735, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.3651449978351593, 'n': 47}
📝 Eval JSON actualizado → /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json


  .agg(y_true=("y_true", lambda v: int(np.round(np.mean(v)))) ,
  .agg(y_true=("y_true", lambda v: int(np.round(np.mean(v)))) ,


📈 Gráficos guardados en: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics
