In [1]:
# %% [markdown]
# # Setup: librerías y GPU

!pip -q install timm==1.0.9 --upgrade --no-cache-dir

import os, sys, json, math, time, shutil, gc, random, csv
from pathlib import Path
from dataclasses import dataclass
from typing import Tuple, List, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import timm
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix

# Reproducibilidad básica
def set_seed(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds, in order: Python's ``random``, NumPy's legacy global RNG, the
    PyTorch CPU generator and the generators of all CUDA devices.

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed all RNGs once at notebook start.
set_seed(42)

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
if device.type == "cuda":
    # Report the name of GPU index 0.
    print("GPU:", torch.cuda.get_device_name(0))
    print("CUDA capability ok")
else:
    # User-facing hint (Spanish): tells the user to enable a GPU runtime.
    print("⚠️ Sin GPU: activa GPU en Runtime > Change runtime type > GPU")


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[?25hDevice: cuda
GPU: Tesla T4
CUDA capability ok


In [2]:
# %% [markdown]
# # Drive y rutas del proyecto

# Mount Google Drive (Colab only) and define the project directories.
from google.colab import drive
try:
    drive.mount('/content/drive', force_remount=False)
except Exception as e:
    # NOTE(review): the caught exception `e` is never printed, so the real
    # cause of a mount failure is hidden behind this generic message.
    print("⚠️ Ya montado o directorio no vacío. Si da error, reinicia el kernel y vuelve a ejecutar.")

# Project layout on Drive: BASE is the project root, DATA holds the input
# mapping CSVs, OUT receives this run's artifacts, GRAPHS receives the figures.
BASE = Path("/content/drive/MyDrive/CognitivaAI")
DATA = BASE / "oas1_data"
OUT  = BASE / "ft_effb3_colab"
GRAPHS = OUT / "graphs_from_metrics"

# Create the output directories if they do not exist yet.
OUT.mkdir(parents=True, exist_ok=True)
GRAPHS.mkdir(parents=True, exist_ok=True)

print("BASE :", BASE)
print("DATA :", DATA)
print("OUT  :", OUT)
print("GRAPHS:", GRAPHS)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
BASE : /content/drive/MyDrive/CognitivaAI
DATA : /content/drive/MyDrive/CognitivaAI/oas1_data
OUT  : /content/drive/MyDrive/CognitivaAI/ft_effb3_colab
GRAPHS: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics


In [3]:
# %% [markdown]
# # Configuración de entrenamiento y datos

@dataclass
class Cfg:
    """Run configuration: data mapping CSVs, training hyper-parameters,
    calibration start value and the patient-level pooling rule.

    The default CSV paths are built from the module-level ``DATA`` directory
    when the class is defined.
    """
    # Slice-level mapping CSVs for the validation and test cohorts.
    val_map_csv: str = str(DATA / "oas1_val_colab_mapped.csv")
    test_map_csv:str = str(DATA / "oas1_test_colab_mapped.csv")
    # Optional explicit training CSV. Leave as None to split VAL into
    # train/holdout by patient; set a path to split that file instead.
    train_map_csv: Optional[str] = None

    # Training
    model_name: str = "tf_efficientnet_b3_ns"  # timm model identifier
    image_size: int = 300
    batch_size: int = 32
    num_workers: int = 2  # author's note: 2 workers usually run fine on a Colab T4
    epochs: int = 5
    lr: float = 3e-4
    weight_decay: float = 1e-5
    amp: bool = True  # enable CUDA automatic mixed precision
    patience: int = 3  # early-stopping patience, in epochs without AUC improvement
    # NOTE(review): not referenced by any cell visible in this notebook export.
    label_smoothing: float = 0.0

    # Calibration: starting value for the fitted temperature.
    temperature_init: float = 1.0

    # Patient-level aggregation of slice scores.
    pooling: str = "mean"  # "mean" | "max" | "median"

# Single shared configuration instance used by all later cells.
cfg = Cfg()
print(cfg)


Cfg(val_map_csv='/content/drive/MyDrive/CognitivaAI/oas1_data/oas1_val_colab_mapped.csv', test_map_csv='/content/drive/MyDrive/CognitivaAI/oas1_data/oas1_test_colab_mapped.csv', train_map_csv=None, model_name='tf_efficientnet_b3_ns', image_size=300, batch_size=32, num_workers=2, epochs=5, lr=0.0003, weight_decay=1e-05, amp=True, patience=3, label_smoothing=0.0, temperature_init=1.0, pooling='mean')


In [4]:
# %% [markdown]
# # Carga de mapeos y estandarización a ['patient_id','y_true','png_path']

def load_map(csv_path: str) -> pd.DataFrame:
    """Read a slice-mapping CSV and normalise it to three standard columns.

    Column names are lower-cased first. Each required column may be supplied
    under an alias; the first alias found (in the order listed) is copied:

    * ``y_true``     <- ``target`` | ``y`` | ``label`` (cast to int when copied)
    * ``png_path``   <- ``path`` | ``filepath`` | ``png``
    * ``patient_id`` <- ``subject_id`` | ``id`` | ``pid`` | ``patient``

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        A new DataFrame with exactly ``patient_id``, ``y_true``, ``png_path``.

    Raises:
        AssertionError: If a required column (or any of its aliases) is missing.
    """
    frame = pd.read_csv(csv_path)
    frame.columns = [name.lower() for name in frame.columns]

    def _first_alias(candidates):
        # First candidate currently present among the columns, else None.
        return next((c for c in candidates if c in frame.columns), None)

    # Label column: only aliased values are cast to int.
    if "y_true" not in frame.columns:
        label_col = _first_alias(["target", "y", "label"])
        if label_col is not None:
            frame["y_true"] = frame[label_col].astype(int)
    assert "y_true" in frame.columns, f"Falta columna 'y_true' (o 'target'/'y'/'label') en {csv_path}"

    # Image path column.
    if "png_path" not in frame.columns:
        path_col = _first_alias(["path", "filepath", "png"])
        if path_col is not None:
            frame["png_path"] = frame[path_col]
    assert "png_path" in frame.columns, f"Falta columna 'png_path' (o 'path'/'filepath') en {csv_path}"

    # Patient identifier column.
    if "patient_id" not in frame.columns:
        pid_col = _first_alias(["subject_id", "id", "pid", "patient"])
        if pid_col is not None:
            frame["patient_id"] = frame[pid_col]
    assert "patient_id" in frame.columns, f"Falta columna 'patient_id' en {csv_path}"

    standard = ["patient_id", "y_true", "png_path"]
    return frame[standard].copy()

# Load and standardise the validation and test slice mappings.
VAL = load_map(cfg.val_map_csv)
TEST = load_map(cfg.test_map_csv)

# Quick sanity print: shape, column names and the first three rows of each.
print("VAL estandarizado :", VAL.shape, "cols:", list(VAL.columns))
print(VAL.head(3), "\n")
print("TEST estandarizado:", TEST.shape, "cols:", list(TEST.columns))
print(TEST.head(3))


VAL estandarizado : (940, 3) cols: ['patient_id', 'y_true', 'png_path']
  patient_id  y_true                                           png_path
0  OAS1_0003       1  /content/drive/MyDrive/CognitivaAI/oas1_data/O...
1  OAS1_0003       1  /content/drive/MyDrive/CognitivaAI/oas1_data/O...
2  OAS1_0003       1  /content/drive/MyDrive/CognitivaAI/oas1_data/O... 

TEST estandarizado: (940, 3) cols: ['patient_id', 'y_true', 'png_path']
  patient_id  y_true                                           png_path
0  OAS1_0002       0  /content/drive/MyDrive/CognitivaAI/oas1_data/O...
1  OAS1_0002       0  /content/drive/MyDrive/CognitivaAI/oas1_data/O...
2  OAS1_0002       0  /content/drive/MyDrive/CognitivaAI/oas1_data/O...


In [5]:
# %% [markdown]
# # Split train/holdout por PACIENTE (estricto)

def patient_split(df: pd.DataFrame, holdout_patients: int = 10, seed: int = 42):
    """Split slices into train/holdout so that no patient spans both parts.

    Patient ids are sorted, shuffled with a generator seeded by ``seed`` and
    the first ``holdout_patients`` of them form the holdout set. The global
    RNGs are also re-seeded through ``set_seed(seed)``.

    Args:
        df: Slice-level frame with a ``patient_id`` column.
        holdout_patients: Number of patients placed in the holdout set.
        seed: Seed for the shuffle (and for the global re-seed).

    Returns:
        ``(df_train, df_hold)`` as independent copies.
    """
    set_seed(seed)
    # Sorting first makes the shuffle independent of the row order of `df`.
    ordered_ids = np.array(sorted(df["patient_id"].unique()))
    np.random.default_rng(seed).shuffle(ordered_ids)
    held_out = set(ordered_ids[:holdout_patients])
    in_holdout = df.patient_id.isin(held_out)
    return df[~in_holdout].copy(), df[in_holdout].copy()

# Choose the source of the training data, then split it by patient.
if cfg.train_map_csv is None:
    # No explicit training CSV: carve train/holdout out of VAL.
    train_df, holdout_df = patient_split(VAL, holdout_patients=10, seed=42)
else:
    TRAIN = load_map(cfg.train_map_csv)
    # Original author's note: use an explicit validation set if you have one;
    # otherwise a holdout is created from TRAIN (which is what this line does).
    train_df, holdout_df = patient_split(TRAIN, holdout_patients=10, seed=42)

# Report slice counts (shape) and number of distinct patients per split.
print(f"train_df  : {train_df.shape} pacientes:", train_df.patient_id.nunique())
print(f"holdout_df: {holdout_df.shape} pacientes:", holdout_df.patient_id.nunique())
print(f"test_df   : {TEST.shape} pacientes:", TEST.patient_id.nunique())


train_df  : (740, 3) pacientes: 37
holdout_df: (200, 3) pacientes: 10
test_df   : (940, 3) pacientes: 47


In [6]:
# %% [markdown]
# # Copia de imágenes a SSD local para acelerar DataLoader

from tqdm import tqdm

# Local directory that receives copies of the images (see the copy helpers below).
CACHE_DIR = Path("/content/mri_cache")
CACHE_DIR.mkdir(exist_ok=True, parents=True)

def _copy_list(paths: List[str]):
    """Copy files into ``CACHE_DIR`` and return their cached locations.

    Each cached file is named ``<hash>_<basename>``, where the hash is derived
    from the full source path. Two sources that share a basename but live in
    different directories therefore never collide. Files are first copied to a
    ``.part`` temp name and then renamed, so an interrupted copy is never
    mistaken for a complete cache entry on a later run.

    Copying is best-effort: when a file cannot be copied, the original path is
    kept in the result and the failure is counted.

    Args:
        paths: Source file paths.

    Returns:
        ``(new_paths, secs, missed)``: one path per input (cached path, or the
        original on failure), elapsed seconds, and the number of failed copies.
    """
    import hashlib  # local import: only this helper needs it

    start = time.time()
    new_paths = []
    missed = 0
    for p in tqdm(paths, desc="Copiando"):
        src = Path(p)
        # Prefix with a digest of the full path to keep cache names unique.
        digest = hashlib.sha1(os.fsencode(src)).hexdigest()[:12]
        dst = CACHE_DIR / f"{digest}_{src.name}"
        try:
            if not dst.exists():
                partial = dst.with_name(dst.name + ".part")
                shutil.copy2(src, partial)
                os.replace(partial, dst)  # atomic publish of the finished copy
            new_paths.append(str(dst))
        except Exception:
            # Deliberate best-effort: fall back to reading from the source path.
            missed += 1
            new_paths.append(p)
    secs = time.time() - start
    return new_paths, secs, missed

def cache_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose ``png_path`` values point at cached files.

    Each distinct path is copied once via ``_copy_list``; the resulting
    original-to-cached mapping is applied to every row. A one-line summary
    (files copied, elapsed time, throughput) is printed.

    Args:
        df: Frame with a ``png_path`` column.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    originals = df["png_path"].unique().tolist()
    cached, secs, missed = _copy_list(originals)

    # Map every original path to wherever it ended up.
    remap = {old: new for old, new in zip(originals, cached)}
    out = df.copy()
    out["png_path"] = out["png_path"].map(remap)

    total = len(originals)
    print(f"✅ Copiados {total-missed} / {total} en {secs:.1f}s | {total/max(secs,1):.1f} f/s")
    return out

# Copy the images of both cohorts and get frames that point at the copies.
VAL_c = cache_dataframe(VAL)
TEST_c = cache_dataframe(TEST)

# Redo the patient split on the cached frames (same holdout size and seed as
# the earlier split on the uncached frames).
if cfg.train_map_csv is None:
    train_df_c, holdout_df_c = patient_split(VAL_c, holdout_patients=10, seed=42)
else:
    # TRAIN is only defined by the earlier split cell when train_map_csv is set.
    TRAIN_c = cache_dataframe(TRAIN)
    train_df_c, holdout_df_c = patient_split(TRAIN_c, holdout_patients=10, seed=42)

print("train_df_c:", train_df_c.shape, "holdout_df_c:", holdout_df_c.shape, "test_df_c:", TEST_c.shape)


Copiando: 100%|██████████| 940/940 [03:18<00:00,  4.73it/s]


✅ Copiados 940 / 940 en 198.8s | 4.7 f/s


Copiando: 100%|██████████| 940/940 [03:12<00:00,  4.87it/s]

✅ Copiados 940 / 940 en 192.9s | 4.9 f/s
train_df_c: (740, 3) holdout_df_c: (200, 3) test_df_c: (940, 3)





In [7]:
# %% [markdown]
# # Dataset MRI (PNG)

from torchvision import transforms

def build_transforms(image_size: int):
    """Build the training and evaluation image pipelines.

    Both pipelines resize to ``image_size`` x ``image_size``, convert to a
    tensor and normalise with the same three-channel mean/std. The training
    pipeline additionally applies a random horizontal flip (p=0.5) and a
    random rotation of up to 10 degrees.

    Args:
        image_size: Side length, in pixels, of the square output image.

    Returns:
        ``(train_tf, eval_tf)`` torchvision ``Compose`` pipelines.
    """
    # Single source of truth for the normalisation statistics. The original
    # code repeated them and carried an unreachable `... if False else ...`.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    size = (image_size, image_size)

    train_tf = transforms.Compose([
        transforms.Resize(size),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    eval_tf = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    return train_tf, eval_tf

class SlicesDS(Dataset):
    """Dataset of image slices described by a mapping frame.

    Each item is ``(image, label, patient_id)``, read from the ``png_path``,
    ``y_true`` and ``patient_id`` columns of the frame.

    Args:
        df: Frame with ``png_path``, ``y_true`` and ``patient_id`` columns.
            Its index is reset, so items are addressed positionally.
        tfm: Callable applied to the loaded PIL image. ``None`` returns the
            PIL image unchanged.
        gray_to_rgb: When True (default) every image is converted to
            three-channel RGB. When False the image keeps its native mode;
            the transform must then cope with that channel count.
    """

    def __init__(self, df: pd.DataFrame, tfm, gray_to_rgb=True):
        self.df = df.reset_index(drop=True)
        self.tfm = tfm
        self.gray_to_rgb = gray_to_rgb

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        y = int(row["y_true"])
        # The context manager closes the file handle; convert()/copy() return
        # fully loaded images that stay valid after the file is closed.
        with Image.open(row["png_path"]) as raw:
            img = raw.convert("RGB") if self.gray_to_rgb else raw.copy()
        if self.tfm is not None:
            img = self.tfm(img)
        return img, y, row["patient_id"]

# Build the transforms, datasets and dataloaders for the three splits.
train_tf, eval_tf = build_transforms(cfg.image_size)

# Only the training split gets the augmenting transform.
train_ds   = SlicesDS(train_df_c,   train_tf)
holdout_ds = SlicesDS(holdout_df_c, eval_tf)
test_ds    = SlicesDS(TEST_c,       eval_tf)

# Training batches are shuffled; evaluation loaders keep the frame order.
train_dl   = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True,
                        num_workers=cfg.num_workers, pin_memory=True, drop_last=False)
holdout_dl = DataLoader(holdout_ds, batch_size=cfg.batch_size, shuffle=False,
                        num_workers=cfg.num_workers, pin_memory=True, drop_last=False)
test_dl    = DataLoader(test_ds, batch_size=cfg.batch_size, shuffle=False,
                        num_workers=cfg.num_workers, pin_memory=True, drop_last=False)

print("DL ok")


DL ok


In [8]:
# %% [markdown]
# # Modelo EfficientNet-B3

def build_model(name: str, pretrained: bool = True):
    """Create a timm model with a single-output classification head.

    Args:
        name: timm model identifier.
        pretrained: Forwarded to ``timm.create_model``.

    Returns:
        The model created with ``num_classes=1``.
    """
    return timm.create_model(name, pretrained=pretrained, num_classes=1)

# Model, loss, optimiser and AMP gradient scaler used by the training loop.
model = build_model(cfg.model_name, pretrained=True).to(device)
# NOTE(review): cfg.label_smoothing is not applied to this criterion.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
# The scaler is only active when running on CUDA with cfg.amp enabled.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type=="cuda" and cfg.amp))
# Best holdout AUC seen so far and where the corresponding weights are saved.
best_val_auc = -1.0
best_path = OUT / "best_ft_effb3.pth"
print(model.__class__.__name__)


  model = create_fn(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


EfficientNet


In [9]:
# %% [markdown]
# # Entrenamiento + validación (AUC holdout)

from tqdm.auto import tqdm

def run_epoch(dataloader, model, train=True):
    """Run one pass over ``dataloader``, optionally updating the model.

    Uses the module-level ``device``, ``cfg``, ``criterion``, ``optimizer``
    and ``scaler``.

    Args:
        dataloader: Yields ``(images, labels, patient_ids)`` batches.
        model: Network returning one logit per sample, shaped ``(batch, 1)``.
        train: When True the model is put in training mode and optimised;
            otherwise it is put in eval mode and gradients are disabled.

    Returns:
        ``(mean_loss, logits, labels)``: the unweighted mean of the per-batch
        losses (0.0 for an empty loader) and float32 NumPy arrays of all
        logits and labels in iteration order.
    """
    model.train(train)
    use_amp = device.type == "cuda" and cfg.amp
    losses = []
    all_logits = []
    all_labels = []
    for xb, yb, _ in tqdm(dataloader, leave=False):
        xb, yb = xb.to(device), yb.float().to(device)
        with torch.set_grad_enabled(train):
            with torch.amp.autocast('cuda', enabled=use_amp):
                logits = model(xb).squeeze(1)
                loss = criterion(logits, yb)
        if train:
            optimizer.zero_grad(set_to_none=True)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        losses.append(loss.detach().item())
        # Under CUDA autocast the logits can be half precision. Cast to float32
        # before leaving torch so later NumPy maths (sigmoid, ranking for AUC)
        # neither overflows nor collapses distinct scores into ties.
        all_logits.append(logits.detach().float().cpu().numpy())
        all_labels.append(yb.detach().cpu().numpy())
    logits = np.concatenate(all_logits) if all_logits else np.array([])
    labels = np.concatenate(all_labels) if all_labels else np.array([])
    return float(np.mean(losses)) if losses else 0.0, logits, labels

def eval_auc_pr(labels, logits):
    """Compute ROC-AUC and average precision from raw logits.

    Args:
        labels: Binary ground-truth labels.
        logits: Raw model scores, one per label.

    Returns:
        ``dict(AUC=..., PR_AUC=...)``. A metric is ``nan`` when the input is
        empty or when scikit-learn rejects the input with ``ValueError``.
    """
    if len(labels) == 0:
        return dict(AUC=np.nan, PR_AUC=np.nan)
    # Overflow-free sigmoid in float64: sigmoid(z) = exp(-log(1 + exp(-z))).
    # Computed once and shared by both metrics.
    z = np.asarray(logits, dtype=np.float64)
    probs = np.exp(-np.logaddexp(0.0, -z))
    # Only the "metric undefined for this input" error is mapped to nan;
    # anything else (e.g. a programming error) is allowed to surface.
    try:
        auc = roc_auc_score(labels, probs)
    except ValueError:
        auc = np.nan
    try:
        pr = average_precision_score(labels, probs)
    except ValueError:
        pr = np.nan
    return dict(AUC=auc, PR_AUC=pr)

# Training loop with checkpointing on best holdout AUC and early stopping.
# `pat` counts consecutive epochs without an AUC improvement.
pat = 0
for ep in range(1, cfg.epochs+1):
    t0 = time.time()
    # One optimisation pass over the training split, then one evaluation pass
    # over the holdout split.
    tr_loss, tr_logits, tr_y = run_epoch(train_dl, model, train=True)
    va_loss, va_logits, va_y = run_epoch(holdout_dl, model, train=False)
    metrics = eval_auc_pr(va_y, va_logits)
    dt = time.time()-t0
    print(f"Epoch {ep}/{cfg.epochs} | train loss {tr_loss:.4f} | val loss {va_loss:.4f} | "
          f"val AUC {metrics['AUC']:.3f} | val PR-AUC {metrics['PR_AUC']:.3f} | {dt/60:.1f} min")
    # NOTE(review): a nan AUC never compares greater, so it counts as "no
    # improvement"; if every epoch yields nan, no checkpoint is written here.
    # NOTE(review): best_val_auc is initialised in the model-setup cell, so
    # re-running only this cell keeps the previous best value.
    if metrics["AUC"] > best_val_auc:
        best_val_auc = metrics["AUC"]
        torch.save(model.state_dict(), best_path)
        pat = 0
        print("  ↳ ✅ Nuevo mejor checkpoint:", best_path.name)
    else:
        pat += 1
        if pat >= cfg.patience:
            print("⏹️ Early stopping.")
            break

print("Mejor AUC holdout:", best_val_auc)


  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/5 | train loss 1.1803 | val loss 1.2497 | val AUC 0.455 | val PR-AUC 0.308 | 0.7 min
  ↳ ✅ Nuevo mejor checkpoint: best_ft_effb3.pth


  0%|          | 0/24 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8ab3bc4860>Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8ab3bc4860>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1610, in _shutdown_workers
    self._pin_memory_thread.join()
  File "/usr/lib/python3.12/threading.py", line 1146, in join
    raise RuntimeError("cannot join current thread")
RuntimeError: cannot join current thread

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8ab3bc4860>    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, i

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 2/5 | train loss 0.6283 | val loss 1.6452 | val AUC 0.442 | val PR-AUC 0.310 | 0.2 min


  auc = roc_auc_score(labels, 1/(1+np.exp(-logits)))
  pr = average_precision_score(labels, 1/(1+np.exp(-logits)))


  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 3/5 | train loss 0.3465 | val loss 4.9224 | val AUC 0.423 | val PR-AUC 0.293 | 0.4 min


  auc = roc_auc_score(labels, 1/(1+np.exp(-logits)))
  pr = average_precision_score(labels, 1/(1+np.exp(-logits)))


  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 4/5 | train loss 0.2579 | val loss 4.0365 | val AUC 0.435 | val PR-AUC 0.318 | 0.4 min
⏹️ Early stopping.
Mejor AUC holdout: 0.45488095238095244


  auc = roc_auc_score(labels, 1/(1+np.exp(-logits)))
  pr = average_precision_score(labels, 1/(1+np.exp(-logits)))


In [10]:
# %% [markdown]
# # Inferencia en holdout y test (logits por slice)

# Restore the weights saved at best_path and switch the model to eval mode.
model.load_state_dict(torch.load(best_path, map_location=device))
model.eval()

@torch.inference_mode()
def predict_slices(dataloader):
    """Score every slice of ``dataloader`` with the module-level ``model``.

    Args:
        dataloader: Yields ``(images, labels, patient_ids)`` batches.

    Returns:
        ``(logits, labels, pids)`` as NumPy arrays in iteration order; logits
        are float32 and labels are integers.
    """
    amp_on = device.type == "cuda" and cfg.amp
    logit_chunks, label_chunks, patient_ids = [], [], []
    for images, targets, batch_pids in tqdm(dataloader):
        images = images.to(device)
        with torch.amp.autocast('cuda', enabled=amp_on):
            batch_logits = model(images).squeeze(1)
        # Upcast before leaving torch so half-precision scores are not kept.
        logit_chunks.append(batch_logits.float().cpu().numpy())
        label_chunks.append(targets.numpy().astype(int))
        patient_ids += list(batch_pids)
    logits = np.concatenate(logit_chunks) if logit_chunks else np.array([])
    labels = np.concatenate(label_chunks) if label_chunks else np.array([])
    return logits, labels, np.array(patient_ids)

# Slice-level logits for the holdout split (named "val" from here on) and the test cohort.
val_logits, val_y, val_pids = predict_slices(holdout_dl)
test_logits, test_y, test_pids = predict_slices(test_dl)

def to_df(logits, labels, pids, df_src):
    """Assemble per-slice predictions into a DataFrame.

    Args:
        logits: Array of raw scores (stored as float).
        labels: Array of ground-truth labels (stored as int).
        pids: Patient id of each slice.
        df_src: Source frame; accepted for call-site symmetry but not used.

    Returns:
        DataFrame with columns ``patient_id``, ``y_true``, ``logits``.
    """
    frame = pd.DataFrame({"patient_id": pids})
    return frame.assign(
        y_true=labels.astype(int),
        logits=logits.astype(float),
    )

# Wrap the raw slice predictions in DataFrames and preview both.
val_slices_df  = to_df(val_logits,  val_y,  val_pids,  holdout_df_c)
test_slices_df = to_df(test_logits, test_y, test_pids, TEST_c)

# Last expression of the cell: displays the first rows of each frame as a tuple.
val_slices_df.head(), test_slices_df.head()


  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8ab3bc4860>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8ab3bc4860>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

(  patient_id  y_true    logits
 0  OAS1_0022       1 -3.466797
 1  OAS1_0022       1 -0.086426
 2  OAS1_0022       1 -1.778320
 3  OAS1_0022       1  0.452637
 4  OAS1_0022       1 -0.616211,
   patient_id  y_true    logits
 0  OAS1_0002       0  1.630859
 1  OAS1_0002       0  0.195312
 2  OAS1_0002       0  2.990234
 3  OAS1_0002       0 -3.125000
 4  OAS1_0002       0  0.219116)

In [11]:
# %% [markdown]
# # Calibración por temperature scaling (en holdout)

class _TempScale(nn.Module):
    def __init__(self, T_init=1.0):
        super().__init__()
        self.T = nn.Parameter(torch.tensor(float(T_init)))

    def forward(self, logits):
        return logits / self.T.clamp(min=1e-3)

def fit_temperature(logits: np.ndarray, labels: np.ndarray, T_init=1.0, lr=0.01, steps=500):
    """Fit a single temperature by minimising BCE on ``logits / T``.

    Optimises a ``_TempScale`` module with one L-BFGS ``step`` call capped at
    ``steps`` iterations.

    Args:
        logits: Uncalibrated logits.
        labels: Binary targets, one per logit.
        T_init: Starting temperature.
        lr: L-BFGS learning rate.
        steps: Maximum number of L-BFGS iterations.

    Returns:
        The fitted temperature as a Python float. The value is floored at
        1e-3, the same floor ``_TempScale.forward`` applies, so dividing by it
        reproduces what was optimised. ``T_init`` is returned when there is no
        data or the fit ends in a non-finite value.
    """
    if len(logits) == 0:
        # BCE over an empty batch is nan; there is nothing to fit.
        return float(T_init)

    modelT = _TempScale(T_init).to(device)
    opt = torch.optim.LBFGS(modelT.parameters(), lr=lr, max_iter=steps)
    x = torch.tensor(logits, dtype=torch.float32, device=device)
    y = torch.tensor(labels, dtype=torch.float32, device=device)

    bce = nn.BCEWithLogitsLoss()

    def closure():
        opt.zero_grad(set_to_none=True)
        out = modelT(x)
        loss = bce(out, y)
        loss.backward()
        return loss

    opt.step(closure)
    with torch.no_grad():
        # Report the effective (clamped) temperature, not the raw parameter: a
        # raw value <= 0 would flip or blow up `logits / T` at the call site.
        T_val = float(modelT.T.detach().clamp(min=1e-3).cpu().item())
    if not math.isfinite(T_val):
        return float(T_init)
    return T_val

# Fit the calibration temperature on the holdout logits only.
T = fit_temperature(val_logits, val_y, T_init=cfg.temperature_init, lr=0.01, steps=200)
print(f"✅ Temperature fitted: T = {T:.4f}")

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Uses the identity ``sigmoid(z) = exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp``, so large negative inputs give 0.0 instead of triggering
    an overflow warning in ``exp``.

    Args:
        z: Scalar or NumPy array of logits.

    Returns:
        Probabilities with the same shape as ``z``.
    """
    return np.exp(-np.logaddexp(0, -z))

# Calibrated slice probabilities: sigmoid of the temperature-scaled logits.
val_prob  = sigmoid(val_logits / T)
test_prob = sigmoid(test_logits / T)

# Slice-level AUC / PR-AUC after calibration, on holdout and test.
print("Holdout AUC/PR-AUC tras calibración:",
      roc_auc_score(val_y, val_prob).round(3),
      average_precision_score(val_y, val_prob).round(3))
print("Test   AUC/PR-AUC tras calibración:",
      roc_auc_score(test_y, test_prob).round(3),
      average_precision_score(test_y, test_prob).round(3))


Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
  loss = float(closure())


✅ Temperature fitted: T = 2.3027
Holdout AUC/PR-AUC tras calibración: 0.455 0.308
Test   AUC/PR-AUC tras calibración: 0.558 0.488


In [12]:
# %% [markdown]
# # Agregar por paciente y calcular métricas

def aggregate_patient(df_slices: pd.DataFrame, pooling="mean"):
    """Collapse slice-level scores into one row per patient.

    Args:
        df_slices: Frame with ``patient_id``, ``y_true`` and ``y_score``.
        pooling: How a patient's slice scores are combined: ``"mean"``,
            ``"max"`` or ``"median"``.

    Returns:
        DataFrame with columns ``patient_id``, ``y_true`` (rounded mean of the
        slice labels, as int) and ``y_score`` (pooled score).

    Raises:
        ValueError: If ``pooling`` is not one of the supported names.
    """
    by_patient = df_slices.groupby("patient_id")
    if pooling not in ("mean", "max", "median"):
        raise ValueError("pooling desconocido")
    # The pooling name doubles as the groupby reduction method name.
    pooled = getattr(by_patient["y_score"], pooling)()
    patient_label = by_patient["y_true"].mean().round().astype(int)
    return pd.DataFrame({
        "patient_id": pooled.index,
        "y_true": patient_label.values,
        "y_score": pooled.values,
    })

# Slice-level frames holding the calibrated probabilities as y_score.
val_calib_slices = pd.DataFrame({"patient_id": val_pids, "y_true": val_y, "y_score": val_prob})
test_calib_slices= pd.DataFrame({"patient_id": test_pids,"y_true": test_y,"y_score": test_prob})

# One row per patient, pooled according to cfg.pooling.
val_patient  = aggregate_patient(val_calib_slices, cfg.pooling)
test_patient = aggregate_patient(test_calib_slices, cfg.pooling)

def search_best_thr(df: pd.DataFrame, mode="f1", force_recall1=False):
    """Grid-search the decision threshold that maximises F1 or accuracy.

    Scans 181 evenly spaced thresholds in [0.05, 0.95]; a sample is predicted
    positive when ``y_score >= thr``. Ties keep the lowest threshold because
    only a strictly better score replaces the current best.

    Args:
        df: Frame with binary ``y_true`` and ``y_score`` columns.
        mode: ``"f1"`` to maximise F1; any other value maximises accuracy.
        force_recall1: When True, thresholds whose recall is below 0.999 are
            skipped.

    Returns:
        The best threshold, or 0.5 when no threshold qualifies.
    """
    y_true = df.y_true.values
    y_score = df.y_score.values
    n_samples = len(y_true)
    # Loop-invariant class masks, hoisted out of the scan.
    is_pos = y_true == 1
    is_neg = y_true == 0

    best_thr, best_score = None, -1
    for thr in np.linspace(0.05, 0.95, 181):
        pred_pos = y_score >= thr
        TP = (pred_pos & is_pos).sum()
        FP = (pred_pos & is_neg).sum()
        TN = (~pred_pos & is_neg).sum()
        FN = (~pred_pos & is_pos).sum()
        # The 1e-9 terms guard against division by zero.
        prec = TP/(TP+FP+1e-9)
        rec  = TP/(TP+FN+1e-9)
        if force_recall1 and rec < 0.999:
            continue
        if mode == "f1":
            score = 2*prec*rec/(prec+rec+1e-9)
        else:
            score = (TP+TN)/n_samples
        if score > best_score:
            best_thr, best_score = thr, score
    return best_thr if best_thr is not None else 0.5

# Default: pick the best-F1 threshold on the holdout patients, restricted to
# thresholds that keep recall at 1 when such thresholds exist.
thr = search_best_thr(val_patient, mode="f1", force_recall1=True)
# NOTE(review): search_best_thr returns 0.5 rather than None when nothing
# qualifies, so this branch is never taken.
if thr is None:
    thr = 0.3651449978351593  # fallback to a value known from previous runs

def patient_metrics(df: pd.DataFrame, thr: float):
    """Compute patient-level metrics at a fixed decision threshold.

    Args:
        df: Frame with binary ``y_true`` and ``y_score`` columns.
        thr: A patient is predicted positive when ``y_score >= thr``.

    Returns:
        Dict with ``AUC``, ``PR_AUC``, ``Acc``, ``P`` (precision), ``R``
        (recall), ``thr``, ``n`` and the confusion counts ``TP``, ``FP``,
        ``TN``, ``FN``. ``AUC``/``PR_AUC`` are ``nan`` when scikit-learn
        rejects the input with ``ValueError``.
    """
    y_true = df.y_true.values
    y_score= df.y_score.values
    y_pred = (y_score >= thr).astype(int)
    # Undefined ranking metrics become nan instead of aborting the report,
    # matching the convention used by eval_auc_pr in this notebook.
    try:
        auc = roc_auc_score(y_true, y_score)
    except ValueError:
        auc = np.nan
    try:
        pr = average_precision_score(y_true, y_score)
    except ValueError:
        pr = np.nan
    # Pin labels so the matrix is always 2x2; without this, data containing a
    # single class yields a 1x1 matrix and the four-way unpacking fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    acc = (tp+tn)/len(y_true)
    # The 1e-9 terms guard against division by zero.
    P = tp/(tp+fp+1e-9)
    R = tp/(tp+fn+1e-9)
    return dict(AUC=auc, PR_AUC=pr, Acc=acc, P=P, R=R, thr=thr, n=len(y_true), TP=int(tp), FP=int(fp), TN=int(tn), FN=int(fn))

# Patient-level metrics on holdout and test, both at the holdout-selected threshold.
VAL_MET = patient_metrics(val_patient, thr)
TEST_MET= patient_metrics(test_patient, thr)

print("Umbral seleccionado (val):", thr)
print("VAL :", VAL_MET)
print("TEST:", TEST_MET)


Umbral seleccionado (val): 0.05
VAL : {'AUC': np.float64(0.5238095238095237), 'PR_AUC': np.float64(0.43333333333333335), 'Acc': np.float64(0.3), 'P': np.float64(0.29999999997), 'R': np.float64(0.9999999996666666), 'thr': np.float64(0.05), 'n': 10, 'TP': 3, 'FP': 7, 'TN': 0, 'FN': 0}
TEST: {'AUC': np.float64(0.5851851851851851), 'PR_AUC': np.float64(0.5821968787479187), 'Acc': np.float64(0.425531914893617), 'P': np.float64(0.42553191488456316), 'R': np.float64(0.99999999995), 'thr': np.float64(0.05), 'n': 47, 'TP': 20, 'FP': 27, 'TN': 0, 'FN': 0}


In [13]:
# %% [markdown]
# # Persistencia y gráficas

# Output file locations, all under OUT.
val_slices_csv  = OUT / "val_slices_preds.csv"
test_slices_csv = OUT / "test_slices_preds.csv"
val_patient_csv = OUT / "val_patient_preds.csv"
test_patient_csv= OUT / "test_patient_preds.csv"
eval_json       = OUT / "ft_effb3_patient_eval.json"

# Slice-level files carry the calibrated probabilities (y_score), not raw logits.
val_calib_slices.to_csv(val_slices_csv, index=False)
test_calib_slices.to_csv(test_slices_csv, index=False)
val_patient.to_csv(val_patient_csv, index=False)
test_patient.to_csv(test_patient_csv, index=False)

# Evaluation summary: pooling rule, fitted temperature, chosen threshold and
# the patient-level metric dictionaries.
payload = {
    "pooling_used": cfg.pooling,
    "temperature": float(T),
    "threshold": float(thr),
    "val_metrics": VAL_MET,
    "test_metrics": TEST_MET,
}
with open(eval_json, "w", encoding="utf-8") as f:
    json.dump(payload, f, indent=2)

print("📄 Guardados:")
print(" -", val_slices_csv)
print(" -", test_slices_csv)
print(" -", val_patient_csv)
print(" -", test_patient_csv)
print(" -", eval_json)

# --------- Gráficas sencillas ----------
def bar_metric(d, title, fname):
    """Save a bar chart of the headline metrics.

    Args:
        d: Metrics dict containing ``AUC``, ``PR_AUC``, ``Acc``, ``P``, ``R``.
        title: Figure title.
        fname: File name, created inside ``GRAPHS``.

    Returns:
        Path of the saved PNG.
    """
    fig, ax = plt.subplots(figsize=(5,3))
    metric_names = ["AUC","PR_AUC","Acc","P","R"]
    ax.bar(metric_names, [d[name] for name in metric_names])
    ax.set_ylim(0, 1.05)
    ax.set_title(title)
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    out_path = GRAPHS / fname
    fig.savefig(out_path, dpi=150)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    return out_path

# Bar charts of the patient-level metrics for holdout (VAL) and test.
p1 = bar_metric(VAL_MET, "VAL metrics (patient-level)", "ft_b3_bars_val.png")
p2 = bar_metric(TEST_MET,"TEST metrics (patient-level)","ft_b3_bars_test.png")

# punto PR (precision-recall en el umbral)
def pr_point(df, title, fname, thr):
    """Save a plot of the single (recall, precision) point reached at ``thr``.

    Args:
        df: Frame with binary ``y_true`` and ``y_score`` columns.
        title: Figure title; the threshold is appended on a second line.
        fname: File name, created inside ``GRAPHS``.
        thr: A sample is predicted positive when ``y_score >= thr``.

    Returns:
        Path of the saved PNG.
    """
    truth = df.y_true.values
    scores = df.y_score.values
    predicted = (scores >= thr).astype(int)

    hits = ((predicted == 1) & (truth == 1)).sum()
    false_alarms = ((predicted == 1) & (truth == 0)).sum()
    misses = ((predicted == 0) & (truth == 1)).sum()
    # The 1e-9 terms guard against division by zero.
    prec = hits/(hits+false_alarms+1e-9)
    rec = hits/(hits+misses+1e-9)

    fig, ax = plt.subplots(figsize=(4,4))
    ax.scatter([rec], [prec], s=120)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title(title + f"\nthr={thr:.3f}")
    ax.grid(alpha=0.3)
    fig.tight_layout()
    out_path = GRAPHS / fname
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
    return out_path

# Precision/recall operating point of the test patients at the selected threshold.
p3 = pr_point(test_patient, "PR point (TEST)", "ft_b3_pr_point.png", thr)

# matriz de confusión
def plot_confusion(d, title, fname):
    """Save a 2x2 confusion-matrix image built from stored counts.

    Args:
        d: Metrics dict containing ``TP``, ``FP``, ``TN`` and ``FN``.
        title: Figure title.
        fname: File name, created inside ``GRAPHS``.

    Returns:
        Path of the saved PNG.
    """
    # Rows are the true class, columns the predicted class.
    counts = np.array([
        [d["TN"], d["FP"]],
        [d["FN"], d["TP"]],
    ])
    fig, ax = plt.subplots(figsize=(4,4))
    ax.imshow(counts, cmap="Blues")
    for (row, col), value in np.ndenumerate(counts):
        ax.text(col, row, str(value), ha="center", va="center", fontsize=14)
    ax.set_xticks([0,1])
    ax.set_xticklabels(["Pred 0","Pred 1"])
    ax.set_yticks([0,1])
    ax.set_yticklabels(["True 0","True 1"])
    ax.set_title(title)
    fig.tight_layout()
    out_path = GRAPHS / fname
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
    return out_path

# Confusion matrix for the test patients, drawn from the stored counts.
p4 = plot_confusion(TEST_MET, "Confusion (TEST)", "ft_b3_patient_confusion_from_metrics.png")

# List every figure that was written.
print("🖼️ Gráficos guardados en:", GRAPHS)
for p in [p1,p2,p3,p4]:
    print(" -", p)


📄 Guardados:
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/val_slices_preds.csv
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/test_slices_preds.csv
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/val_patient_preds.csv
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/test_patient_preds.csv
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json
🖼️ Gráficos guardados en: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics/ft_b3_bars_val.png
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics/ft_b3_bars_test.png
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics/ft_b3_pr_point.png
 - /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics/ft_b3_patient_confusion_from_metrics.png


In [14]:
# %% [markdown]
# # Resumen final (rutas + métricas)

from datetime import datetime
# Timestamp of when this summary cell ran (local time of the runtime).
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Recap of the artifact locations produced by the earlier cells.
print(f"🕒 Finalizado: {ts}")
print("\n📂 Salidas principales:")
print("  • Checkpoint:", str(OUT / "best_ft_effb3.pth"))
print("  • Eval JSON :", str(OUT / "ft_effb3_patient_eval.json"))
print("  • CSV VAL   :", str(OUT / "val_patient_preds.csv"))
print("  • CSV TEST  :", str(OUT / "test_patient_preds.csv"))
print("  • Gráficas  :", str(GRAPHS))

# Patient-level metric dictionaries computed in the aggregation cell.
print("\n📊 Métricas (patient-level):")
print("VAL :", VAL_MET)
print("TEST:", TEST_MET)


🕒 Finalizado: 2025-08-25 00:06:26

📂 Salidas principales:
  • Checkpoint: /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/best_ft_effb3.pth
  • Eval JSON : /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/ft_effb3_patient_eval.json
  • CSV VAL   : /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/val_patient_preds.csv
  • CSV TEST  : /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/test_patient_preds.csv
  • Gráficas  : /content/drive/MyDrive/CognitivaAI/ft_effb3_colab/graphs_from_metrics

📊 Métricas (patient-level):
VAL : {'AUC': np.float64(0.5238095238095237), 'PR_AUC': np.float64(0.43333333333333335), 'Acc': np.float64(0.3), 'P': np.float64(0.29999999997), 'R': np.float64(0.9999999996666666), 'thr': np.float64(0.05), 'n': 10, 'TP': 3, 'FP': 7, 'TN': 0, 'FN': 0}
TEST: {'AUC': np.float64(0.5851851851851851), 'PR_AUC': np.float64(0.5821968787479187), 'Acc': np.float64(0.425531914893617), 'P': np.float64(0.42553191488456316), 'R': np.float64(0.99999999995), 'thr': np.float64(0.05), 