In [1]:
# Celda 0: Instalaci√≥n (si lo necesitas). TimM para EfficientNet-B3.
!pip -q install timm==0.9.16

import os, sys, json, time, math, random, shutil, gc
from pathlib import Path
from dataclasses import dataclass
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.io import read_image
from PIL import Image

import timm
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score, confusion_matrix

# Determinismo razonable
def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

print("PyTorch:", torch.__version__, "| CUDA disponible:", torch.cuda.is_available())


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m2.2/2.2 MB[0m [31m71.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.2/2.2 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[?25hPyTorch: 2.8.0+cu126 | CUDA disponible: True


In [2]:
# Celda 1: Montar Drive y preparar rutas
from google.colab import drive
drive.mount('/content/drive')

BASE_DIR   = Path("/content/drive/MyDrive/CognitivaAI")
DATA_DIR   = BASE_DIR / "oas1_data"
OUT_DIR    = BASE_DIR / "ft_effb3_stable_colab_plus"
GRAPHS_DIR = OUT_DIR / "graphs_from_metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True)
GRAPHS_DIR.mkdir(parents=True, exist_ok=True)

VAL_MAP  = DATA_DIR / "oas1_val_colab_mapped.csv"
TEST_MAP = DATA_DIR / "oas1_test_colab_mapped.csv"

print("Device:", "cuda" if torch.cuda.is_available() else "cpu")
print("Mounted at /content/drive")
print("BASE   :", BASE_DIR)
print("DATA   :", DATA_DIR, "| exists:", DATA_DIR.exists())
print("OUT    :", OUT_DIR)
print("GRAPHS :", GRAPHS_DIR)
print("VAL_MAP:", VAL_MAP, "| exists:", VAL_MAP.exists())
print("TEST_MAP:", TEST_MAP, "| exists:", TEST_MAP.exists())


Mounted at /content/drive
Device: cuda
Mounted at /content/drive
BASE   : /content/drive/MyDrive/CognitivaAI
DATA   : /content/drive/MyDrive/CognitivaAI/oas1_data | exists: True
OUT    : /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus
GRAPHS : /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/graphs_from_metrics
VAL_MAP: /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_val_colab_mapped.csv | exists: True
TEST_MAP: /content/drive/MyDrive/CognitivaAI/oas1_data/oas1_test_colab_mapped.csv | exists: True


In [3]:
# Celda 2: Configuraci√≥n y lectura de CSV est√°ndar (mapped => patient_id, target, png_path)

@dataclass
class CFG:
    img_size: int = 300
    batch_size: int = 64
    num_workers: int = 2
    seeds: tuple = (41, 42, 43)    # ensemble de 3 seeds
    holdout_patients: int = 10     # desde VAL asignamos 10 pacientes a holdout (como en P9)

@dataclass
class TrainCfg:
    epochs: int = 8
    lr: float = 1e-4
    wd: float = 1e-5
    amp: bool = True
    patience: int = 3
    label_smoothing: float = 0.05   # Mejora 1: label smoothing
    use_pos_weight: bool = True    # Alternativa a smoothing (d√©jalo False si usas smoothing)
    pos_weight: float = 1.5

cfg = CFG()
tcfg = TrainCfg()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_mapped_csv(path: Path) -> pd.DataFrame:
    df = pd.read_csv(path)
    cols = [c.lower() for c in df.columns]
    df.columns = cols
    # Esperamos 'patient_id', 'target', 'png_path'
    assert all([c in df.columns for c in ["patient_id","target","png_path"]]), f"CSV {path} debe tener patient_id,target,png_path"
    df = df.rename(columns={"target":"y_true"})
    df["y_true"] = df["y_true"].astype(int)
    return df

val_map  = load_mapped_csv(VAL_MAP)
test_map = load_mapped_csv(TEST_MAP)

# Split train/holdout desde VAL por pacientes
patients = val_map["patient_id"].unique()
rng = np.random.default_rng(42)
rng.shuffle(patients)
holdout_pat = set(patients[:cfg.holdout_patients])

train_df   = val_map[~val_map["patient_id"].isin(holdout_pat)].reset_index(drop=True)
holdout_df = val_map[ val_map["patient_id"].isin(holdout_pat)].reset_index(drop=True)
test_df    = test_map.copy().reset_index(drop=True)

def summarize_df(df, name):
    print(f"{name}: shape={df.shape}, pacientes={df['patient_id'].nunique()}, y_mean={df['y_true'].mean():.3f}")

print("CFG", cfg)
summarize_df(val_map, "VAL mapeado")
summarize_df(test_df, "TEST mapeado")
summarize_df(train_df, "train_df")
summarize_df(holdout_df, "holdout_df")
summarize_df(test_df, "test_df")

print("\nEjemplo train_df:"); display(train_df.head(3))
print("\nEjemplo holdout_df:"); display(holdout_df.head(3))
print("\nEjemplo test_df:"); display(test_df.head(3))

CFG CFG(img_size=300, batch_size=64, num_workers=2, seeds=(41, 42, 43), holdout_patients=10)
VAL mapeado: shape=(940, 6), pacientes=47, y_mean=0.426
TEST mapeado: shape=(940, 6), pacientes=47, y_mean=0.426
train_df: shape=(740, 6), pacientes=37, y_mean=0.459
holdout_df: shape=(200, 6), pacientes=10, y_mean=0.300
test_df: shape=(940, 6), pacientes=47, y_mean=0.426

Ejemplo train_df:


Unnamed: 0,png_path,y_true,patient_id,scan_id,source_hdr,has_mask
0,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0003,OAS1_0003_MR1,DATA\OAS1_RAW\OAS1_0003_MR1\RAW\OAS1_0003_MR1_...,1
1,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0003,OAS1_0003_MR1,DATA\OAS1_RAW\OAS1_0003_MR1\RAW\OAS1_0003_MR1_...,1
2,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0003,OAS1_0003_MR1,DATA\OAS1_RAW\OAS1_0003_MR1\RAW\OAS1_0003_MR1_...,1



Ejemplo holdout_df:


Unnamed: 0,png_path,y_true,patient_id,scan_id,source_hdr,has_mask
0,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0022,OAS1_0022_MR1,DATA\OAS1_RAW\OAS1_0022_MR1\RAW\OAS1_0022_MR1_...,1
1,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0022,OAS1_0022_MR1,DATA\OAS1_RAW\OAS1_0022_MR1\RAW\OAS1_0022_MR1_...,1
2,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,1,OAS1_0022,OAS1_0022_MR1,DATA\OAS1_RAW\OAS1_0022_MR1\RAW\OAS1_0022_MR1_...,1



Ejemplo test_df:


Unnamed: 0,png_path,y_true,patient_id,scan_id,source_hdr,has_mask
0,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,0,OAS1_0002,OAS1_0002_MR1,DATA\OAS1_RAW\OAS1_0002_MR1\RAW\OAS1_0002_MR1_...,1
1,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,0,OAS1_0002,OAS1_0002_MR1,DATA\OAS1_RAW\OAS1_0002_MR1\RAW\OAS1_0002_MR1_...,1
2,/content/drive/MyDrive/CognitivaAI/oas1_data/O...,0,OAS1_0002,OAS1_0002_MR1,DATA\OAS1_RAW\OAS1_0002_MR1\RAW\OAS1_0002_MR1_...,1


In [7]:
# Celda 3: Dataset + Dataloaders con RandAugment suave y normalizaci√≥n simple

from torchvision.transforms import RandAugment, ColorJitter

class MRISliceDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self): return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(row["png_path"]).convert("RGB")  # RGB para EffNet
        if self.transform is not None:
            img = self.transform(img)
        y = float(row["y_true"])
        return img, torch.tensor([y], dtype=torch.float32), row["patient_id"]

# Transforms
train_tf = T.Compose([
    T.Resize((cfg.img_size, cfg.img_size)),
    RandAugment(num_ops=2, magnitude=5),         # Mejora 1: RandAugment suave
    ColorJitter(brightness=0.05, contrast=0.05),
    T.ToTensor(),
    T.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])
val_tf = T.Compose([
    T.Resize((cfg.img_size, cfg.img_size)),
    T.ToTensor(),
    T.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])

train_ds   = MRISliceDataset(train_df,   transform=train_tf)
holdout_ds = MRISliceDataset(holdout_df, transform=val_tf)
test_ds    = MRISliceDataset(test_df,    transform=val_tf)

def make_loader(ds, shuffle, bs=cfg.batch_size):
    return DataLoader(ds, batch_size=bs, shuffle=shuffle, num_workers=cfg.num_workers, pin_memory=True)

train_loader   = make_loader(train_ds, shuffle=True)
holdout_loader = make_loader(holdout_ds, shuffle=False)
test_loader    = make_loader(test_ds, shuffle=False)

print("Loaders creados. Batches train/holdout/test:",
      len(train_loader), len(holdout_loader), len(test_loader))


Loaders creados. Batches train/holdout/test: 12 4 15


In [8]:
# Celda 4: Modelo EffNet-B3 + loss (label smoothing) + AdamW + warmup+cosine + AMP + early stopping

def create_model(num_classes=1):
    model = timm.create_model("tf_efficientnet_b3_ns", pretrained=True, in_chans=3, num_classes=num_classes)
    return model

def create_loss():
    if tcfg.use_pos_weight:
        pw = torch.tensor([tcfg.pos_weight], device=device)
        return nn.BCEWithLogitsLoss(pos_weight=pw)
    else:
        # Removed label_smoothing due to TypeError. Using pos_weight instead.
        return nn.BCEWithLogitsLoss()

def cosine_with_warmup(optimizer, total_steps, warmup_ratio=0.1):
    warmup_steps = max(1, int(total_steps * warmup_ratio))
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        # Cosine decay hasta 0
        progress = (current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
        return 0.5 * (1.0 + math.cos(math.pi * progress))
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

def evaluate_auc_pr(model, loader):
    model.eval()
    all_logits, all_y = [], []
    with torch.no_grad(), torch.amp.autocast('cuda', enabled=(device.type=="cuda" and tcfg.amp)):
        for x, y, _ in loader:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            logits = model(x)
            all_logits.append(logits.detach().float().cpu())
            all_y.append(y.detach().float().cpu())
    all_logits = torch.cat(all_logits, 0).squeeze(1).numpy()
    all_y = torch.cat(all_y, 0).squeeze(1).numpy()
    probs = 1/(1+np.exp(-all_logits))
    auc  = roc_auc_score(all_y, probs) if len(np.unique(all_y))>1 else np.nan
    pr   = average_precision_score(all_y, probs) if len(np.unique(all_y))>1 else np.nan
    return auc, pr

def train_one_seed(seed, train_loader, holdout_loader):
    set_seed(seed)
    model = create_model().to(device)
    loss_fn = create_loss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=tcfg.lr, weight_decay=tcfg.wd)

    total_steps = tcfg.epochs * len(train_loader)
    scheduler = cosine_with_warmup(optimizer, total_steps, warmup_ratio=0.1)
    scaler = torch.amp.GradScaler('cuda', enabled=(device.type=="cuda" and tcfg.amp))

    best_auc = -1.0
    best_path = OUT_DIR / f"effb3_plus_seed{seed}.pth"
    history = []
    no_improve = 0

    print(f"Seed {seed} | Epochs={tcfg.epochs} | steps/epoch={len(train_loader)}")
    step = 0
    for epoch in range(1, tcfg.epochs+1):
        model.train()
        losses = []
        pbar = tqdm(train_loader, desc=f"Seed {seed} | Epoch {epoch}/{tcfg.epochs}", leave=False)
        for x, y, _ in pbar:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            with torch.amp.autocast('cuda', enabled=(device.type=="cuda" and tcfg.amp)):
                logits = model(x).squeeze(1)
                loss = loss_fn(logits, y.squeeze(1))
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            losses.append(loss.item())
            step += 1
            pbar.set_postfix(loss=np.mean(losses))

        val_auc, val_pr = evaluate_auc_pr(model, holdout_loader)
        history.append({"epoch":epoch, "loss":float(np.mean(losses)), "holdout_auc":float(val_auc), "holdout_pr":float(val_pr)})
        print(f"  -> Holdout AUC={val_auc:.3f} | PR-AUC={val_pr:.3f} | loss={np.mean(losses):.4f}")

        if val_auc > best_auc:
            best_auc = val_auc
            torch.save(model.state_dict(), best_path)
            print(f"  üíæ Nuevo mejor checkpoint (seed {seed}) en: {best_path} | Holdout AUC={best_auc:.3f}")
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= tcfg.patience:
                print("  ‚èπÔ∏è Early stopping por paciencia.")
                break

    # guardar historia
    hist_path = OUT_DIR / f"train_history_plus_seed{seed}.json"
    with open(hist_path, "w") as f:
        json.dump(history, f, indent=2)
    return str(best_path), best_auc

In [9]:
# Celda 5: Entrenar varias seeds y elegir el mejor por AUC holdout
ckpts = []
for sd in cfg.seeds:
    bp, auc_h = train_one_seed(sd, train_loader, holdout_loader)
    ckpts.append({"seed":sd, "ckpt":bp, "holdout_auc":float(auc_h)})

ckpts_sorted = sorted(ckpts, key=lambda d: d["holdout_auc"], reverse=True)
best = ckpts_sorted[0]
BEST_CKPT = OUT_DIR / "best_effb3_stable_plus.pth"
shutil.copyfile(best["ckpt"], BEST_CKPT)

print("‚úÖ Checkpoints:", [c["ckpt"] for c in ckpts_sorted])
print("üèÜ Mejor:", best)
print("‚û°Ô∏è Copiado como BEST:", BEST_CKPT)

with open(OUT_DIR/"train_history_stable_plus_summary.json","w") as f:
    json.dump({"candidates":ckpts_sorted, "best":best, "best_ckpt":str(BEST_CKPT)}, f, indent=2)


  model = create_fn(


Seed 41 | Epochs=8 | steps/epoch=12




  -> Holdout AUC=0.431 | PR-AUC=0.293 | loss=1.4411
  üíæ Nuevo mejor checkpoint (seed 41) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed41.pth | Holdout AUC=0.431




  -> Holdout AUC=0.385 | PR-AUC=0.259 | loss=0.7813




  -> Holdout AUC=0.410 | PR-AUC=0.264 | loss=0.4799




  -> Holdout AUC=0.437 | PR-AUC=0.286 | loss=0.3659
  üíæ Nuevo mejor checkpoint (seed 41) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed41.pth | Holdout AUC=0.437




  -> Holdout AUC=0.459 | PR-AUC=0.298 | loss=0.2786
  üíæ Nuevo mejor checkpoint (seed 41) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed41.pth | Holdout AUC=0.459




  -> Holdout AUC=0.372 | PR-AUC=0.257 | loss=0.2042




  -> Holdout AUC=0.397 | PR-AUC=0.262 | loss=0.1659




  -> Holdout AUC=0.385 | PR-AUC=0.248 | loss=0.1780
  ‚èπÔ∏è Early stopping por paciencia.


  model = create_fn(


Seed 42 | Epochs=8 | steps/epoch=12




  -> Holdout AUC=0.527 | PR-AUC=0.363 | loss=1.3326
  üíæ Nuevo mejor checkpoint (seed 42) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed42.pth | Holdout AUC=0.527




  -> Holdout AUC=0.432 | PR-AUC=0.299 | loss=0.8189




  -> Holdout AUC=0.387 | PR-AUC=0.256 | loss=0.4927




  -> Holdout AUC=0.388 | PR-AUC=0.262 | loss=0.3192
  ‚èπÔ∏è Early stopping por paciencia.


  model = create_fn(


Seed 43 | Epochs=8 | steps/epoch=12




  -> Holdout AUC=0.475 | PR-AUC=0.312 | loss=1.2818
  üíæ Nuevo mejor checkpoint (seed 43) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth | Holdout AUC=0.475




  -> Holdout AUC=0.491 | PR-AUC=0.317 | loss=0.7585
  üíæ Nuevo mejor checkpoint (seed 43) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth | Holdout AUC=0.491




  -> Holdout AUC=0.497 | PR-AUC=0.366 | loss=0.4549
  üíæ Nuevo mejor checkpoint (seed 43) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth | Holdout AUC=0.497




  -> Holdout AUC=0.507 | PR-AUC=0.363 | loss=0.3359
  üíæ Nuevo mejor checkpoint (seed 43) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth | Holdout AUC=0.507




  -> Holdout AUC=0.520 | PR-AUC=0.352 | loss=0.2629
  üíæ Nuevo mejor checkpoint (seed 43) en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth | Holdout AUC=0.520




  -> Holdout AUC=0.440 | PR-AUC=0.300 | loss=0.2644




  -> Holdout AUC=0.487 | PR-AUC=0.343 | loss=0.2122




  -> Holdout AUC=0.450 | PR-AUC=0.291 | loss=0.1965
  ‚èπÔ∏è Early stopping por paciencia.
‚úÖ Checkpoints: ['/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed42.pth', '/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed43.pth', '/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed41.pth']
üèÜ Mejor: {'seed': 42, 'ckpt': '/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_plus_seed42.pth', 'holdout_auc': 0.5273214285714286}
‚û°Ô∏è Copiado como BEST: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/best_effb3_stable_plus.pth


In [10]:
# Celda 6: Inferencia con TTA + ensemble opcional + pooling por paciente (mean y top-k mean)

def load_model_from_ckpt(ckpt_path: Path):
    m = create_model().to(device)
    sd = torch.load(ckpt_path, map_location=device)
    m.load_state_dict(sd)
    m.eval()
    return m

def tta_logits(model, x):
    # 4 vistas: original, flip H, flip V, rot90
    outs = []
    outs.append(model(x))
    outs.append(model(torch.flip(x, dims=[-1])))
    outs.append(model(torch.flip(x, dims=[-2])))
    outs.append(model(torch.rot90(x, k=1, dims=[-2, -1])))
    return torch.stack(outs, dim=0).mean(dim=0)

@torch.no_grad()
def predict_slices(models, loader, use_tta=True):
    all_rows = []
    for x, y, pids in tqdm(loader, desc="Inferencia slices", leave=False):
        x = x.to(device, non_blocking=True)
        # Ensemble: media de logits de todos los modelos
        logits_list = []
        for m in models:
            if use_tta:
                logits_list.append(tta_logits(m, x))
            else:
                logits_list.append(m(x))
        logits = torch.stack(logits_list, 0).mean(0).squeeze(1)
        probs = torch.sigmoid(logits).float().cpu().numpy()
        yb = y.squeeze(1).float().cpu().numpy()
        for pid, yy, pp in zip(pids, yb, probs):
            all_rows.append((pid, int(yy), float(pp)))
    df = pd.DataFrame(all_rows, columns=["patient_id","y_true","y_score"])
    return df

def pool_patient_mean(df_slices: pd.DataFrame) -> pd.DataFrame:
    return (df_slices
            .groupby("patient_id")
            .agg(y_true=("y_true","max"), y_score=("y_score","mean"))
            .reset_index())

def pool_patient_topk_mean(df_slices: pd.DataFrame, k:int=5) -> pd.DataFrame:
    # Ordena por score y toma las k mejores por paciente, promedio
    df_sorted = df_slices.sort_values("y_score", ascending=False)
    df_topk = (df_sorted.groupby("patient_id").head(k)
               .groupby("patient_id")
               .agg(y_true=("y_true","max"), y_score=("y_score","mean"))
               .reset_index())
    return df_topk


In [11]:
# Celda 7: Fit de Temperature Scaling (T) en HOLDOUT y selecci√≥n de umbral por recall deseado en VAL (holdout)

from scipy.optimize import minimize

def fit_temperature(logits: np.ndarray, y_true: np.ndarray, init_T=1.0):
    # Minimizar NLL (log-loss) para encontrar T
    def nll(T):
        T = float(np.maximum(T, 1e-3))
        z = logits / T
        p = 1/(1+np.exp(-z))
        eps = 1e-8
        return -np.mean(y_true*np.log(p+eps) + (1-y_true)*np.log(1-p+eps))
    res = minimize(lambda t: nll(t[0]), x0=np.array([init_T]), method="Nelder-Mead")
    best_T = float(np.maximum(res.x[0], 1e-3))
    return best_T

def choose_threshold_by_recall(y_true, y_score, recall_floor=0.90):
    # barremos thresholds y elegimos el m√°s bajo que cumple recall >= floor
    thr_space = np.linspace(0, 1, 1001)
    for thr in thr_space:
        yhat = (y_score >= thr).astype(int)
        r = recall_score(y_true, yhat, zero_division=0)
        if r >= recall_floor:
            return float(thr)
    # si no se cumple, devolver el que max recall produce (o 0.5 fallback)
    recalls = [(recall_score(y_true, (y_score>=thr).astype(int), zero_division=0), thr) for thr in thr_space]
    best = max(recalls, key=lambda t:t[0])[1]
    return float(best)


In [12]:
# Celda 8: Cargar modelos (ensemble), inferencia VAL/TEST, calibrar T en holdout, pooling top-k, m√©tricas y guardado

# 1) Cargar modelos del ensemble
ckpt_paths = [Path(c["ckpt"]) for c in ckpts]  # de Celda 5
models = [load_model_from_ckpt(p) for p in ckpt_paths]

# 2) Inferencia por slices en HOLDOUT (para calibrar T)
#    Para calibrar con logits, volvemos a obtener logits medios sin sigmoid:
@torch.no_grad()
def predict_slice_logits(models, loader, use_tta=True):
    all_logits, all_y = [], []
    for x, y, pids in tqdm(loader, desc="Inferencia logits (holdout)", leave=False):
        x = x.to(device, non_blocking=True)
        ll = []
        for m in models:
            if use_tta:
                ll.append(tta_logits(m, x).squeeze(1))
            else:
                ll.append(m(x).squeeze(1))
        logits = torch.stack(ll, 0).mean(0).float().cpu().numpy()
        all_logits.append(logits)
        all_y.append(y.squeeze(1).float().cpu().numpy())
    return np.concatenate(all_logits, 0), np.concatenate(all_y, 0)

hold_logits_s, hold_y_s = predict_slice_logits(models, holdout_loader, use_tta=True)
# Pooling mean (logits -> probs despu√©s de T)
# Para ajustar T, usamos logits a nivel slice; es suficientemente informativo
best_T = fit_temperature(hold_logits_s, hold_y_s, init_T=1.0)
print(f"üß™ Temperature scaling ajustado en HOLDOUT: T={best_T:.4f}")

# 3) Inferencia completa (VAL split = train+holdout) y TEST a nivel slice -> pasar a nivel paciente
val_slices = predict_slices(models, DataLoader(MRISliceDataset(val_map, transform=val_tf),
                                               batch_size=cfg.batch_size, shuffle=False,
                                               num_workers=cfg.num_workers, pin_memory=True),
                            use_tta=True)
test_slices = predict_slices(models, test_loader, use_tta=True)

def apply_temperature_inplace(df_slices, T):
    # Convertimos y_score a logits, dividimos por T y re-sigmoid
    p = np.clip(df_slices["y_score"].values, 1e-6, 1-1e-6)
    logits = np.log(p/(1-p))
    pT = 1/(1+np.exp(-(logits/T)))
    df_slices["y_score"] = pT
    return df_slices

val_slices_T  = apply_temperature_inplace(val_slices.copy(), best_T)
test_slices_T = apply_temperature_inplace(test_slices.copy(), best_T)

# Pooling por paciente: mean y top-k mean
VAL_MEAN   = pool_patient_mean(val_slices_T)
VAL_TOPK   = pool_patient_topk_mean(val_slices_T, k=5)
TEST_MEAN  = pool_patient_mean(test_slices_T)
TEST_TOPK  = pool_patient_topk_mean(test_slices_T, k=5)

# 4) Elegir umbral por recall en VAL (elige set: mean o top-k)
val_choice = VAL_TOPK    # ‚Üê usa TOP-K; si prefieres mean, cambia aqu√≠
thr = choose_threshold_by_recall(val_choice["y_true"].values, val_choice["y_score"].values, recall_floor=0.90)
print(f"üéØ Umbral elegido (VAL, recall‚â•0.90): thr={thr:.4f}")

def compute_metrics(y, p, thr):
    yhat = (p>=thr).astype(int)
    return {
        "AUC":   float(roc_auc_score(y, p)) if len(np.unique(y))>1 else float('nan'),
        "PR-AUC":float(average_precision_score(y, p)) if len(np.unique(y))>1 else float('nan'),
        "Acc":   float(accuracy_score(y, yhat)),
        "P":     float(precision_score(y, yhat, zero_division=0)),
        "R":     float(recall_score(y, yhat, zero_division=0)),
        "thr":   float(thr),
        "n":     int(len(y)),
    }

VAL_MET  = compute_metrics(VAL_TOPK["y_true"].values,  VAL_TOPK["y_score"].values,  thr)
TEST_MET = compute_metrics(TEST_TOPK["y_true"].values, TEST_TOPK["y_score"].values, thr)
print("VAL :", VAL_MET)
print("TEST:", TEST_MET)

# Guardar CSVs y JSON de evaluaci√≥n
val_slices_T.to_csv(OUT_DIR/"val_slice_preds_plus.csv", index=False)
test_slices_T.to_csv(OUT_DIR/"test_slice_preds_plus.csv", index=False)
VAL_TOPK.to_csv(OUT_DIR/"val_patient_preds_plus.csv", index=False)
TEST_TOPK.to_csv(OUT_DIR/"test_patient_preds_plus.csv", index=False)

eval_json = {
  "pipeline": "ft_effb3_stable_plus",
  "seeds": cfg.seeds,
  "temperature": float(best_T),
  "pooling_used": "topk_mean_k5",
  "threshold": float(thr),
  "val_metrics": VAL_MET,
  "test_metrics": TEST_MET,
}
with open(OUT_DIR/"patient_eval_plus.json","w") as f:
    json.dump(eval_json, f, indent=2)

print("üìÅ Resultados guardados en:", OUT_DIR)


  model = create_fn(


üß™ Temperature scaling ajustado en HOLDOUT: T=3.8625


                                                                  

üéØ Umbral elegido (VAL, recall‚â•0.90): thr=0.0000
VAL : {'AUC': 0.9074074074074074, 'PR-AUC': 0.9200849012306183, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.0, 'n': 47}
TEST: {'AUC': 0.7388888888888888, 'PR-AUC': 0.6987755736478632, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'thr': 0.0, 'n': 47}
üìÅ Resultados guardados en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus




In [13]:
# Celda 9: GraÃÅficas (AUC/PR-AUC barras, punto (P,R) y matriz de confusi√≥n)

def save_bar(value, title, fname, ymax=1.0):
    plt.figure(figsize=(4,4))
    plt.bar([title], [value])
    plt.ylim(0, ymax)
    plt.title(title)
    plt.grid(True, axis='y', linestyle='--', alpha=0.4)
    out = GRAPHS_DIR / fname
    plt.tight_layout(); plt.savefig(out, dpi=150); plt.close()

def save_pr_point(precision, recall, fname):
    plt.figure(figsize=(4,4))
    plt.scatter([recall], [precision], s=80)
    plt.xlim(0,1); plt.ylim(0,1)
    plt.xlabel("Recall"); plt.ylabel("Precision")
    plt.title("Punto PR (TEST)")
    plt.grid(True, linestyle='--', alpha=0.4)
    out = GRAPHS_DIR / fname
    plt.tight_layout(); plt.savefig(out, dpi=150); plt.close()

def save_confusion(y_true, y_score, thr, fname):
    yhat = (y_score>=thr).astype(int)
    cm = confusion_matrix(y_true, yhat, labels=[1,0]) # [[TP, FN],[FP, TN]] si ordenas [1,0]
    TP, FN = cm[0,0], cm[0,1]
    FP, TN = cm[1,0], cm[1,1]
    plt.figure(figsize=(4,4))
    plt.imshow(cm, cmap="Blues")
    plt.title(f"Confusi√≥n TEST (thr={thr:.3f})")
    plt.xticks([0,1], ["Pred 1","Pred 0"])
    plt.yticks([0,1], ["Real 1","Real 0"])
    for i in range(2):
        for j in range(2):
            plt.text(j, i, cm[i,j], ha="center", va="center", color="black", fontsize=12)
    out = GRAPHS_DIR / fname
    plt.tight_layout(); plt.savefig(out, dpi=150); plt.close()
    return TP, FP, TN, FN

# Barras
save_bar(TEST_MET["AUC"],    "ROC-AUC (TEST)", "plus_bars_auc.png")
save_bar(TEST_MET["PR-AUC"], "PR-AUC (TEST)",  "plus_bars_prauc.png")

# Punto PR
save_pr_point(TEST_MET["P"], TEST_MET["R"], "plus_pr_point.png")

# Matriz de confusi√≥n TEST
TP, FP, TN, FN = save_confusion(TEST_TOPK["y_true"].values, TEST_TOPK["y_score"].values, TEST_MET["thr"], "plus_confusion.png")
print(f"‚úÖ Matriz de confusi√≥n TEST reconstruida: TP={TP}, FP={FP}, TN={TN}, FN={FN}")
print("üñºÔ∏è Gr√°ficas guardadas en:", GRAPHS_DIR)


‚úÖ Matriz de confusi√≥n TEST reconstruida: TP=20, FP=27, TN=0, FN=0
üñºÔ∏è Gr√°ficas guardadas en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/graphs_from_metrics


In [14]:
# --- Celda 10: Resumen final impreso ---
def pretty(d):
    return {k:(round(v,3) if isinstance(v,float) else v) for k,v in d.items()}

print("üì¶ Pipeline: ft_effb3_stable_colab")
print("üß™ Pooling:", eval_json["pooling_used"], "| T:", round(eval_json["temperature"],3), "| thr:", round(eval_json["threshold"],4))
print("VAL :", pretty(eval_json["val_metrics"]))
print("TEST:", pretty(eval_json["test_metrics"]))
print("CSV :", OUT_DIR / "val_patient_preds_calibrated.csv", " | ", OUT_DIR / "test_patient_preds_calibrated.csv")
print("JSON:", OUT_DIR / "effb3_stable_patient_eval.json")
print("üìÅ Gr√°ficas:", GRAPHS_DIR)


üì¶ Pipeline: ft_effb3_stable_colab
üß™ Pooling: topk_mean_k5 | T: 3.863 | thr: 0.0
VAL : {'AUC': 0.907, 'PR-AUC': 0.92, 'Acc': 0.426, 'P': 0.426, 'R': 1.0, 'thr': 0.0, 'n': 47}
TEST: {'AUC': 0.739, 'PR-AUC': 0.699, 'Acc': 0.426, 'P': 0.426, 'R': 1.0, 'thr': 0.0, 'n': 47}
CSV : /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/val_patient_preds_calibrated.csv  |  /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/test_patient_preds_calibrated.csv
JSON: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_stable_patient_eval.json
üìÅ Gr√°ficas: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/graphs_from_metrics


In [16]:
# --- Celda de robustez de checkpoints para la Fase PLUS ---
from pathlib import Path
import shutil

BASE = Path("/content/drive/MyDrive/CognitivaAI")

# Directorios
OUT_DIR_PLUS   = BASE / "ft_effb3_stable_colab_plus"
OUT_DIR_STABLE = BASE / "ft_effb3_stable_colab"
OUT_DIR_PLUS.mkdir(parents=True, exist_ok=True)

# Candidatos de checkpoint
ckpt_name_best   = "best_effb3_stable.pth"
ckpt_name_seed42 = "effb3_stable_seed42.pth"

CKPT_BEST_PLUS   = OUT_DIR_PLUS / ckpt_name_best
CKPT_BEST_STABLE = OUT_DIR_STABLE / ckpt_name_best
CKPT_SEED42      = OUT_DIR_STABLE / ckpt_name_seed42

# 1) Si ya existe en PLUS, perfecto
if CKPT_BEST_PLUS.exists():
    CKPT_BEST = CKPT_BEST_PLUS
    src_used = "PLUS (ya estaba)"
# 2) Si no existe en PLUS pero s√≠ en STABLE, copiamos
elif CKPT_BEST_STABLE.exists():
    shutil.copy2(CKPT_BEST_STABLE, CKPT_BEST_PLUS)
    CKPT_BEST = CKPT_BEST_PLUS
    src_used = f"COPIADO desde STABLE ‚Üí {CKPT_BEST_STABLE.name}"
# 3) Si no hay 'best' pero s√≠ tenemos el de seed42, lo usamos directamente
elif CKPT_SEED42.exists():
    CKPT_BEST = CKPT_SEED42
    src_used = "SEED42 en STABLE (no hab√≠a best)"
# 4) Si no hay nada, error guiado
else:
    raise FileNotFoundError(
        "‚ùå No encontr√© ning√∫n checkpoint.\n"
        f"Busqu√© en:\n - {CKPT_BEST_PLUS}\n - {CKPT_BEST_STABLE}\n - {CKPT_SEED42}\n"
        "Soluciones:\n - Reejecuta la celda de entrenamiento para generar el checkpoint\n"
        " - O ajusta manualmente CKPT_BEST al path correcto si lo tienes en otra carpeta."
    )

# Rutas de salida y gr√°ficos para la fase PLUS
GRAPHS_DIR = OUT_DIR_PLUS / "graphs_from_metrics"
GRAPHS_DIR.mkdir(parents=True, exist_ok=True)

print("‚úÖ Checkpoint listo para inferencia/calibraci√≥n")
print(f"   Fuente: {src_used}")
print(f"   CKPT_BEST = {CKPT_BEST}")
print(f"   GRAPHS_DIR = {GRAPHS_DIR}")

# (Opcional) Validaci√≥n adicional si quieres asegurar tama√±o > 0
assert CKPT_BEST.exists() and CKPT_BEST.stat().st_size > 0, "Checkpoint vac√≠o o corrupto."


‚úÖ Checkpoint listo para inferencia/calibraci√≥n
   Fuente: COPIADO desde STABLE ‚Üí best_effb3_stable.pth
   CKPT_BEST = /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/best_effb3_stable.pth
   GRAPHS_DIR = /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/graphs_from_metrics


In [22]:
# Celda A: inspeccionar checkpoint
import torch, os, json
from pathlib import Path

CKPT_BEST = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/best_effb3_stable.pth")
assert CKPT_BEST.exists(), f"No existe: {CKPT_BEST}"

ckpt = torch.load(CKPT_BEST, map_location="cpu")
print("Claves checkpoint:", list(ckpt.keys()))
state = ckpt.get("model", ckpt)  # por si guardaste dict con {"model": sd, "epoch":..., etc.}
print("Total de pesos en state_dict:", len(state))
# Vista r√°pida de algunas claves
for k in list(state.keys())[:12]:
    print(" ", k)


Claves checkpoint: ['seed', 'state_dict', 'best_holdout_auc']
Total de pesos en state_dict: 3
  seed
  state_dict
  best_holdout_auc


In [23]:
# Celda B: probar variantes de arch y reportar % de pesos cargados
import timm, torch, re
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state = ckpt.get("model", ckpt)

def build_model(arch_name, num_out=1, drop=0.2):
    # num_out=1 para BCEWithLogits; si tu training fue con CE a 2 clases, pon num_out=2
    m = timm.create_model(arch_name, pretrained=False, num_classes=num_out, drop_rate=drop)
    return m

def try_load(arch_name, num_out):
    m = build_model(arch_name, num_out=num_out).to(device)
    missing, unexpected = m.load_state_dict(state, strict=False)
    n_total = sum(1 for _ in m.state_dict().keys())
    n_loaded = n_total - len(missing)
    print(f"ARCH={arch_name:<35} | loaded‚âà{n_loaded/n_total:5.1%} | missing={len(missing):3d} | unexpected={len(unexpected):3d}")
    return arch_name, m, missing, unexpected, n_loaded/n_total

# INTENTOS m√°s probables (ajusta num_out si entrenaste a 2 clases)
CANDIDATES = [
    "tf_efficientnet_b3.ns_jft_in1k",
    "tf_efficientnet_b3.ns_in1k",
    "tf_efficientnet_b3.in1k",
    "tf_efficientnet_b3",
    "efficientnet_b3"
]
results = []
for arch in CANDIDATES:
    try:
        results.append(try_load(arch, num_out=1))
    except Exception as e:
        print(f"  ‚úñ {arch}: {e}")

# escoge el que m√°s cargue (‚â•95% ideal; ‚â•70% aceptable)
best = max(results, key=lambda t: t[-1])
best_arch, best_model, missing, unexpected, ratio = best
print("\n‚Üí Mejor match:", best_arch, "| ratio cargado:", f"{ratio:.1%}")


ARCH=tf_efficientnet_b3.ns_jft_in1k      | loaded‚âà13.6% | missing=496 | unexpected=  3
  ‚úñ tf_efficientnet_b3.ns_in1k: Invalid pretrained tag (ns_in1k) for tf_efficientnet_b3.
ARCH=tf_efficientnet_b3.in1k             | loaded‚âà13.6% | missing=496 | unexpected=  3
ARCH=tf_efficientnet_b3                  | loaded‚âà13.6% | missing=496 | unexpected=  3
ARCH=efficientnet_b3                     | loaded‚âà13.6% | missing=496 | unexpected=  3

‚Üí Mejor match: tf_efficientnet_b3.ns_jft_in1k | ratio cargado: 13.6%


In [28]:
# Celda C (reparada): normalizar un checkpoint y dejarlo listo para inferencia estable

from pathlib import Path
import torch
import re
import json
from collections import OrderedDict

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === RUTAS ===
# Si tu mejor ckpt est√° en la carpeta "stable_colab":
CKPT_IN  = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab/effb3_stable_seed42.pth")
# Alternativa (pipeline 7, por si lo anterior no existe):
if not CKPT_IN.exists():
    CKPT_IN = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_colab/best_ft_effb3.pth")

OUT_DIR  = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus")
OUT_DIR.mkdir(parents=True, exist_ok=True)
CKPT_OUT = OUT_DIR / "best_effb3_stable.pth"

# === Definici√≥n del modelo (debe coincidir con el que entrenaste) ===
import timm
import torch.nn as nn

class EffB3Binary(nn.Module):
    def __init__(self, pretrained=False, arch="tf_efficientnet_b3.ns_jft_in1k", num_out=1):
        super().__init__()
        self.backbone = timm.create_model(arch, pretrained=pretrained, num_classes=0)  # feature extractor
        self.head = nn.Linear(self.backbone.num_features, num_out)
    def forward(self, x):
        feat = self.backbone(x)
        return self.head(feat).squeeze(1)  # logits

arch = "tf_efficientnet_b3.ns_jft_in1k"
num_out = 1
model = EffB3Binary(pretrained=False, arch=arch, num_out=num_out).to(device)

def load_raw_ckpt(path):
    ckpt = torch.load(path, map_location="cpu")
    # Algunos ckpts guardados como dict con 'state_dict', otros como state_dict plano
    if isinstance(ckpt, dict) and "state_dict" in ckpt:
        sd = ckpt["state_dict"]
        meta = {k:v for k,v in ckpt.items() if k != "state_dict"}
    else:
        sd = ckpt
        meta = {}
    return sd, meta

def try_prefix_remap(sd_in, want_prefix="backbone."):
    """
    Normaliza prefijos:
      - si las claves vienen como 'blocks.*', a√±ade 'backbone.' -> 'backbone.blocks.*'
      - si ya vienen como 'backbone.blocks.*', las deja igual
      - re-mapea 'classifier' o 'fc' a 'head' si procede
    """
    sd_out = OrderedDict()
    for k,v in sd_in.items():
        newk = k

        # Mapeos comunes de nombre de cabeza
        newk = re.sub(r"^(classifier|fc)\.(weight|bias)$", r"head.\1", newk)

        # Si no lleva backbone. y empieza por blocks, a√±adirlo
        if newk.startswith("blocks.") and not newk.startswith("backbone."):
            newk = "backbone." + newk
        # Si claves internas del backbone a veces vienen como 'conv_stem.', 'bn1.', etc.
        if (newk.startswith("conv_stem.") or newk.startswith("bn1.") or newk.startswith("act1.")
            or newk.startswith("blocks.") or newk.startswith("conv_head.") or newk.startswith("bn2.")):
            if not newk.startswith("backbone."):
                newk = "backbone." + newk

        sd_out[newk] = v
    return sd_out

def load_with_flexible_mapping(model, sd_in):
    model_sd = model.state_dict()
    mapped = try_prefix_remap(sd_in)

    # Filtra solo las claves que existen en el modelo y coinciden en tama√±o
    loadable = OrderedDict()
    missing, shape_mismatch, unexpected = [], [], []
    for k, v in mapped.items():
        if k in model_sd:
            if tuple(v.shape) == tuple(model_sd[k].shape):
                loadable[k] = v
            else:
                shape_mismatch.append(k)
        else:
            unexpected.append(k)

    # Qu√© nos falta del modelo
    for k in model_sd.keys():
        if k not in loadable:
            missing.append(k)

    ratio = len(loadable) / max(1, len(model_sd))
    msg = (
        f"Carga parcial: loaded‚âà{ratio*100:.1f}%\n"
        f"  (ejemplos missing) {missing[:5]}\n"
        f"  (ejemplos unexpected en ckpt) {unexpected[:5]}"
    )
    print(msg)

    model_sd.update(loadable)
    model.load_state_dict(model_sd, strict=False)
    return ratio, missing, unexpected, shape_mismatch

# --- Ejecutar normalizaci√≥n ---
print(f"Intentando normalizar ckpt: {CKPT_IN}")
sd_in, meta = load_raw_ckpt(CKPT_IN)
ratio, missing, unexpected, mism = load_with_flexible_mapping(model, sd_in)

if ratio < 0.70:
    raise RuntimeError(
        f"Muy pocos pesos cargados ({ratio*100:.1f}%). "
        f"Revisa que CKPT_IN apunte a TU checkpoint entrenado con esta misma arch+head "
        f"o apunta a /ft_effb3_colab/best_ft_effb3.pth si ese es el bueno."
    )

# Guardar ckpt limpio con metadatos
clean = {
    "arch": arch,
    "num_out": num_out,
    "state_dict": model.state_dict(),
    "meta": {
        "source": str(CKPT_IN),
        **meta
    }
}
torch.save(clean, CKPT_OUT)
print(f"‚úÖ Checkpoint limpio guardado en: {CKPT_OUT}")




Intentando normalizar ckpt: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab/effb3_stable_seed42.pth
Carga parcial: loaded‚âà99.7%
  (ejemplos missing) ['head.weight', 'head.bias']
  (ejemplos unexpected en ckpt) ['head.classifier']
‚úÖ Checkpoint limpio guardado en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/best_effb3_stable.pth


In [29]:
# Celda D (reparada): inferencia estable con el ckpt limpio

import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from pathlib import Path
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score
from tqdm import tqdm
import timm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === RUTAS (ajusta si usaste otras) ===
BASE_DIR   = Path("/content/drive/MyDrive/CognitivaAI")
DATA_DIR   = BASE_DIR / "oas1_data"
OUT_DIR    = BASE_DIR / "ft_effb3_stable_colab_plus"
CKPT_BEST  = OUT_DIR / "best_effb3_stable.pth"
GRAPHS_DIR = OUT_DIR / "graphs_from_metrics"
GRAPHS_DIR.mkdir(parents=True, exist_ok=True)

VAL_MAP   = DATA_DIR / "oas1_val_colab_mapped.csv"
TEST_MAP  = DATA_DIR / "oas1_test_colab_mapped.csv"

assert CKPT_BEST.exists(), f"‚ùå No existe checkpoint limpio: {CKPT_BEST}"
assert VAL_MAP.exists() and TEST_MAP.exists(), "‚ùå Faltan CSV mapeados val/test"

# === Modelo ===
class EffB3Binary(nn.Module):
    def __init__(self, pretrained=False, arch="tf_efficientnet_b3.ns_jft_in1k", num_out=1):
        super().__init__()
        self.backbone = timm.create_model(arch, pretrained=pretrained, num_classes=0)
        self.head = nn.Linear(self.backbone.num_features, num_out)
    def forward(self, x):
        feat = self.backbone(x)
        return self.head(feat).squeeze(1)

ckpt = torch.load(CKPT_BEST, map_location="cpu")
arch = ckpt.get("arch", "tf_efficientnet_b3.ns_jft_in1k")
num_out = ckpt.get("num_out", 1)

model = EffB3Binary(pretrained=False, arch=arch, num_out=num_out).to(device)
model.load_state_dict(ckpt["state_dict"], strict=True)
model.eval()

# === Data utils (mismo formato que ten√≠as) ===
from PIL import Image
import torchvision.transforms as T

MEAN=(0.485,0.456,0.406)
STD =(0.229,0.224,0.225)
IMG_SIZE=300

tx = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE)),
    T.ToTensor(),
    T.Normalize(MEAN, STD)
])

def read_df(mapped_csv):
    df = pd.read_csv(mapped_csv)
    # Estandarizar nombre de columnas si vinieran distintas
    colmap = {"target":"y_true", "label":"y_true", "y": "y_true", "png":"png_path", "path":"png_path"}
    for k,v in colmap.items():
        if k in df.columns and v not in df.columns:
            df[v] = df[k]
    keep = ["patient_id","y_true","png_path"]
    return df[keep].copy()

val_df  = read_df(VAL_MAP)
test_df = read_df(TEST_MAP)

def infer_df(df, batch=64):
    # por slices
    xs, ys, pids = [], [], []
    paths = df["png_path"].tolist()
    labs  = df["y_true"].tolist()
    pidsl = df["patient_id"].tolist()

    logits_all = []
    with torch.no_grad(), torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
        for i in tqdm(range(0, len(paths), batch), desc="Inferencia slices"):
            batch_imgs=[]
            for p in paths[i:i+batch]:
                im = Image.open(p).convert("RGB")
                batch_imgs.append(tx(im))
            x = torch.stack(batch_imgs).to(device)
            logit = model(x)
            logits_all.append(logit.detach().float().cpu())
    logits_all = torch.cat(logits_all, dim=0).numpy()

    df_out = pd.DataFrame({
        "patient_id": pidsl,
        "y_true": labs,
        "logits": logits_all  # 1-D
    })
    # pooling paciente (mean de logits ‚Üí proba v√≠a sigmoid)
    g = df_out.groupby("patient_id")
    pooled = g.agg(
        y_true=("y_true", lambda v: int(np.round(np.mean(v)))),
        logit=("logits", np.mean)
    ).reset_index()
    pooled["y_score"] = 1/(1+np.exp(-pooled["logit"]))
    return df_out, pooled

val_slices, val_pat = infer_df(val_df)
test_slices, test_pat = infer_df(test_df)

# === Selecci√≥n de umbral: F1 en HOLDOUT (usa tu holdout si lo tienes persistido; si no, usa VAL) ===
def find_best_thr_by_f1(df_pat):
    y = df_pat["y_true"].values
    s = df_pat["y_score"].values
    thrs = np.linspace(0.05, 0.95, 19)
    best = (0.0, 0.5)
    from sklearn.metrics import f1_score
    for t in thrs:
        f1 = f1_score(y, (s>=t).astype(int))
        if f1>best[0]:
            best = (f1, t)
    return best[1]

thr = find_best_thr_by_f1(val_pat)  # si tienes holdout_pat usa ese DF aqu√≠
print(f"üß™ Pooling=mean | Umbral (val F1-opt)={thr:.4f}")

def metrics(df_pat, thr):
    y = df_pat["y_true"].values
    s = df_pat["y_score"].values
    yhat = (s>=thr).astype(int)
    out = {
        "AUC": float(roc_auc_score(y,s)) if len(np.unique(y))>1 else np.nan,
        "PR-AUC": float(average_precision_score(y,s)),
        "Acc": float(accuracy_score(y,yhat)),
        "P": float(precision_score(y,yhat, zero_division=0)),
        "R": float(recall_score(y,yhat)),
        "thr": float(thr),
        "n": int(len(y))
    }
    return out

m_val  = metrics(val_pat, thr)
m_test = metrics(test_pat, thr)
print("VAL :", m_val)
print("TEST:", m_test)

# === Guardados ===
OUT_DIR.mkdir(parents=True, exist_ok=True)
val_slices.to_csv(OUT_DIR/"val_png_preds.csv", index=False)
test_slices.to_csv(OUT_DIR/"test_png_preds.csv", index=False)
val_pat.to_csv(OUT_DIR/"val_patient_preds.csv", index=False)
test_pat.to_csv(OUT_DIR/"test_patient_preds.csv", index=False)

EVAL_JSON = OUT_DIR / "effb3_stable_patient_eval.json"
with open(EVAL_JSON, "w", encoding="utf-8") as f:
    json.dump({"pooling_used":"mean", "thr":thr, "val_metrics":m_val, "test_metrics":m_test}, f, indent=2)

print(f"üìù Eval JSON guardado en: {EVAL_JSON}")
print(f"üìÅ CSV guardados en: {OUT_DIR}")


Inferencia slices: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:09<00:00,  1.53it/s]
  pooled = g.agg(
Inferencia slices: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:09<00:00,  1.54it/s]

üß™ Pooling=mean | Umbral (val F1-opt)=0.5000
VAL : {'AUC': 0.6296296296296295, 'PR-AUC': 0.6673015670022289, 'Acc': 0.5106382978723404, 'P': 0.46153846153846156, 'R': 0.9, 'thr': 0.49999999999999994, 'n': 47}
TEST: {'AUC': 0.5462962962962963, 'PR-AUC': 0.5262250795839319, 'Acc': 0.5319148936170213, 'P': 0.4666666666666667, 'R': 0.7, 'thr': 0.49999999999999994, 'n': 47}
üìù Eval JSON guardado en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_stable_patient_eval.json
üìÅ CSV guardados en: /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus



  pooled = g.agg(


In [30]:
# E1: calibraci√≥n (T) en holdout + sweep de umbral y pooling alternativos
import json, os
import numpy as np, pandas as pd
from pathlib import Path
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, accuracy_score

BASE = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus")
VAL_CSV  = BASE/"val_patient_preds.csv"
TEST_CSV = BASE/"test_patient_preds.csv"
assert VAL_CSV.exists() and TEST_CSV.exists(), "Faltan CSV patient preds."

def _ensure_cols(df):
    # Acepta columnas 'logits' o 'y_score'; si solo hay 'y_score', lo tratamos como logits~score para T=1
    if 'logits' in df.columns:
        return df.rename(columns={'logits':'logits_raw'})
    elif 'y_score' in df.columns:
        df = df.rename(columns={'y_score':'logits_raw'})
        # Si eran probabilidades, clip para evitar infs al logit
        eps = 1e-6
        p = np.clip(df['logits_raw'].values.astype(float), eps, 1-eps)
        df['logits_raw'] = np.log(p/(1-p))
        return df
    else:
        raise ValueError("CSV debe tener 'logits' o 'y_score'.")

val = _ensure_cols(pd.read_csv(VAL_CSV))
tes = _ensure_cols(pd.read_csv(TEST_CSV))

# --- pooling alternativos sobre slices ya agregados por paciente (si tu CSV ya es por paciente, saltar√° tal cual):
# Si tienes por-slice en otros CSV, aqu√≠ podr√≠as agrupar por patient_id aplicando mean/median/topk previamente.
# Asumimos que estos CSV ya son nivel paciente con una fila por paciente.

y_val = val['y_true'].astype(int).values
z_val = val['logits_raw'].values  # "logits" no calibrados
y_tes = tes['y_true'].astype(int).values
z_tes = tes['logits_raw'].values

def sigmoid(x): return 1/(1+np.exp(-x))

# ---- Estimate Temperature T on holdout by minimizing NLL (simple grid for robustness)
def est_temperature(z, y, grid=np.linspace(0.5, 3.5, 61)):
    bestT, bestNLL = 1.0, 1e9
    for T in grid:
        p = sigmoid(z / T)
        eps = 1e-8
        nll = -np.mean(y*np.log(p+eps) + (1-y)*np.log(1-p+eps))
        if nll < bestNLL:
            bestNLL, bestT = nll, T
    return bestT

T = est_temperature(z_val, y_val)
p_val = sigmoid(z_val / T)
p_tes = sigmoid(z_tes / T)

# ---- Sweep de umbral maximizando PR-AUC y alternativas cl√≠nicas
def eval_at_thr(y, p, thr):
    yhat = (p>=thr).astype(int)
    return {
        "Acc": accuracy_score(y, yhat),
        "P":   ( (yhat[yhat==1].size and (y[yhat==1]==1).sum()/yhat.sum()) or 0.0 ),
        "R":   ( (y[y==1].size and (y[(y==1)&(yhat==1)]==1).sum()/ (y==1).sum()) or 0.0 )
    }

def full_metrics(y, p):
    auc    = roc_auc_score(y, p)
    prauc  = average_precision_score(y, p)
    # thr por F1-opt:
    prec, rec, thr = precision_recall_curve(y, p)
    f1 = (2*prec*rec)/(prec+rec+1e-9)
    i  = np.argmax(f1)
    thr_f1 = (thr[i-1] if i>0 and i-1 < len(thr) else 0.5)
    return auc, prauc, float(thr_f1)

auc_val, pr_val, thr_f1_val = full_metrics(y_val, p_val)

# Opci√≥n cl√≠nica: forzar Recall >= 0.9
def thr_for_recall(y, p, recall_target=0.90):
    prec, rec, thr = precision_recall_curve(y, p)
    idx = np.where(rec>=recall_target)[0]
    if len(idx)==0:
        return None
    j = idx[0]
    return thr[j-1] if j>0 else 0.0

thr_rec09 = thr_for_recall(y_val, p_val, 0.90)
thr_use   = thr_rec09 if thr_rec09 is not None else thr_f1_val

val_sum = {"AUC":auc_val, "PR-AUC":pr_val, "thr":thr_use, **eval_at_thr(y_val, p_val, thr_use), "n":int(len(y_val))}
tes_sum = {"AUC":roc_auc_score(y_tes,p_tes), "PR-AUC":average_precision_score(y_tes,p_tes), "thr":thr_use, **eval_at_thr(y_tes, p_tes, thr_use), "n":int(len(y_tes))}

print(f"üß™ T estimada en holdout: {T:.3f} | thr usado: {thr_use:.4f} (rec‚â•0.90? {'s√≠' if thr_use==thr_rec09 else 'no'})")
print("VAL :", {k:(float(v) if hasattr(v, "__float__") else v) for k,v in val_sum.items()})
print("TEST:", {k:(float(v) if hasattr(v, "__float__") else v) for k,v in tes_sum.items()})

# Guardar eval JSON
out_json = BASE/"effb3_stable_patient_eval_calibrated.json"
with open(out_json, "w") as f:
    json.dump({"temperature":float(T),"thr":float(thr_use),"val_metrics":val_sum,"test_metrics":tes_sum}, f, indent=2)
print("üìù Eval JSON (calibrado) ‚Üí", out_json)


üß™ T estimada en holdout: 0.500 | thr usado: 0.0000 (rec‚â•0.90? s√≠)
VAL : {'AUC': 0.6296296296296295, 'PR-AUC': 0.6673015670022289, 'thr': 0.0, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'n': 47.0}
TEST: {'AUC': 0.5462962962962963, 'PR-AUC': 0.5262250795839319, 'thr': 0.0, 'Acc': 0.425531914893617, 'P': 0.425531914893617, 'R': 1.0, 'n': 47.0}
üìù Eval JSON (calibrado) ‚Üí /content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus/effb3_stable_patient_eval_calibrated.json


In [31]:
# E2: recomputar paciente con pooling alternativos desde CSV por slice
from pathlib import Path
import numpy as np, pandas as pd
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_recall_curve

BASE = Path("/content/drive/MyDrive/CognitivaAI/ft_effb3_stable_colab_plus")
VAL_S = BASE/"val_png_preds.csv"
TES_S = BASE/"test_png_preds.csv"
assert VAL_S.exists() and TES_S.exists(), "Faltan CSV por slice."

def logits_from_score(s):
    eps=1e-6
    s = np.clip(s.astype(float), eps, 1-eps)
    return np.log(s/(1-s))

def pool_topk(arr, k=0.2):
    if len(arr)==0: return np.nan
    kk = max(1, int(np.ceil(k*len(arr))))
    return np.mean(np.sort(arr)[-kk:])

def aggregate(df_slices, pooling="mean", T=None):
    # Espera columnas: patient_id, y_true, y_score o logits
    if "y_score" in df_slices.columns:
        z = logits_from_score(df_slices["y_score"].values)
        df_slices = df_slices.copy()
        df_slices["logits"] = z
    assert "patient_id" in df_slices and "y_true" in df_slices and "logits" in df_slices

    if pooling=="mean":
        g = df_slices.groupby("patient_id").agg(
            y_true=("y_true", lambda v:int(np.round(np.mean(v)))),
            logits=("logits", "mean")
        )
    elif pooling=="median":
        g = df_slices.groupby("patient_id").agg(
            y_true=("y_true", lambda v:int(np.round(np.mean(v)))),
            logits=("logits", "median")
        )
    elif pooling.startswith("topk"):
        frac = float(pooling.split("=")[-1]) if "=" in pooling else 0.2
        g = df_slices.groupby("patient_id").agg(
            y_true=("y_true", lambda v:int(np.round(np.mean(v)))),
            logits=("logits", lambda v: pool_topk(np.array(v), frac))
        )
    else:
        raise ValueError("pooling desconocido")
    g = g.reset_index()
    z = g["logits"].values
    p = 1/(1+np.exp(-(z if T is None else z/T)))
    return g["y_true"].values.astype(int), p

def report(y, p, name):
    auc   = roc_auc_score(y,p)
    prauc = average_precision_score(y,p)
    prec, rec, thr = precision_recall_curve(y,p)
    f1 = (2*prec*rec)/(prec+rec+1e-9)
    i  = np.argmax(f1)
    thr_f1 = (thr[i-1] if i>0 else 0.5)
    yhat = (p>=thr_f1).astype(int)
    acc  = accuracy_score(y,yhat)
    P = ((yhat.sum()>0) and ( ( (y[yhat==1]==1).sum() / yhat.sum() ) )) or 0.0
    R = ( (y==1).sum()>0 and ((y[(y==1)&(yhat==1)]==1).sum() / (y==1).sum()) ) or 0.0
    print(f"{name:>14s} | AUC={auc:.3f} | PR-AUC={prauc:.3f} | Acc={acc:.3f} | P={P:.2f} | R={R:.2f} | thrF1={thr_f1:.3f}")

val_s = pd.read_csv(VAL_S)
tes_s = pd.read_csv(TES_S)

# Usa la T reci√©n estimada en E1 si existe:
T = None
cal_json = BASE/"effb3_stable_patient_eval_calibrated.json"
if cal_json.exists():
    import json
    T = json.load(open(cal_json))["temperature"]

for pooling in ["mean","median","topk=0.2","topk=0.3"]:
    yv,pv = aggregate(val_s, pooling=pooling, T=T)
    yt,pt = aggregate(tes_s, pooling=pooling, T=T)
    report(yv,pv, f"VAL {pooling}")
    report(yt,pt, f"TEST {pooling}")


      VAL mean | AUC=0.630 | PR-AUC=0.667 | Acc=0.532 | P=0.47 | R=0.85 | thrF1=0.504
     TEST mean | AUC=0.546 | PR-AUC=0.526 | Acc=0.511 | P=0.47 | R=1.00 | thrF1=0.491
    VAL median | AUC=0.643 | PR-AUC=0.653 | Acc=0.574 | P=0.50 | R=0.85 | thrF1=0.503
   TEST median | AUC=0.541 | PR-AUC=0.513 | Acc=0.532 | P=0.48 | R=1.00 | thrF1=0.492
  VAL topk=0.2 | AUC=0.602 | PR-AUC=0.655 | Acc=0.532 | P=0.47 | R=0.85 | thrF1=0.547
 TEST topk=0.2 | AUC=0.583 | PR-AUC=0.502 | Acc=0.553 | P=0.49 | R=1.00 | thrF1=0.537
  VAL topk=0.3 | AUC=0.607 | PR-AUC=0.658 | Acc=0.532 | P=0.47 | R=0.85 | thrF1=0.541
 TEST topk=0.3 | AUC=0.567 | PR-AUC=0.480 | Acc=0.553 | P=0.49 | R=1.00 | thrF1=0.528
