Celda 0 — Bootstrap / paths:

In [None]:
# Celda 0 — Rutas base y utilidades simples
from pathlib import Path
import sys, os, re, json, math, datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

# Repository root: the parent directory when the notebook is launched from a
# folder named "notebooks", otherwise the current working directory.
ROOT = Path.cwd().parents[0] if (Path.cwd().name == "notebooks") else Path.cwd()
# Make the repository importable (a later cell tries `from src.plots import ...`).
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Output tree: OUT is scanned for run folders; SUMMARY receives aggregated files.
OUT = ROOT / "outputs"
SUMMARY = OUT / "summary"
SUMMARY.mkdir(parents=True, exist_ok=True)  # created eagerly, parents included

print("ROOT   :", ROOT)
print("OUT    :", OUT)
print("SUMMARY:", SUMMARY)

# -------------------- Helpers de E/S --------------------
def _abs_run_dir(run_dir: str | Path) -> Path:
    p = Path(run_dir)
    return p if p.is_absolute() else (OUT / p)

def _read_json(path: Path):
    """Best-effort JSON loader: the parsed content, or None if missing/unreadable."""
    parsed = None
    try:
        if path.exists():
            parsed = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately tolerant: a corrupt or unreadable file counts as absent.
        parsed = None
    return parsed

def _read_csv_df(path: Path):
    """Best-effort CSV loader: a DataFrame, or None if missing/unreadable."""
    frame = None
    try:
        if path.exists():
            frame = pd.read_csv(path)
    except Exception:
        # Deliberately tolerant: an unparsable file counts as absent.
        frame = None
    return frame

def run_mtime(run_dir: str | Path) -> float:
    """Newest mtime among the run folder and its direct children; 0.0 on failure."""
    root = _abs_run_dir(run_dir)

    def _child_stamps():
        # Children that cannot be stat'ed are skipped rather than aborting the scan.
        for child in root.iterdir():
            try:
                yield child.stat().st_mtime
            except Exception:
                continue

    try:
        # The folder's own mtime is read first, then its first-level entries.
        return max([root.stat().st_mtime, *_child_stamps()])
    except Exception:
        return 0.0

def canonical_method(s: str) -> str:
    """
    Normalise a method label to its base family so runs can be grouped.

    Returns one of "rehearsal+ewc", "sca-snn", "sa-snn", "as-snn", "colanet",
    "ewc", "rehearsal", "naive"; otherwise the text before the first
    underscore of the lower-cased label. Non-string input yields "unknown".
    """
    if not isinstance(s, str):
        return "unknown"
    t = s.lower()

    def _has_token(pattern: str) -> bool:
        # `\b` is not usable here: "_" is a word character, so a tag such as
        # "sa_snn_k8" has no word boundary after "snn". Alphanumeric
        # lookarounds treat "_", "-", "+" and the string ends as separators.
        return re.search(rf"(?<![a-z0-9])(?:{pattern})(?![a-z0-9])", t) is not None

    if ("rehearsal" in t) and ("+ewc" in t or "_ewc" in t):
        return "rehearsal+ewc"
    if "sca-snn" in t or "sca_snn" in t:
        return "sca-snn"
    if _has_token(r"sa[-_]snn"):
        return "sa-snn"
    if _has_token(r"as[-_]snn"):
        return "as-snn"
    if "colanet" in t:
        return "colanet"
    if _has_token(r"ewc") or "ewc_lam" in t:
        return "ewc"
    if "rehearsal" in t:
        return "rehearsal"
    if "naive" in t or "finetune" in t or "fine-tune" in t:
        return "naive"
    return t.split("_")[0]

def _safe_float(x, default=np.nan):
    """Convert *x* to float; return *default* for None or unconvertible input."""
    if x is None:
        return default
    try:
        converted = float(x)
    except Exception:
        return default
    return converted


Celda 1 — Config de selección

In [None]:
# Celda 1 — Config de selección y salida

# Label for this snapshot (everything generated below is written under it)
SUMMARY_LABEL = "paper_set_accurate_2025-11-03"
THIS_SUMMARY = SUMMARY / SUMMARY_LABEL
THIS_SUMMARY.mkdir(parents=True, exist_ok=True)

# Hard filters (tune them to the snapshot you consider "good")
PRESET_FILTER    = "accurate"         # None disables this filter
ENCODER_FILTER   = "rate"             # None disables this filter
SEED_FILTER      = 42                 # None disables this filter
METHODS_KEEP     = {"sa-snn","as-snn","sca-snn","ewc","rehearsal","naive"}  # set() disables this filter

# Comparability (same model/T/amp) and a "tolerant" batch_size
MODEL_MATCH_SUBSTR = "PilotNetSNN_66x200_gray"  # substring looked up in the model name
T_TARGET           = 30
AMP_REQUIRED       = True
BATCH_SIZE_TARGET  = 160        # intended as tolerant: equal, or NaN when not recorded
STRICT_CFG         = True       # if False, strict comparability is not required
# NOTE(review): no code visible in this notebook reads STRICT_CFG — confirm it is still needed.

# Time cut-off
MTIME_FROM = dt.datetime(2025, 10, 31, 0, 0, 0)  # None disables the date filter

# Restrict to runs produced by the new runner (those with run_row.*)
ONLY_NEW_RUNNER = True

# Optional extra filters
TAG_INCLUDE_SUBSTR = []         # e.g. ["best_", "grid05_"]
RUN_DIR_WHITELIST  = []         # e.g. ["continual_accurate_ewc_..."]

# If metrics are missing, try reading eval_matrix.(csv|json)
REQUIRE_EVALMATRIX = False      # if True, runs without it are discarded

# Ranking outputs
TOPN = 6
ALPHA_COMPOSITE = 0.5           # weight of mae_norm vs forget_norm in the score


Celda 2 — Construcción de la tabla base (una sola vez, fuente única)

In [None]:
# Celda 2 — Reconstrucción 100% desde ficheros
# (parser robusto: per_task_perf como dict o lista, fallback a CSV y run_row.json; amp normalizado)

import re, math, json
import numpy as np
import pandas as pd
from pathlib import Path

# --- Utilidades de lectura seguras (usamos tus helpers si existen; si no, definimos mínimos) ---
def _read_json(p: Path):
    """Best-effort JSON loader: the parsed content, or None if missing/unreadable."""
    parsed = None
    try:
        if p.exists():
            parsed = json.loads(p.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately tolerant: a corrupt or unreadable file counts as absent.
        parsed = None
    return parsed

def _read_csv_df(p: Path):
    """Best-effort CSV loader: a DataFrame, or None if missing/unreadable."""
    frame = None
    try:
        if p.exists():
            frame = pd.read_csv(p)
    except Exception:
        # Deliberately tolerant: an unparsable file counts as absent.
        frame = None
    return frame

def _safe_float(x, default=np.nan):
    """Convert *x* to float; None, booleans and unconvertible input give *default*."""
    # Booleans are rejected explicitly: float(True) would otherwise yield 1.0.
    if x is None or isinstance(x, bool):
        return default
    try:
        converted = float(x)
    except Exception:
        return default
    return converted

def _as_bool(x):
    """Tri-state boolean parse: True, False, or None when the value is undecidable."""
    if isinstance(x, bool):
        return x
    if x is None:
        return None
    if isinstance(x, float) and np.isnan(x):
        return None
    # Case-insensitive textual spellings; anything else is "unknown" (None).
    spellings = {
        "true": True, "1": True, "yes": True, "y": True,
        "false": False, "0": False, "no": False, "n": False,
    }
    return spellings.get(str(x).strip().lower())

# --- 1) Metadatos básicos (solo desde ficheros) ---
def _parse_basic_meta(run_dir: Path):
    """
    Collect preset, method, encoder, model, seed, T, amp and batch_size for a run.

    Sources, in priority order: ``run_row.json`` → the first task's
    ``manifest.json`` (model and batch_size only) → the folder name
    (preset, method, encoder and seed).

    Returns a dict with those eight keys. ``seed``, ``T`` and ``batch_size``
    are floats (NaN when unknown); ``amp`` is True, False or None.
    """
    jrow = _read_json(run_dir / "run_row.json")

    def jget(*keys, default=None):
        # Walk nested keys; a non-dict level or a missing/None value gives `default`.
        obj = jrow or {}
        for k in keys:
            if not isinstance(obj, dict):
                return default
            obj = obj.get(k, None)
        return obj if obj is not None else default

    preset   = jget("preset")
    method   = jget("method")
    encoder  = jget("encoder")
    model    = jget("model")
    seed     = jget("seed")
    T        = jget("T", default=jget("meta", "T"))
    amp      = jget("amp", default=None)
    batch_sz = jget("batch_size", default=jget("meta", "batch_size"))

    # Fallbacks from the first task's manifest; anything that is not a dict
    # (list, scalar, unreadable file) is treated as an empty manifest.
    man1 = _read_json(run_dir / "task_1_circuito1" / "manifest.json")
    if not isinstance(man1, dict):
        man1 = {}
    if model is None:
        model = man1.get("model_name") or man1.get("model")
    if batch_sz is None:
        meta1 = man1.get("meta")
        if isinstance(meta1, dict):
            batch_sz = meta1.get("batch_size")

    # Folder-name heuristic. Expected shape:
    #   continual_<preset>_<method>_..._(rate|latency|raw|image)_model-..._seed_42
    name = run_dir.name
    if preset is None or method is None or encoder is None:
        # The encoder must be followed by a non-alphanumeric character or the
        # end of the name. `\b` cannot express that: "_" is a word character,
        # so "rate_model-..." has no word boundary after "rate".
        m = re.match(
            r"continual_([^_]+)_([^_].*?)_(rate|latency|raw|image)(?![A-Za-z0-9])",
            name,
        )
        if m:
            preset  = preset  or m.group(1)
            method  = method  or m.group(2)
            encoder = encoder or m.group(3)
    if seed is None:
        m_seed = re.search(r"(?:^|_)seed[_-]?(\d+)(?![0-9])", name)
        if m_seed:
            seed = m_seed.group(1)

    # Typing / normalisation
    return dict(
        preset=preset, method=method, encoder=encoder, model=model,
        seed=_safe_float(seed), T=_safe_float(T), amp=_as_bool(amp),
        batch_size=_safe_float(batch_sz),
    )

# --- 2) Lectura robusta de per_task (dict o lista; fallback CSV; último recurso run_row.json) ---
def _read_per_task_perf(run_dir: Path):
    """
    Return per-task metrics as a dict keyed by task name:
      {"task_1_circuito1": {"best_mae":..., "final_mae":...}, ...}

    ``per_task_perf.json`` is accepted as a dict or a list. If it yields no
    circuito1/circuito2 entry, ``per_task_perf.csv`` is tried. As a last
    resort, ``run_row.json`` fills only the fields that are still missing or
    NaN; values already read from the per-task files are never overwritten.
    """
    out = {}

    def _first_not_none(mapping, *keys):
        # Unlike an `a or b` chain, this keeps a legitimate 0 / 0.0 value.
        for key in keys:
            value = mapping.get(key)
            if value is not None:
                return value
        return None

    # Main JSON
    js = _read_json(run_dir / "per_task_perf.json")
    if isinstance(js, dict):
        # Already in the target shape; non-dict values become empty entries.
        out = {str(k): (v if isinstance(v, dict) else {}) for k, v in js.items()}
    elif isinstance(js, list):
        # Convert the list to the canonical dict
        for i, it in enumerate(js):
            if not isinstance(it, dict):
                continue
            name  = it.get("task") or it.get("name") or it.get("task_name") or it.get("task_id") or f"task_{i+1}"
            best  = it.get("best_mae")
            final = _first_not_none(it, "final_mae", "val_final_mae", "val_last_mae")
            out[str(name)] = {"best_mae": _safe_float(best), "final_mae": _safe_float(final)}

    # CSV fallback when no circuito1/circuito2 entry was found
    if (not out) or all(("circuito1" not in k and "circuito2" not in k) for k in out):
        df = _read_csv_df(run_dir / "per_task_perf.csv")
        if df is not None and len(df):
            tcol = next((c for c in ("task", "name", "task_name", "task_id") if c in df.columns), None)
            best_col = next((c for c in ("best_mae", "val_best_mae") if c in df.columns), None)
            final_col = next(
                (c for c in ("final_mae", "val_final_mae", "val_last_mae") if c in df.columns), None
            )
            if tcol and best_col and final_col:
                for _, r in df.iterrows():
                    out[str(r[tcol])] = {
                        "best_mae": _safe_float(r.get(best_col)),
                        "final_mae": _safe_float(r.get(final_col)),
                    }

    # Last resort: run_row.json carrying the per-task MAEs
    if ("task_1_circuito1" not in out) or ("task_2_circuito2" not in out):
        rr = _read_json(run_dir / "run_row.json")
        if not isinstance(rr, dict):
            rr = {}
        for task_key, prefix in (("task_1_circuito1", "circuito1"), ("task_2_circuito2", "circuito2")):
            from_row = {
                "best_mae": _safe_float(rr.get(f"{prefix}_best_mae")),
                "final_mae": _safe_float(rr.get(f"{prefix}_final_mae")),
            }
            if all(np.isnan(v) for v in from_row.values()):
                continue
            entry = out.setdefault(task_key, {})
            for field, value in from_row.items():
                # Only backfill: a valid value from the per-task file wins.
                if np.isnan(_safe_float(entry.get(field))):
                    entry[field] = value

    return out

# --- 3) Otros lectores (forgetting, eficiencia, eval_matrix) ---
def _read_forgetting(run_dir: Path):
    """Return the parsed ``forgetting.json`` of a run, or ``{}`` when absent or empty."""
    payload = _read_json(run_dir / "forgetting.json")
    if not payload:
        return {}
    return payload

def _read_efficiency(run_dir: Path):
    """
    Return ``(emissions_kg, elapsed_sec)`` for a run, each NaN when unknown.

    Both come from ``efficiency_summary.json`` first. Missing emissions are
    summed from ``emissions.csv`` (column ``co2e_kg`` or ``emissions_kg``);
    missing elapsed time is read from ``continual_results.json``.
    """
    summary = _read_json(run_dir / "efficiency_summary.json") or {}
    emissions = _safe_float(summary.get("emissions_kg"), default=np.nan)
    elapsed = _safe_float(summary.get("elapsed_sec"), default=np.nan)

    if math.isnan(emissions):
        em_df = _read_csv_df(run_dir / "emissions.csv")
        if em_df is not None:
            em_col = next((c for c in ("co2e_kg", "emissions_kg") if c in em_df.columns), None)
            if em_col:
                # Unparsable cells count as 0 in the total.
                numeric = pd.to_numeric(em_df[em_col], errors="coerce")
                emissions = float(numeric.fillna(0).sum())

    if math.isnan(elapsed):
        results = _read_json(run_dir / "continual_results.json") or {}
        elapsed = _safe_float(results.get("elapsed_sec"), default=np.nan)

    return emissions, elapsed

def _read_eval_matrix(run_dir: Path):
    """
    Load the run's eval matrix as a DataFrame, or return None.

    ``eval_matrix.csv`` is preferred. If it is absent or cannot be parsed,
    ``eval_matrix.json`` is tried, so a corrupt CSV no longer hides a valid
    JSON copy.
    """
    p_csv = run_dir / "eval_matrix.csv"
    if p_csv.exists():
        try:
            return pd.read_csv(p_csv)
        except Exception:
            pass  # unreadable CSV: fall through to the JSON copy

    js = _read_json(run_dir / "eval_matrix.json")
    if js is not None:
        try:
            return pd.DataFrame(js)
        except Exception:
            pass  # JSON content that cannot be shaped into a table
    return None

# --- 4) Olvido a partir de eval_matrix si no hay forgetting.json ---
def _compute_forgetting_from_eval_matrix(eval_df: pd.DataFrame, per_task: dict):
    """
    Estimate forgetting of circuito1 for the two-task setup (circuito1, circuito2).

    Reference value: ``best_mae`` of the first per_task entry whose key
    contains "circuito1". Final value: last row of the last eval_df column
    whose name contains "circuito1" (a heuristic). Returns ``{}`` when either
    value is unavailable; otherwise the circuito1/circuito2 abs/rel forgetting
    keys plus ``avg_forget_rel``, with circuito2 fixed at 0.0.
    """
    result = {}
    if not isinstance(per_task, dict):
        return result

    # Best MAE on circuito1 (task 1)
    t1_key = next((k for k in per_task.keys() if "circuito1" in str(k).lower()), None)
    best_t1 = _safe_float(per_task.get(t1_key, {}).get("best_mae")) if t1_key else None

    # MAE on circuito1 after the last task was learned
    final_t1 = None
    has_table = isinstance(eval_df, pd.DataFrame) and len(eval_df) and eval_df.shape[1] > 0
    if has_table:
        c1_cols = [c for c in eval_df.columns if "circuito1" in str(c).lower()]
        if c1_cols:
            try:
                final_t1 = _safe_float(eval_df[c1_cols[-1]].values[-1])
            except Exception:
                final_t1 = np.nan

    def _unusable(value):
        return value is None or math.isnan(value)

    if _unusable(best_t1) or _unusable(final_t1):
        return result  # nothing to compute

    # An improvement (negative difference) is reported as zero forgetting.
    forget_abs = max(0.0, final_t1 - best_t1)
    forget_rel = forget_abs / max(1e-9, best_t1)
    result.update(
        circuito1_forget_abs=forget_abs,
        circuito1_forget_rel=forget_rel,
        circuito2_forget_abs=0.0,
        circuito2_forget_rel=0.0,
        avg_forget_rel=forget_rel,
    )
    return result

# --- 5) Tabla completa reconstruida desde disco ---
def build_results_table_from_disk(base_out: Path) -> pd.DataFrame:
    """
    Scan ``base_out`` for ``continual_*`` folders and rebuild one result row per run.

    Each row holds the run metadata, efficiency figures, per-task MAEs and
    forgetting metrics (from ``forgetting.json``, or derived from the eval
    matrix when that file provides none). Derived columns ``is_new_runner``,
    ``mtime``, ``mtime_dt`` and ``method_base`` are appended.

    Side effects: writes ``results_table_fromdisk.csv`` under ``SUMMARY`` and
    prints progress/diagnostic lines.

    Returns the table. With no runs it is an empty frame that still carries
    every column, so downstream column access keeps working.
    """
    text_columns = ["run_dir", "preset", "method", "encoder", "model"]
    forget_keys = [
        "circuito1_forget_abs", "circuito1_forget_rel",
        "circuito2_forget_abs", "circuito2_forget_rel", "avg_forget_rel",
    ]
    numeric_columns = [
        "seed", "T", "batch_size", "emissions_kg", "elapsed_sec",
        "circuito1_best_mae", "circuito1_final_mae", "circuito2_best_mae", "circuito2_final_mae",
        *forget_keys,
    ]
    base_columns = [
        *text_columns, "seed", "T", "batch_size", "amp",
        *numeric_columns[3:],
    ]

    # Sorted so the table (and the CSV) has a reproducible row order.
    run_dirs = sorted(p for p in base_out.glob("continual_*") if p.is_dir())
    print(f"[INFO] Escaneando {len(run_dirs)} runs en {base_out}")

    rows = []
    for rd in run_dirs:
        meta     = _parse_basic_meta(rd)
        per_task = _read_per_task_perf(rd)
        eff_kg, elapsed = _read_efficiency(rd)
        forget_js = _read_forgetting(rd)
        if not isinstance(forget_js, dict):
            forget_js = {}
        eval_df   = _read_eval_matrix(rd)

        # Per-task MAEs; the task is identified by "circuito1"/"circuito2" in the key.
        c1_best = c1_final = c2_best = c2_final = np.nan
        if isinstance(per_task, dict):
            for k, v in per_task.items():
                k_low = str(k).lower()
                if "circuito1" in k_low and isinstance(v, dict):
                    c1_best  = _safe_float(v.get("best_mae"), default=c1_best)
                    c1_final = _safe_float(v.get("final_mae"), default=c1_final)
                elif "circuito2" in k_low and isinstance(v, dict):
                    c2_best  = _safe_float(v.get("best_mae"), default=c2_best)
                    c2_final = _safe_float(v.get("final_mae"), default=c2_final)

        # Forgetting: forgetting.json first; the eval matrix only when it gave nothing.
        forget = {k: _safe_float(forget_js.get(k)) for k in forget_keys}
        if all(math.isnan(v) for v in forget.values()):
            comp = _compute_forgetting_from_eval_matrix(eval_df, per_task) or {}
            for k in forget_keys:
                forget[k] = comp.get(k, forget[k])

        rows.append(dict(
            run_dir=str(rd.relative_to(base_out)),
            preset=meta["preset"],
            method=meta["method"],
            encoder=meta["encoder"],
            model=meta["model"],
            seed=meta["seed"],
            T=meta["T"],
            batch_size=meta["batch_size"],
            amp=meta["amp"],
            emissions_kg=eff_kg,
            elapsed_sec=elapsed,
            circuito1_best_mae=c1_best,
            circuito1_final_mae=c1_final,
            circuito2_best_mae=c2_best,
            circuito2_final_mae=c2_final,
            **forget,
        ))

    # Explicit columns keep the schema stable even when `rows` is empty.
    df = pd.DataFrame(rows, columns=base_columns)

    # Extra flags, resolved against the scanned folder (not the global OUT),
    # so the function is correct for any `base_out`.
    base_abs = Path(base_out).absolute()
    abs_dirs = [base_abs / rel for rel in df["run_dir"]]
    df["is_new_runner"] = pd.Series(
        [(p / "run_row.json").exists() or (p / "run_row.csv").exists() for p in abs_dirs],
        index=df.index, dtype=bool,
    )
    df["mtime"] = pd.Series([run_mtime(p) for p in abs_dirs], index=df.index, dtype=float)
    df["mtime_dt"] = pd.to_datetime(df["mtime"], unit="s")
    # No astype(str): a missing method must reach canonical_method as a
    # non-string so it is labelled "unknown" rather than "none".
    df["method_base"] = df["method"].map(canonical_method)

    # Numeric types
    for c in numeric_columns:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    out_csv = SUMMARY / "results_table_fromdisk.csv"
    df.to_csv(out_csv, index=False)
    print(f"[OK] results_table_fromdisk → {out_csv} | filas:", len(df))

    # Global NaN diagnostics for the key metrics
    nan_cols = ["circuito1_best_mae","circuito1_final_mae","circuito2_best_mae","circuito2_final_mae","avg_forget_rel"]
    print("[DEBUG] NaNs globales:", {c:int(df[c].isna().sum()) for c in nan_cols if c in df})

    return df

# --- Build the table from disk (scanning OUT) and show its first rows ---
df_all = build_results_table_from_disk(OUT)
display(df_all.head(3))


In [None]:
# Celda 3 — Preselección y (opcional) exigir eval_matrix

def _preselect(df):
    """
    Apply the notebook's hard filters to a copy of *df* and return the result.

    Each filter is skipped when its configuration value is disabled
    (None / empty); the new-runner filter also needs the ``is_new_runner``
    column to be present.
    """
    selected = df.copy()
    # (enabled?, mask builder) pairs, applied in order on the shrinking frame.
    steps = [
        (bool(PRESET_FILTER), lambda d: d["preset"] == PRESET_FILTER),
        (bool(ENCODER_FILTER), lambda d: d["encoder"] == ENCODER_FILTER),
        (SEED_FILTER is not None, lambda d: d["seed"] == SEED_FILTER),
        (bool(METHODS_KEEP), lambda d: d["method_base"].isin(METHODS_KEEP)),
        (
            bool(ONLY_NEW_RUNNER) and "is_new_runner" in selected.columns,
            lambda d: d["is_new_runner"] == True,
        ),
        (MTIME_FROM is not None, lambda d: d["mtime"] >= MTIME_FROM.timestamp()),
        (
            bool(TAG_INCLUDE_SUBSTR),
            lambda d: d["run_dir"].astype(str).apply(
                lambda s: any(t in s for t in TAG_INCLUDE_SUBSTR)
            ),
        ),
        (bool(RUN_DIR_WHITELIST), lambda d: d["run_dir"].isin(RUN_DIR_WHITELIST)),
    ]
    for enabled, build_mask in steps:
        if enabled:
            selected = selected[build_mask(selected)]
    return selected

# Pre-selection: the hard filters applied to the full table.
df_pre = _preselect(df_all)
print(f"[INFO] Preselección: {len(df_pre)} runs.")

# Optionally keep only runs that ship an eval_matrix file (csv or json).
if REQUIRE_EVALMATRIX:
    def _has_eval(rd):
        # True when the run folder contains eval_matrix.csv or eval_matrix.json.
        p = _abs_run_dir(rd)
        return (p / "eval_matrix.csv").exists() or (p / "eval_matrix.json").exists()
    df_pre = df_pre[df_pre["run_dir"].apply(_has_eval)]
    print(f"[INFO] Con eval_matrix: {len(df_pre)} runs.")

display(df_pre.head(5))


In [None]:
# Celda 4 — Filtro final (preset/encoder/seed/fecha/métodos) + comparabilidad robusta

import math
import numpy as np
import pandas as pd
from pathlib import Path

# Helper silencioso
def _read_json_silent(p: Path):
    """Parse the JSON file at *p*; return ``{}`` when it is missing or unreadable."""
    try:
        if not p.exists():
            return {}
        import json
        return json.loads(p.read_text(encoding="utf-8"))
    except Exception:
        # Silent by design: any failure is reported as an empty mapping.
        return {}

def _to_bool(x):
    """Tri-state boolean parse: True, False, or None when the value is undecidable."""
    if isinstance(x, bool):
        return x
    nan_float = isinstance(x, float) and np.isnan(x)
    if x is None or nan_float:
        return None
    text = str(x).strip().lower()
    if text in frozenset(("true", "1", "yes", "y")):
        return True
    if text in frozenset(("false", "0", "no", "n")):
        return False
    return None

def _batch_size_filled(row: pd.Series) -> float:
    """
    Return the row's batch_size, backfilled from the run's files when missing.

    Lookup order: the row's own ``batch_size`` → ``run_row.json``
    (``batch_size``, then ``meta.batch_size``) →
    ``task_1_circuito1/manifest.json`` (``meta.batch_size``). The first value
    that converts to a non-NaN float wins; NaN is returned when none does.
    Booleans and JSON payloads that are not objects are ignored.
    """
    bs = pd.to_numeric(row.get("batch_size"), errors="coerce")
    if pd.notna(bs):
        return float(bs)

    def _as_dict(obj):
        # JSON files may hold a list or scalar; treat those as "no data".
        return obj if isinstance(obj, dict) else {}

    def _usable_float(value):
        if value is None or isinstance(value, bool):
            return None
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return None if pd.isna(number) else number

    rd = _abs_run_dir(row["run_dir"])
    run_row = _as_dict(_read_json_silent(rd / "run_row.json"))
    manifest = _as_dict(_read_json_silent(rd / "task_1_circuito1" / "manifest.json"))
    candidates = (
        run_row.get("batch_size"),
        _as_dict(run_row.get("meta")).get("batch_size"),
        _as_dict(manifest.get("meta")).get("batch_size"),
    )
    for cand in candidates:
        number = _usable_float(cand)
        if number is not None:
            return number
    return float("nan")

# 1) "Hard" filters
# The same criteria as in _preselect, applied inline so the number of
# surviving runs can be recorded after each step.
df_sel = df_all.copy()

dbg_counts = {"inicio": len(df_sel)}

if PRESET_FILTER:
    df_sel = df_sel[df_sel["preset"] == PRESET_FILTER]
dbg_counts["preset"] = len(df_sel)

if ENCODER_FILTER:
    df_sel = df_sel[df_sel["encoder"] == ENCODER_FILTER]
dbg_counts["encoder"] = len(df_sel)

if SEED_FILTER is not None:
    df_sel = df_sel[df_sel["seed"] == SEED_FILTER]
dbg_counts["seed"] = len(df_sel)

if METHODS_KEEP:
    df_sel = df_sel[df_sel["method_base"].isin(METHODS_KEEP)]
dbg_counts["methods_keep"] = len(df_sel)

if ONLY_NEW_RUNNER and "is_new_runner" in df_sel.columns:
    df_sel = df_sel[df_sel["is_new_runner"] == True]
dbg_counts["only_new_runner"] = len(df_sel)

if MTIME_FROM is not None:
    df_sel = df_sel[df_sel["mtime"] >= MTIME_FROM.timestamp()]
dbg_counts["mtime_from"] = len(df_sel)

# The two optional filters only add a counter when they are active.
if TAG_INCLUDE_SUBSTR:
    mask = df_sel["run_dir"].astype(str).apply(lambda s: any(t in s for t in TAG_INCLUDE_SUBSTR))
    df_sel = df_sel[mask]
    dbg_counts["tag_include"] = len(df_sel)

if RUN_DIR_WHITELIST:
    df_sel = df_sel[df_sel["run_dir"].isin(RUN_DIR_WHITELIST)]
    dbg_counts["whitelist"] = len(df_sel)

print("[DEBUG] filtros duros →", " | ".join(f"{k} → {v} runs" for k,v in dbg_counts.items()))

# If nothing survives, continue with the unfiltered table so later cells still
# run. The outputs then describe every run, not the configured selection.
if df_sel.empty:
    print("[WARN] Tras filtros duros no quedan runs. Usando df_all sin filtros para no cortar el flujo.")
    df_sel = df_all.copy()

# 2) Comparability (same model/T/amp/batch_size), relaxed in stages
# NOTE(review): STRICT_CFG is defined in the config cell but is not consulted
# here; the cascade below always relaxes until at least one run survives.
df_sel = df_sel.copy()
# Row-wise list instead of DataFrame.apply(axis=1): also works on an empty frame.
df_sel["batch_size_filled"] = pd.Series(
    [_batch_size_filled(r) for _, r in df_sel.iterrows()],
    index=df_sel.index, dtype=float,
)

all_true = pd.Series(True, index=df_sel.index)

# MODEL_MATCH_SUBSTR is a plain substring, so it is matched literally (no regex).
model_ok = (
    df_sel["model"].astype(str).str.contains(MODEL_MATCH_SUBSTR, na=False, regex=False)
    if MODEL_MATCH_SUBSTR else all_true
)
T_ok = (pd.to_numeric(df_sel["T"], errors="coerce") == T_TARGET) if (T_TARGET is not None) else all_true

if AMP_REQUIRED:
    amp_s = df_sel["amp"].apply(_to_bool)
    # eq(True) maps None/False to False and yields a real boolean mask
    # (fillna on an object column keeps the object dtype).
    amp_ok = amp_s.eq(True)
else:
    amp_ok = all_true

bs = pd.to_numeric(df_sel["batch_size_filled"], errors="coerce")
# Tolerant match, as documented next to BATCH_SIZE_TARGET: equal to the
# target, or NaN when the batch size was not recorded anywhere.
bs_ok = (bs.eq(BATCH_SIZE_TARGET) | bs.isna()) if (BATCH_SIZE_TARGET is not None) else all_true

print("[DEBUG] Comparabilidad — resúmenes")
print(" - modelos presentes:", sorted(df_sel["model"].astype(str).unique().tolist()))
print(" - T únicos:", sorted(pd.to_numeric(df_sel["T"], errors="coerce").dropna().unique().tolist()))
print(" - AMP valores:", dict(pd.Series(amp_s if AMP_REQUIRED else [True]*len(df_sel)).value_counts(dropna=False)))
print(f" - batch_size_filled (≠ {BATCH_SIZE_TARGET}):", sorted(pd.to_numeric(df_sel.loc[~bs_ok, "batch_size_filled"], errors='coerce').dropna().unique().tolist()))

# From strictest to loosest; the first mask that keeps at least one run wins.
candidates = [
    (model_ok & T_ok & amp_ok & bs_ok,               "strict: model+T+amp+batch"),
    (model_ok & T_ok & amp_ok,                       "relaxed: sin batch"),
    (model_ok & T_ok,                                "relaxed: sin amp & batch"),
    (model_ok,                                       "relaxed: sólo model"),
    (all_true,                                       "relaxed: sin comparabilidad")
]

df_kept = None
chosen_reason = None
for mask, reason in candidates:
    tmp = df_sel[mask].copy()
    if len(tmp) > 0:
        df_kept = tmp
        chosen_reason = reason
        break

# Only reachable when df_sel itself is empty (the last mask keeps every row).
if df_kept is None or df_kept.empty:
    print("[WARN] Comparabilidad dejó 0 runs incluso tras relajar. Continuamos con df_sel sin comparabilidad para no cortar el flujo.")
    df_kept = df_sel.copy()
    chosen_reason = "fallback total"

print(f"[DEBUG] comparabilidad → kept={len(df_kept)}, dropped={len(df_sel)-len(df_kept)} | estrategia: {chosen_reason}")

# 3) Make sure the key metric columns exist
# Find the *_final_mae columns and pick the last task's one (heuristic)
task_cols = [c for c in df_kept.columns if c.endswith("_final_mae")]
assert task_cols, "No se encuentran columnas *_final_mae."

def _key(col):
    # Sort key (name without trailing digits, trailing number): with a shared
    # base name the column with the highest number sorts last.
    base = col.replace("_final_mae","")
    m = re.search(r"(\d+)$", base)
    idx = int(m.group(1)) if m else 0
    base = re.sub(r"\d+$","", base)
    return (base, idx)

task_cols_sorted = sorted(task_cols, key=_key)
MAE_COL = task_cols_sorted[-1]
MAE_TASK_NAME = MAE_COL.replace("_final_mae","")

# Ensure numeric dtypes (a missing column is created as NaN first)
for c in [MAE_COL, "avg_forget_rel", "emissions_kg", "elapsed_sec"]:
    if c not in df_kept.columns:
        df_kept[c] = np.nan
    df_kept[c] = pd.to_numeric(df_kept[c], errors="coerce")

# Cautious fills: missing emissions take the median (0.0 if none is known)
if df_kept["emissions_kg"].isna().all():
    df_kept["emissions_kg"] = 0.0
else:
    df_kept["emissions_kg"] = df_kept["emissions_kg"].fillna(df_kept["emissions_kg"].median())

# Missing forgetting takes the maximum observed value (0.0 if none is known)
if df_kept["avg_forget_rel"].isna().all():
    df_kept["avg_forget_rel"] = 0.0
else:
    df_kept["avg_forget_rel"] = df_kept["avg_forget_rel"].fillna(df_kept["avg_forget_rel"].max())

# Save the selection table (a snapshot of this cut)
sel_csv = THIS_SUMMARY / "selection_table.csv"
df_kept.to_csv(sel_csv, index=False)
print(f"[OK] Selección final → {sel_csv} | filas:", len(df_kept))
display(df_kept.head(10))


Celda 5 — Ganadores por método, Top-N y Pareto (+ gráficos):

In [None]:
# Celda 5 — Winners, TopN, Pareto y gráficos (tolerante a NaNs)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def winners_per_method(dfin: pd.DataFrame, mae_col: str) -> pd.DataFrame:
    """
    Keep one row per ``method_base``: the one with the lowest *mae_col*.

    Ties are broken by ``avg_forget_rel`` and then ``emissions_kg`` (ascending).
    Rows with NaN in *mae_col* are ignored. The result is sorted by the same
    keys; if no row has a MAE, an empty frame with dfin's columns is returned.
    """
    sort_keys = [mae_col, "avg_forget_rel", "emissions_kg"]
    directions = [True] * len(sort_keys)
    valid = dfin.dropna(subset=[mae_col]).copy()
    if valid.empty:
        return dfin.copy().head(0)
    ranked = valid.sort_values(sort_keys, ascending=directions)
    best_rows = ranked.drop_duplicates(subset=["method_base"], keep="first")
    return best_rows.sort_values(sort_keys, ascending=directions)

def composite_score(dfin: pd.DataFrame, mae_col: str, alpha=0.5) -> pd.Series:
    """
    Weighted score ``alpha * norm(mae_col) + (1 - alpha) * norm(avg_forget_rel)``.

    Each column is rescaled to [0, 1] between its 5th and 95th percentiles
    (NaNs ignored) and clipped. A row with NaN in either column gets a NaN
    score; a column with no valid value normalises to all-NaN.
    """
    def _robust_unit(column, out_name):
        numeric = pd.to_numeric(dfin[column], errors="coerce")
        if not numeric.notna().any():
            return pd.Series(np.nan, index=dfin.index, name=out_name)
        low = np.nanpercentile(numeric, 5)
        high = np.nanpercentile(numeric, 95)
        span = max(1e-9, (high - low))  # guards against a zero-width range
        return ((numeric - low) / span).clip(0, 1).rename(out_name)

    mae_unit = _robust_unit(mae_col, mae_col + "_norm")
    forget_unit = _robust_unit("avg_forget_rel", "avg_forget_rel_norm")
    return alpha * mae_unit + (1.0 - alpha) * forget_unit

def pareto_front(dfin: pd.DataFrame, xcol: str, ycol: str) -> pd.DataFrame:
    """
    Return the non-dominated rows of *dfin* when minimising *xcol* and *ycol*.

    A row is dominated if another row is no worse on both columns and strictly
    better on at least one. Rows with NaN in either column are dropped first.
    The result is sorted by (xcol, ycol) ascending.
    """
    cand = dfin.dropna(subset=[xcol, ycol]).copy()
    if cand.empty:
        return cand
    pts = cand[[xcol, ycol]].values.astype(float)
    # A point never dominates itself: it is not strictly better on any axis.
    dominated = np.array(
        [
            bool(np.any(np.all(pts <= p, axis=1) & np.any(pts < p, axis=1)))
            for p in pts
        ],
        dtype=bool,
    )
    return cand[~dominated].sort_values([xcol, ycol], ascending=[True, True])

# 1) Winners per method: the best run of each method_base, saved and displayed
win_df = winners_per_method(df_kept, MAE_COL)
winners_csv = THIS_SUMMARY / "winners_per_method.csv"
win_df.to_csv(winners_csv, index=False)
print("[OK] Winners →", winners_csv)
display(win_df)

# 2) Top-N by composite score (rows without a score or a MAE are excluded)
df_rank = df_kept.copy()
df_rank["score"] = composite_score(df_rank, MAE_COL, alpha=ALPHA_COMPOSITE)
df_rank2 = df_rank.dropna(subset=["score", MAE_COL]).copy()
topn_df = df_rank2.sort_values(["score", MAE_COL, "avg_forget_rel"]).head(TOPN)
topn_csv = THIS_SUMMARY / f"top{TOPN}_composite.csv"
topn_df.to_csv(topn_csv, index=False)
print(f"[OK] Top-{TOPN} →", topn_csv)
display(topn_df[["run_dir","method_base","seed",MAE_COL,"avg_forget_rel","emissions_kg","score"]])

# 3) Pareto front (MAE vs forgetting)
pareto_df = pareto_front(df_kept, MAE_COL, "avg_forget_rel")
pareto_csv = THIS_SUMMARY / "pareto.csv"
pareto_df.to_csv(pareto_csv, index=False)
print("[OK] Pareto →", pareto_csv)
display(pareto_df)

# 4) Gráficos (solo si hay datos válidos)
def scatter_mae_forget(dfin: pd.DataFrame, title: str, outfile: Path):
    """
    Save a scatter plot of final MAE (x) against mean relative forgetting (y).

    One series per ``method_base``. Rows lacking either metric are dropped;
    with nothing left, an info line is printed and no file is written.
    """
    plot_df = dfin.dropna(subset=[MAE_COL, "avg_forget_rel"]).copy()
    if plot_df.empty:
        print(f"[INFO] {title}: sin datos suficientes para scatter (NaNs).")
        return

    fig, ax = plt.subplots(figsize=(7,6))
    method_labels = sorted(plot_df["method_base"].dropna().unique())
    for method_label in method_labels:
        group = plot_df[plot_df["method_base"] == method_label]
        ax.scatter(group[MAE_COL], group["avg_forget_rel"], label=method_label, s=48)

    ax.set_xlabel(f"MAE final ({MAE_TASK_NAME}) ↓")
    ax.set_ylabel("Olvido relativo medio ↓")
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
    ax.legend()

    fig.tight_layout()
    fig.savefig(outfile, dpi=150)
    plt.close(fig)  # release the figure once it is on disk
    print("[OK] Scatter →", outfile)

# Scatter of every selected run
scatter_all_png = THIS_SUMMARY / "scatter_all.png"
scatter_mae_forget(df_kept, f"{SUMMARY_LABEL} — todos", scatter_all_png)

# Scatter of the per-method winners (falls back to all runs if there are none)
scatter_win_png = THIS_SUMMARY / "scatter_winners.png"
scatter_mae_forget(win_df if not win_df.empty else df_kept, f"{SUMMARY_LABEL} — winners", scatter_win_png)


Celda 6 — Curvas y barras comparativas (opcional):

In [None]:
# Cell 6 — Per-run curves and comparative bars (optional: needs src/plots)
# If the import fails for any reason, both helpers are set to None and the
# plotting sections below are skipped.
try:
    from src.plots import plot_across_runs, plot_mae_curves_for_run as plot_loss_curves_for_run
except Exception:
    plot_across_runs = None
    plot_loss_curves_for_run = None

# Per-run curves (val_loss/val_mae), restricted to the selection
# NOTE(review): this cell iterates df_sel (the table before the comparability
# step), not df_kept — confirm that this is the intended selection.
if plot_loss_curves_for_run is not None:
    base_plots = THIS_SUMMARY / "plots_val_metrics"
    base_plots.mkdir(parents=True, exist_ok=True)
    for rd in sorted(set(df_sel["run_dir"].astype(str))):
        try:
            plot_loss_curves_for_run(_abs_run_dir(rd), base_plots, smooth_window=3)
        except Exception as e:
            # One failing run must not stop the remaining plots.
            print(f"[WARN] Falló curvas en {Path(rd).name}: {e}")
    print("[OK] Curvas por run →", base_plots)
else:
    print("[INFO] plot_mae_curves_for_run no disponible; saltando.")

# Bars and trade-offs "across runs", restricted to the selection
if plot_across_runs is not None:
    dest_acc = THIS_SUMMARY / "plots_across_runs"
    dest_acc.mkdir(parents=True, exist_ok=True)
    try:
        plot_across_runs(df_sel, dest_acc)
        print("[OK] plot_across_runs →", dest_acc)
    except Exception as e:
        print("[WARN] plot_across_runs falló:", e)
else:
    print("[INFO] plot_across_runs no disponible; saltando.")


Celda 5 — Ganadores por método, Top-N y Pareto (+ gráficos):

In [None]:
# Celda 5 — Winners, TopN, Pareto y gráficos (tolerante a NaNs)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def winners_per_method(dfin: pd.DataFrame, mae_col: str) -> pd.DataFrame:
    """
    Keep one row per ``method_base``: the one with the lowest *mae_col*.

    Ties are broken by ``avg_forget_rel`` and then ``emissions_kg`` (ascending).
    Rows with NaN in *mae_col* are ignored. The result is sorted by the same
    keys; if no row has a MAE, an empty frame with dfin's columns is returned.
    """
    sort_keys = [mae_col, "avg_forget_rel", "emissions_kg"]
    directions = [True] * len(sort_keys)
    valid = dfin.dropna(subset=[mae_col]).copy()
    if valid.empty:
        return dfin.copy().head(0)
    ranked = valid.sort_values(sort_keys, ascending=directions)
    best_rows = ranked.drop_duplicates(subset=["method_base"], keep="first")
    return best_rows.sort_values(sort_keys, ascending=directions)

def composite_score(dfin: pd.DataFrame, mae_col: str, alpha=0.5) -> pd.Series:
    """
    Weighted score ``alpha * norm(mae_col) + (1 - alpha) * norm(avg_forget_rel)``.

    Each column is rescaled to [0, 1] between its 5th and 95th percentiles
    (NaNs ignored) and clipped. A row with NaN in either column gets a NaN
    score; a column with no valid value normalises to all-NaN.
    """
    def _robust_unit(column, out_name):
        numeric = pd.to_numeric(dfin[column], errors="coerce")
        if not numeric.notna().any():
            return pd.Series(np.nan, index=dfin.index, name=out_name)
        low = np.nanpercentile(numeric, 5)
        high = np.nanpercentile(numeric, 95)
        span = max(1e-9, (high - low))  # guards against a zero-width range
        return ((numeric - low) / span).clip(0, 1).rename(out_name)

    mae_unit = _robust_unit(mae_col, mae_col + "_norm")
    forget_unit = _robust_unit("avg_forget_rel", "avg_forget_rel_norm")
    return alpha * mae_unit + (1.0 - alpha) * forget_unit

def pareto_front(dfin: pd.DataFrame, xcol: str, ycol: str) -> pd.DataFrame:
    """Return the non-dominated rows when both ``xcol`` and ``ycol`` are minimised.

    A row is dominated when some other row is no worse on both columns and
    strictly better on at least one. Exact duplicates do not dominate each
    other, so all copies of a front point are kept. Rows with NaN in either
    column are discarded before the comparison.

    Args:
        dfin: Input frame; it is not modified.
        xcol: First objective column (lower is better).
        ycol: Second objective column (lower is better).

    Returns:
        The non-dominated rows sorted by ``xcol`` then ``ycol`` (ascending),
        or an empty frame when no row has both values.
    """
    valid = dfin.dropna(subset=[xcol, ycol]).copy()
    if valid.empty:
        return valid

    coords = valid[[xcol, ycol]].to_numpy().astype(float)
    # Pairwise comparison: axis 0 indexes the rival, axis 1 the candidate.
    rivals = coords[:, None, :]
    candidates = coords[None, :, :]
    no_worse = (rivals <= candidates).all(axis=2)
    strictly_better = (rivals < candidates).any(axis=2)
    # A point never dominates itself: it is never strictly better than itself.
    dominated = (no_worse & strictly_better).any(axis=0)
    return valid[~dominated].sort_values([xcol, ycol], ascending=[True, True])

# 1) Winners per method
# Compute the per-method winners from df_kept (ranked on MAE_COL), save them
# as a CSV inside THIS_SUMMARY and show the table inline.
win_df = winners_per_method(df_kept, MAE_COL)
winners_csv = THIS_SUMMARY / "winners_per_method.csv"
win_df.to_csv(winners_csv, index=False)
print("[OK] Winners →", winners_csv)
display(win_df)

# 2) Top-N by composite score
# Work on a copy so that df_kept does not gain the "score" column.
df_rank = df_kept.copy()
df_rank["score"] = composite_score(df_rank, MAE_COL, alpha=ALPHA_COMPOSITE)
# Rows lacking a score or an MAE cannot be ranked; drop them before sorting.
df_rank2 = df_rank.dropna(subset=["score", MAE_COL]).copy()
# Ascending sort: lowest score first, ties broken by MAE and then forgetting.
topn_df = df_rank2.sort_values(["score", MAE_COL, "avg_forget_rel"]).head(TOPN)
topn_csv = THIS_SUMMARY / f"top{TOPN}_composite.csv"
topn_df.to_csv(topn_csv, index=False)
print(f"[OK] Top-{TOPN} →", topn_csv)
# Only a fixed subset of columns is displayed; the CSV keeps every column.
display(topn_df[["run_dir","method_base","seed",MAE_COL,"avg_forget_rel","emissions_kg","score"]])

# 3) Pareto front (MAE vs. forgetting)
# Non-dominated runs of df_kept on (MAE_COL, avg_forget_rel), saved as a CSV
# inside THIS_SUMMARY and shown inline.
pareto_df = pareto_front(df_kept, MAE_COL, "avg_forget_rel")
pareto_csv = THIS_SUMMARY / "pareto.csv"
pareto_df.to_csv(pareto_csv, index=False)
print("[OK] Pareto →", pareto_csv)
display(pareto_df)

# 4) Plots (only when there is valid data)
def scatter_mae_forget(dfin: pd.DataFrame, title: str, outfile: Path):
    """Save a MAE-vs-forgetting scatter plot with one series per method.

    Rows with NaN in ``MAE_COL`` or ``avg_forget_rel`` are left out. If no
    row remains, a message is printed and nothing is written. Rows whose
    ``method_base`` is NaN are not drawn.

    Args:
        dfin: Frame with ``MAE_COL``, ``avg_forget_rel`` and ``method_base``.
        title: Figure title, also used in the "no data" message.
        outfile: Destination image path (saved at 150 dpi).
    """
    points = dfin.dropna(subset=[MAE_COL, "avg_forget_rel"]).copy()
    if points.empty:
        print(f"[INFO] {title}: sin datos suficientes para scatter (NaNs).")
        return

    fig, ax = plt.subplots(figsize=(7, 6))
    method_names = sorted(points["method_base"].dropna().unique())
    for method in method_names:
        rows = points[points["method_base"] == method]
        ax.scatter(rows[MAE_COL], rows["avg_forget_rel"], label=method, s=48)

    ax.set_xlabel(f"MAE final ({MAE_TASK_NAME}) ↓")
    ax.set_ylabel("Olvido relativo medio ↓")
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
    ax.legend()

    fig.tight_layout()
    fig.savefig(outfile, dpi=150)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print("[OK] Scatter →", outfile)

# Scatter of every run in df_kept.
scatter_all_png = THIS_SUMMARY / "scatter_all.png"
scatter_mae_forget(df_kept, f"{SUMMARY_LABEL} — todos", scatter_all_png)

# Scatter of the per-method winners. If win_df is empty, df_kept is plotted
# instead; note that the title still reads "winners" in that case.
scatter_win_png = THIS_SUMMARY / "scatter_winners.png"
scatter_mae_forget(win_df if not win_df.empty else df_kept, f"{SUMMARY_LABEL} — winners", scatter_win_png)


Celda 6 — Plots por-run y “across runs” (sólo la selección)

In [None]:
# Cell 6 — Curves and comparative bars (optional if you have src/plots)
# The plotting helpers are optional: if the import fails for any reason, both
# names are bound to None so the rest of the cell can skip the plots.
# Note that plot_mae_curves_for_run is imported under the alias
# plot_loss_curves_for_run.
try:
    from src.plots import plot_across_runs, plot_mae_curves_for_run as plot_loss_curves_for_run
except Exception:
    plot_across_runs = None
    plot_loss_curves_for_run = None

# Per-run curves (val_loss/val_mae), restricted to the selection.
# When the optional plotting helper is unavailable (None) the step is skipped.
if plot_loss_curves_for_run is None:
    print("[INFO] plot_mae_curves_for_run no disponible; saltando.")
else:
    base_plots = THIS_SUMMARY / "plots_val_metrics"
    base_plots.mkdir(parents=True, exist_ok=True)
    # Each distinct run directory is plotted once, in sorted order.
    for rd in sorted(df_sel["run_dir"].astype(str).unique()):
        # Best-effort: one failing run is reported and the loop continues.
        try:
            plot_loss_curves_for_run(_abs_run_dir(rd), base_plots, smooth_window=3)
        except Exception as e:
            print(f"[WARN] Falló curvas en {Path(rd).name}: {e}")
    print("[OK] Curvas por run →", base_plots)

# Across-run bars and trade-off plots, restricted to the selection.
# When the optional plotting helper is unavailable (None) the step is skipped.
if plot_across_runs is None:
    print("[INFO] plot_across_runs no disponible; saltando.")
else:
    dest_acc = THIS_SUMMARY / "plots_across_runs"
    dest_acc.mkdir(parents=True, exist_ok=True)
    # Best-effort: a plotting failure is reported and does not abort the cell.
    try:
        plot_across_runs(df_sel, dest_acc)
        print("[OK] plot_across_runs →", dest_acc)
    except Exception as e:
        print("[WARN] plot_across_runs falló:", e)


In [None]:
# EXTRA A — NaN diagnostics (what is missing and why)
def diag_nans(df, cols):
    """Report the rows of ``df`` that have NaN in any of ``cols``.

    Prints a summary, displays up to 50 offending rows (a fixed set of
    identifying columns plus ``cols``) and, for the first 10 offending runs,
    prints whether each of a fixed list of files exists under the run
    directory.

    Args:
        df: Frame with one row per run; must contain ``cols`` and, when NaNs
            are found, the identifying columns shown in the report.
        cols: List of column names to check for NaN.

    Returns:
        A copy of the rows that have at least one NaN in ``cols`` (empty when
        there is none).
    """
    has_gap = df[cols].isna().any(axis=1)
    miss = df[has_gap].copy()
    if miss.empty:
        print("[OK] No hay NaN en columnas:", cols)
        return miss

    print(f"[INFO] {len(miss)} runs con NaN en {cols}:")
    id_cols = ["run_dir", "method_base", "preset", "encoder", "seed", "T", "amp", "batch_size"]
    display(miss[id_cols + cols].head(50))

    # Hints: which of the key files are present in each run directory
    key_files = [
        "per_task_perf.json",
        "per_task_perf.csv",
        "forgetting.json",
        "eval_matrix.csv",
        "eval_matrix.json",
        "efficiency_summary.json",
        "emissions.csv",
        "run_row.json",
        "task_1_circuito1/manifest.json",
    ]
    for rd in miss["run_dir"].head(10):
        run_path = _abs_run_dir(rd)
        print("—", rd)
        for fname in key_files:
            status = "OK" if (run_path / fname).exists() else "NO"
            print("   ", fname, "→", status)
    return miss

# Key metric columns to check for NaN.
cols_clave = ["circuito1_best_mae","circuito1_final_mae","circuito2_final_mae","avg_forget_rel"]
# First on the current selection...
print("## NaN en selección ##")
_ = diag_nans(df_sel, cols_clave)

# ...then on the global table; the returned frames are discarded on purpose.
print("\n## NaN en df_all (global) ##")
_ = diag_nans(df_all, cols_clave)


Celda 7 — Auditoría rápida (por qué se excluye algo)

In [None]:
# EXTRA B — CL sanity check (naive should forget more than the CL methods)
# Per-method aggregates over the selection, ordered by mean forgetting.
# pandas' mean skips NaN, so a method whose runs all lack avg_forget_rel
# ends up with forget_mean = NaN.
chk = (df_sel.groupby("method_base", as_index=False)
       .agg(forget_mean=("avg_forget_rel","mean"),
            mae_mean=(MAE_COL,"mean"),
            runs=("run_dir","count"))
       .sort_values("forget_mean"))
display(chk)

# Compare naive's mean forgetting against the median of every other method.
naive_forget = chk.loc[chk["method_base"]=="naive","forget_mean"]
if not naive_forget.empty:
    naive_val = naive_forget.values[0]
    cl_methods = chk[~chk["method_base"].isin(["naive"])]
    if not cl_methods.empty:
        cl_median = float(cl_methods["forget_mean"].median())
        print(f"[CHECK] naive_forget={naive_val:.4f} vs CL_median={cl_median:.4f}")
        if pd.isna(naive_val) or pd.isna(cl_median):
            # Any comparison with NaN is False, which would otherwise fall
            # through to the warning below and blame the implementation.
            print("[INFO] Olvido medio NaN (naive o CL); no se puede comprobar la coherencia.")
        elif naive_val + 1e-6 >= cl_median:
            # The small tolerance treats near-equal values as consistent.
            print("[OK] Naive olvida ≥ mediana CL (coherente).")
        else:
            print("[WARN] Naive NO olvida más que CL. Revisa eval_matrix/forgetting.json o implementación.")
