# Análisis de experimentos y métricas

Este notebook resume y visualiza los resultados de los experimentos almacenados en un directorio (por defecto `experiments/`).

**Estructura esperada (por ejecución):**

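- `metrics.jsonl`: log de eventos en formato JSON Lines (un objeto JSON por línea); incluye un evento `config` con la configuración del run y eventos `train_epoch` con las métricas de cada época.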
- `summary.json`: resumen final (tiempos, hiperparámetros, métricas).
- `train_history.json` (opcional): historial de entrenamiento por época.

Si tu estructura difiere, ajusta las rutas y/o los parsers en las siguientes celdas.


In [None]:
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" theme for the plots drawn in this notebook.
sns.set_theme(style="whitegrid")


In [None]:
# Base directory where the experiment runs are stored.
BASE_DIR = Path("experiments")

# You can point it somewhere else, for example:
# BASE_DIR = Path("/ruta/a/experimentos")

# Bare expression so the notebook displays the directory in use.
BASE_DIR


In [None]:
def load_jsonl(path: Path) -> List[Dict[str, Any]]:
    """Parse a JSON Lines file into a list with one decoded value per line.

    Every line is stripped of surrounding whitespace, and lines that are
    empty after stripping are skipped. The file is read as UTF-8.

    Args:
        path: Location of the JSON Lines file.

    Returns:
        The decoded JSON values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with path.open("r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        # Blank lines become "" after stripping and are filtered out here.
        return [json.loads(text) for text in stripped_lines if text]


def load_json(path: Path) -> Dict[str, Any]:
    """Read a UTF-8 encoded JSON file and return its decoded content.

    Args:
        path: Location of the JSON file.

    Returns:
        The value decoded from the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)


def find_runs(base_dir: Path) -> List[Path]:
    """Return the sorted directories under *base_dir* that look like a run.

    A directory counts as a run when it directly contains a
    ``metrics.jsonl`` or a ``summary.json`` file. The search is recursive.

    Args:
        base_dir: Root directory to search.

    Returns:
        The unique run directories in sorted order, or an empty list when
        *base_dir* does not exist.
    """
    if not base_dir.exists():
        return []

    run_dirs = set()
    for marker in ("metrics.jsonl", "summary.json"):
        # A set keeps each directory once even when it holds both files.
        run_dirs.update(found.parent for found in base_dir.rglob(marker))
    return sorted(run_dirs)


In [None]:
# Discover every run directory under BASE_DIR and report how many were found.
runs = find_runs(BASE_DIR)
print(f"Runs encontrados: {len(runs)}")

# Show the first 10
runs[:10]


In [None]:
def extract_config_from_events(events: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Return the first event whose ``"event"`` field equals ``"config"``.

    Args:
        events: Decoded event records, in log order.

    Returns:
        The first matching event (the same object, not a copy), or an
        empty dict when no event matches.
    """
    config_events = (entry for entry in events if entry.get("event") == "config")
    return next(config_events, {})


def extract_run_summary(run_dir: Path) -> Dict[str, Any]:
    """Collect the metadata and final metrics of one run into a flat dict.

    Data comes from two optional files inside *run_dir*:

    * ``summary.json`` supplies ``method``, ``seed``, ``model``, the train
      and probe times and epoch counts, every entry of its ``probe_test``
      dict (stored as ``probe_<key>``) and every entry of its ``hparams``
      dict (stored as ``hparam_<key>``).
    * ``metrics.jsonl`` supplies, through its first ``config`` event, a
      fallback for ``method``, ``seed`` and ``model``, plus ``hparams``
      entries (stored as ``hparam_<key>``, overwriting same-named entries
      taken from ``summary.json``).

    Args:
        run_dir: Directory of a single run.

    Returns:
        A flat dict that always contains ``run_dir`` (as a string) and
        whatever fields the files above provide.
    """
    summary_path = run_dir / "summary.json"
    metrics_path = run_dir / "metrics.jsonl"
    payload: Dict[str, Any] = {"run_dir": str(run_dir)}

    if summary_path.exists():
        summary = load_json(summary_path)
        summary_fields = (
            "method",
            "seed",
            "model",
            "train_time_s",
            "probe_time_s",
            "train_epochs",
            "probe_epochs",
        )
        # Fields absent from the summary are recorded as None so that every
        # run with a summary exposes the same keys.
        payload.update({field: summary.get(field) for field in summary_fields})

        probe_test = summary.get("probe_test")
        if isinstance(probe_test, dict):
            payload.update({f"probe_{key}": value for key, value in probe_test.items()})

        summary_hparams = summary.get("hparams")
        if isinstance(summary_hparams, dict):
            payload.update({f"hparam_{k}": v for k, v in summary_hparams.items()})

    if metrics_path.exists():
        config_event = extract_config_from_events(load_jsonl(metrics_path))
        if config_event:
            for field in ("method", "seed", "model"):
                # The summary branch stores None for fields it lacks, so
                # dict.setdefault would see the key as present and skip the
                # fallback. Fill the field when it is missing or None.
                if payload.get(field) is None:
                    payload[field] = config_event.get(field)

            config_hparams = config_event.get("hparams")
            if isinstance(config_hparams, dict):
                payload.update({f"hparam_{k}": v for k, v in config_hparams.items()})

    return payload


# Build one flat record per discovered run and collect them in a DataFrame
# (one row per run).
summaries = [extract_run_summary(run_dir) for run_dir in runs]
summary_df = pd.DataFrame(summaries)
# Bare expression so the notebook displays the table.
summary_df


## Tabla de resultados

Filtra o ordena la tabla para revisar las métricas principales. Puedes modificar `columns` para quedarte con los campos de interés.


In [None]:
# Preferred columns, in display order. Edit this list to choose which fields
# appear in the table.
columns = [
    "run_dir",
    "method",
    "seed",
    "model",
    "train_time_s",
    "probe_time_s",
    "train_epochs",
    "probe_epochs",
]
# Append the probe metrics. "probe_time_s" and "probe_epochs" also match the
# "probe_" prefix, so skip names already listed; otherwise they would show up
# twice in the table.
columns += [
    col
    for col in summary_df.columns
    if col.startswith("probe_") and col not in columns
]
# Keep only columns that exist. summary_df has no columns when no run was
# found, and lacks the summary.json fields when no run has that file;
# selecting a missing label would raise KeyError.
columns = [col for col in columns if col in summary_df.columns]
sort_keys = [key for key in ("method", "seed") if key in columns]

results_table = summary_df[columns]
if sort_keys:
    results_table = results_table.sort_values(by=sort_keys)
# Bare expression so the notebook displays the table.
results_table.reset_index(drop=True)


## Visualizar curvas de entrenamiento

Selecciona un `run_dir` (por defecto se usa el primer run encontrado) y dibuja las métricas registradas por época.


In [None]:
def load_train_history(run_dir: Path) -> pd.DataFrame:
    """Load the per-epoch training history of a run as a DataFrame.

    ``train_history.json`` is used when it exists. Otherwise the history is
    rebuilt from the ``train_epoch`` events of ``metrics.jsonl``: each event
    becomes one row holding its ``epoch`` value plus the entries of its
    ``metrics`` mapping.

    Args:
        run_dir: Directory of a single run.

    Returns:
        The training history, or an empty DataFrame when neither file
        exists.
    """
    history_file = run_dir / "train_history.json"
    if history_file.exists():
        return pd.DataFrame(load_json(history_file))

    # Fallback: try to rebuild the history from metrics.jsonl.
    metrics_file = run_dir / "metrics.jsonl"
    if not metrics_file.exists():
        return pd.DataFrame()

    epoch_rows = []
    for entry in load_jsonl(metrics_file):
        if entry.get("event") != "train_epoch":
            continue
        record = {"epoch": entry.get("epoch")}
        record.update(entry.get("metrics", {}))
        epoch_rows.append(record)
    return pd.DataFrame(epoch_rows)


# Use the first discovered run; with no runs, fall back to a placeholder
# path meant to be edited by hand.
run_dir = Path(runs[0]) if runs else Path("/ruta/a/tu/run")

history_df = load_train_history(run_dir)
# Preview the first rows of the history.
history_df.head()


In [None]:
if history_df.empty:
    print("No se encontró historial de entrenamiento para este run.")
else:
    # Plot every column except "epoch" (used as the x axis) and "epoch_time_s".
    metric_cols = [col for col in history_df.columns if col not in {"epoch", "epoch_time_s"}]
    if not metric_cols:
        # plt.subplots rejects a grid with zero rows, so report the situation
        # instead of failing.
        print("El historial no contiene métricas para graficar.")
    else:
        # squeeze=False always yields a 2-D array of axes, so a single metric
        # needs no special case; the one column is taken as a 1-D sequence.
        fig, axes_grid = plt.subplots(
            len(metric_cols),
            1,
            figsize=(8, 3 * len(metric_cols)),
            sharex=True,
            squeeze=False,
        )
        axes = axes_grid[:, 0]
        for ax, col in zip(axes, metric_cols):
            sns.lineplot(data=history_df, x="epoch", y=col, ax=ax)
            ax.set_title(f"{col} por época")
        plt.tight_layout()


## Comparación rápida entre ejecuciones

Ejemplo de comparación de una métrica específica (p. ej., `probe_factor_0`) entre métodos. Ajusta `metric_name` según tus datos.


In [None]:
# Metric to compare across methods; it must be a column of summary_df.
metric_name = "probe_factor_0"

if metric_name not in summary_df.columns:
    print(f"La métrica '{metric_name}' no está disponible en summary_df.")
else:
    # One bar per method, drawn without error bars.
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.barplot(data=summary_df, x="method", y=metric_name, errorbar=None, ax=ax)
    ax.set_ylabel(metric_name)
    ax.set_title(f"Comparación de {metric_name}")
    plt.tight_layout()


## Exportar resumen

Guarda la tabla de resultados en CSV para compartir o analizar fuera del notebook.


In [None]:
# Write the summary table to a CSV file (path relative to the current
# working directory), leaving out the DataFrame index.
output_path = Path("experiment_summary.csv")
summary_df.to_csv(output_path, index=False)
# Bare expression so the notebook shows where the file was written.
output_path
