# Task 2 · Paso 3 — Comparación RoBERTa vs DeBERTa vs ModernBERT
Entrenamos los 3 modelos durante 2 épocas cada uno y comparamos su **Macro-F1** sobre el conjunto de test de AG News.

In [None]:

# Optional one-time environment setup (left disabled):
# !pip -q install -r ../requirements.txt
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from src.train_eval import (
    set_seed,
    stratified_split_agnews,
    train_and_eval,
    MODEL_SPECS,
)

# Seed first, then build the splits that every later cell reuses.
# NOTE(review): set_seed / stratified_split_agnews are project helpers; their
# exact behavior lives in src/train_eval.py.
set_seed(42)
splits = stratified_split_agnews()

# Cell output: number of examples in each of the three splits.
{split_name: len(splits[split_name]) for split_name in ("train", "validation", "test")}


In [None]:

# Train and evaluate every model in MODEL_SPECS for 2 epochs, collecting the
# test Macro-F1 of each run into `df` and persisting the table as CSV.
# Relies on names from the setup cell: os, pd, MODEL_SPECS, splits, train_and_eval.

# The CSV below is written into outputs/ even when every run fails, so make
# sure the directory exists up front instead of relying on the training code.
os.makedirs("outputs", exist_ok=True)

results = []
for spec in MODEL_SPECS:
    outdir = f"outputs/{spec.name.replace(' ','_')}"
    try:
        out = train_and_eval(spec.id, splits, outdir=outdir, epochs=2.0)
        f1 = out["test_metrics"].get("macro_f1", None)
    except Exception as e:
        # Best-effort comparison: one failing model must not abort the others,
        # but the failure should be visible in the cell output, not only buried
        # in the "raw" column.
        print(f"[WARN] {spec.name} failed: {type(e).__name__}: {e}")
        f1 = None
        out = {"test_metrics": {"macro_f1": None, "error": str(e)}}
    results.append({"model": spec.name, "macro_f1": f1, "raw": out})

# One row per model; "raw" keeps the full train_and_eval output (or the error).
df = pd.DataFrame(results)
display(df[["model","macro_f1"]])
df.to_csv("outputs/agnews_f1_results.csv", index=False)


In [None]:

# Bar chart of test Macro-F1 per model; saved as PNG under outputs/ and shown inline.
# Relies on names from earlier cells: os, pd, plt, sns (setup cell) and df (training cell).

# Idempotent guard so savefig does not depend on another cell having created the directory.
os.makedirs("outputs", exist_ok=True)

# Failed runs store macro_f1=None. Coerce to float (None -> NaN) on a copy so the
# y variable is numeric even if every run failed; `df` itself is left untouched.
plot_df = df.assign(macro_f1=pd.to_numeric(df["macro_f1"], errors="coerce"))

fig, ax = plt.subplots(figsize=(6,4))
sns.barplot(data=plot_df, x="model", y="macro_f1", ax=ax)
ax.set_title("AG News — Macro F1 por modelo")
ax.set_ylabel("Macro F1 (test)")
ax.set_xlabel("Modelo")
ax.set_ylim(0, 1)  # F1 is bounded in [0, 1]; a fixed axis keeps runs comparable
fig.tight_layout()
fig.savefig("outputs/agnews_f1_comparison.png")
plt.show()
