In [None]:
# Plot summaries generated by `debatebench summarize`
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

# Apply seaborn's "whitegrid" theme globally, before any figure is drawn.
sns.set_theme(style="whitegrid")

# Directory holding the summary CSVs read by the plotting functions below.
# The path is relative, so it is resolved against the current working directory.
viz_dir = Path("results/viz")

def show_winner_counts():
    """Draw a bar chart of the count recorded for each winner label.

    Reads ``winner_counts.csv`` from ``viz_dir`` (columns ``winner`` and
    ``count``) and shows one bar per row, coloured from the "muted" palette.
    """
    df = pd.read_csv(viz_dir / "winner_counts.csv")
    # Use a dedicated figure/axes so the bars never land on whatever axes
    # happen to be current when this function is called.
    _, ax = plt.subplots()
    # seaborn >= 0.13 deprecates `palette` without `hue` (removal announced
    # for 0.14). Mapping hue to the x variable keeps the per-bar colours;
    # dodge=False keeps the bars full-width and centred on older releases.
    sns.barplot(
        x="winner",
        y="count",
        hue="winner",
        data=df,
        palette="muted",
        dodge=False,
        ax=ax,
    )
    # A hue legend would only repeat the x tick labels; drop it if one was drawn.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title("Winner Distribution")
    plt.tight_layout()
    plt.show()

def show_topic_winrates():
    """Show a stacked bar chart of pro wins, con wins and ties per topic.

    Reads ``topic_winrate.csv`` from ``viz_dir``; bars are indexed by
    ``topic_id`` and stacked in the order pro_wins, con_wins, ties.
    """
    # Column name -> bar colour; dict order is the column (stacking) order.
    series_colors = {
        "pro_wins": "#4c72b0",
        "con_wins": "#c44e52",
        "ties": "#55a868",
    }
    raw = pd.read_csv(viz_dir / "topic_winrate.csv")
    by_topic = raw.set_index("topic_id")
    outcomes = by_topic[list(series_colors)]
    chart = outcomes.plot(
        kind="bar",
        stacked=True,
        figsize=(8, 4),
        color=list(series_colors.values()),
    )
    chart.set_ylabel("Count")
    chart.set_title("Wins by Topic")
    plt.tight_layout()
    plt.show()

def show_model_dimension_heatmap():
    """Show an annotated heatmap of mean scores by model and dimension.

    Reads ``model_dimension_avg.csv`` from ``viz_dir`` and reshapes it so
    that rows are ``model_id``, columns are ``dimension`` and cells hold
    ``mean_score``.
    """
    long_scores = pd.read_csv(viz_dir / "model_dimension_avg.csv")
    score_grid = long_scores.pivot(
        index="model_id",
        columns="dimension",
        values="mean_score",
    )

    # Fixed width; the height grows by 0.4 for every row of the pivot.
    n_rows = len(score_grid)
    plt.figure(figsize=(6, 3 + 0.4 * n_rows))

    heat_ax = sns.heatmap(score_grid, annot=True, fmt=".2f", cmap="YlGnBu")
    heat_ax.set_title("Per-Model Dimension Averages")
    plt.tight_layout()
    plt.show()

def show_judge_agreement():
    """Show a symmetric heatmap of pairwise judge agreement rates.

    Reads ``judge_agreement.csv`` from ``viz_dir`` (columns ``judge_a``,
    ``judge_b`` and ``agreement_rate``). Every row fills both the (a, b) and
    the (b, a) cell, and the diagonal is 1.0. Judge pairs that have no row in
    the CSV stay NaN, so they render as blank cells instead of being shown
    as a fabricated 1.00.

    Prints a notice and returns without plotting when the CSV has no rows.
    """
    # Kept function-local as in the original; the visible file-level imports
    # do not include numpy.
    import numpy as np

    df = pd.read_csv(viz_dir / "judge_agreement.csv")
    if df.empty:
        # No pairs means no judges and therefore nothing to draw.
        print("No judge agreement data to plot.")
        return

    judges = sorted(set(df["judge_a"]).union(df["judge_b"]))
    n_judges = len(judges)

    # Start from NaN ("unknown") rather than 1.0 so that missing pairs are
    # not mistaken for perfect agreement; only the diagonal is known to be 1.
    grid = np.full((n_judges, n_judges), np.nan)
    np.fill_diagonal(grid, 1.0)
    mat = pd.DataFrame(grid, index=judges, columns=judges)

    # Mirror each pair so the matrix is symmetric.
    for row in df.itertuples(index=False):
        mat.loc[row.judge_a, row.judge_b] = row.agreement_rate
        mat.loc[row.judge_b, row.judge_a] = row.agreement_rate

    # Square figure whose side grows with the number of judges.
    side = 4 + 0.4 * n_judges
    plt.figure(figsize=(side, side))
    # vmin/vmax pin the colour scale to the full 0..1 range.
    ax = sns.heatmap(mat, annot=True, fmt=".2f", cmap="Blues", vmin=0, vmax=1)
    ax.set_title("Judge Winner Agreement")
    plt.tight_layout()
    plt.show()

# Render every summary chart, one after another, in a fixed order.
for _render in (
    show_winner_counts,
    show_topic_winrates,
    show_model_dimension_heatmap,
    show_judge_agreement,
):
    _render()
