In [1]:
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import os
from pathlib import Path
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import json

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Both roots are required. os.environ[...] raises a KeyError that names the
# missing variable, instead of the opaque TypeError raised by Path(None) when
# os.getenv() returns None.
PROJECT_ROOT = Path(os.environ["PROJECT_ROOT"]).resolve()
DATA_ROOT = Path(os.environ["DATA_ROOT"]).resolve()

def ema(values, alpha=0.98):
    """Smooth a sequence with an exponential moving average.

    The first output equals the first input; every later output is
    ``alpha * previous_output + (1 - alpha) * current_value``.

    Args:
        values: Iterable of numbers to smooth.
        alpha: Weight given to the previous smoothed value.

    Returns:
        A list with one smoothed value per input value (empty for empty input).
    """
    result = []
    running = None
    for value in values:
        if running is None:
            # Seed the average with the first observation.
            running = value
        else:
            running = alpha * running + (1 - alpha) * value
        result.append(running)
    return result

In [2]:
win_rate_root = DATA_ROOT / "experiment2" / "validation"
# One entry per result file; the three lists are kept index-aligned.
# Each series is stored as a tuple (the result of zip(*...)).
steps: list[tuple] = []
win_rates: list[tuple] = []
names: list[str] = []

# Maps the run key derived from a result file name to its legend label.
# NOTE(review): for the "kahn" runs the "reason" key maps to "FASO Reason-Kahn"
# and the "no_reason" key to "FASO-Kahn", but for the "deterministic" runs the
# pairing is the other way round. The two deterministic labels may be swapped;
# confirm before publishing figures. Left unchanged here.
name_dict = {
    "cyclic": "Baseline (All edges)",
    "acyclic_reason_kahn": "FASO Reason-Kahn",
    "acyclic_no_reason_kahn": "FASO-Kahn",
    "acyclic_reason_deterministic": "FASO-Deterministic",
    "acyclic_no_reason_deterministic": "FASO Reason-Deterministic",
}

for sub in sorted(win_rate_root.iterdir()):
    # Only plain files are result files; sub-directories are ignored.
    if sub.is_dir():
        continue

    # The run key is the file name minus its last 14 characters
    # (presumably a fixed suffix such as "_win_rate.json" -- TODO confirm).
    # An unknown key raises KeyError before any parsing happens.
    label = name_dict[sub.name[:-14]]

    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(sub, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    assert isinstance(loaded, list)

    step = []
    win_rate = []
    for item in loaded:
        assert isinstance(item, dict)
        step.append(item["step"])
        win_rate.append(item["win_rate"])

    # Order the points by step (ties broken by win rate) so curves are drawn
    # left to right regardless of the order in the file.
    sorted_pairs = sorted(zip(step, win_rate))
    # zip(*[]) yields nothing to unpack, so an empty file becomes an empty
    # series instead of raising ValueError.
    step, win_rate = zip(*sorted_pairs) if sorted_pairs else ((), ())

    # Append to all three lists together so they can never get out of step.
    names.append(label)
    steps.append(step)
    win_rates.append(win_rate)

In [3]:
def plot_win_rate(steps, win_rates, names, save_path, for_paper=False, ext="png"):
    """Plot one win-rate curve per run and save the figure to disk.

    Args:
        steps: Sequence of x-value sequences, one per run.
        win_rates: Sequence of y-value sequences, index-aligned with ``steps``.
        names: Legend label for each run; its length decides how many curves
            are drawn.
        save_path: Output location (str or Path). Any existing suffix is
            replaced by ``ext``. Missing parent directories are created.
        for_paper: If True, draw a small 3.5 x 2.5 inch figure with Times New
            Roman fonts and no grid; otherwise a 10 x 6 inch figure with a grid.
        ext: File extension of the saved figure, with or without a leading
            dot. Defaults to "png", the format this function has always
            written; pass "pdf" for a vector figure.

    Returns:
        None. The figure is written to disk and then closed.
    """
    if for_paper:
        rc_overrides = {
            "font.family": "Times New Roman",
            "font.size": 9,
            "axes.labelsize": 9,
            "xtick.labelsize": 8,
            "ytick.labelsize": 8,
            "legend.fontsize": 8,
            "axes.unicode_minus": False,
        }
        figsize = (3.5, 2.5)  # figure size for the paper, in inches
        lw = 1.0
        add_grid = False
    else:
        rc_overrides = {}
        figsize = (10, 6)
        lw = 1.8
        add_grid = True

    # The original note said paper figures should be PDF, yet the code always
    # wrote PNG. The format is now a parameter whose default keeps PNG, so
    # existing output files do not move.
    out_path = Path(save_path).with_suffix("." + ext.lstrip("."))
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # rc_context restores the previous rcParams on exit, so the paper fonts do
    # not leak into figures drawn later in the same kernel.
    with plt.rc_context(rc_overrides):
        fig, ax = plt.subplots(figsize=figsize)
        try:
            for i, name in enumerate(names):
                ax.plot(steps[i], win_rates[i], label=name, linewidth=lw)

            ax.set_xlabel("Step")
            ax.set_ylabel("Win Rate Against SFT Model")
            ax.legend(frameon=False)
            if add_grid:
                ax.grid()
            fig.tight_layout()
            fig.savefig(out_path, dpi=300)
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)

In [4]:
# Render the paper-style validation win-rate comparison for experiment 2.
plot_win_rate(
    steps,
    win_rates,
    names,
    save_path=PROJECT_ROOT.joinpath(
        "graphs", "experiment2", "comparisons", "validation_win_rate"
    ),
    for_paper=True,
)

In [14]:
# Show each (label, win-rate series) pair, separated by a blank line.
print(*zip(names, win_rates), sep="\n\n")

('FASO Reason-Deterministic', (0.7446043165467626, 0.8345323741007195, 0.8237410071942446, 0.8880866425992779, 0.8273381294964028, 0.8597122302158273, 0.8597122302158273, 0.8597122302158273, 0.8489208633093526, 0.8561151079136691, 0.8525179856115108, 0.8776978417266187, 0.8633093525179856))

('FASO-Kahn', (0.737410071942446, 0.8309352517985612, 0.8345323741007195, 0.8597122302158273, 0.8741007194244604, 0.8664259927797834, 0.8597122302158273, 0.8669064748201439, 0.8848920863309353, 0.8884892086330936, 0.9028776978417267))

('FASO-Deterministic', (0.7482014388489209, 0.8057553956834532, 0.8309352517985612, 0.8057553956834532, 0.8273381294964028, 0.8776978417266187, 0.8525179856115108, 0.8741007194244604, 0.8489208633093526, 0.8776978417266187, 0.8669064748201439, 0.8633093525179856, 0.8597122302158273, 0.8705035971223022))

('FASO Reason-Kahn', (0.7410071942446043, 0.7913669064748201, 0.8309352517985612, 0.8597122302158273, 0.8453237410071942, 0.8741007194244604, 0.8705035971223022, 0.8

In [15]:
# Mean of the first four literals minus the fifth (0.8561...), scaled by 100,
# i.e. an average difference expressed in percentage points.
# NOTE(review): the literals are hard-coded. The first three match the last
# entries of the 'FASO Reason-Deterministic', 'FASO-Kahn' and
# 'FASO-Deterministic' tuples printed above; the fourth and the subtracted
# value are presumably the final win rates of 'FASO Reason-Kahn' and the
# baseline -- confirm, and consider deriving them from `win_rates` so the
# number cannot go stale when the data changes.
print((0.8633093525179856+0.9028776978417267+0.8705035971223022+0.8489208633093526-0.8561151079136691 * 4) / 4 * 100)

1.5287769784172678
