In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from pathlib import Path

In [None]:
# Root directory of the evaluation results. The loaders below read
# <data_root>/<model_id>/<lp>/sys_scores.txt, so replace the placeholder with
# the directory that contains one sub-directory per model.
data_root = Path("<path to results for adapter hyperparameters>")

def load_scores(scores_file: Path):
    """Parse a scores file into a mapping of metric name to float score.

    Every non-blank line must have the form ``<metric>: <value>`` where
    ``<value>`` is accepted by ``float``. Blank and whitespace-only lines are
    skipped instead of crashing the tuple unpacking.

    Args:
        scores_file: Path of the text file to read.

    Returns:
        A dict mapping each metric name to its score, in file order.

    Raises:
        ValueError: If a non-blank line has no ``": "`` separator, or if its
            value cannot be converted to ``float``.
    """
    scores = {}
    for line_no, line in enumerate(scores_file.read_text().splitlines(), start=1):
        if not line.strip():
            continue
        # Split on the LAST separator: the value is a plain number, so any
        # earlier ": " can only belong to the metric name.
        key, separator, value = line.rpartition(": ")
        if not separator:
            raise ValueError(
                f"{scores_file}:{line_no}: expected '<metric>: <value>', got {line!r}"
            )
        scores[key] = float(value)
    return scores

def load_lp(data_root: Path, model_id: str, lp: str):
    """Build one result record for a (model, language pair) combination.

    Reads ``<data_root>/<model_id>/<lp>/sys_scores.txt`` through
    ``load_scores`` and returns a flat dict holding ``model_id``, ``lp`` and
    every metric parsed from that file.
    """
    metrics = load_scores(data_root / model_id / lp / "sys_scores.txt")
    record = {"model_id": model_id, "lp": lp}
    # Metrics are merged last, exactly like the ``**scores`` unpacking would.
    record.update(metrics)
    return record


def load_results(data_root: Path):
    """Collect the scores of every model / language pair under ``data_root``.

    The expected layout is ``<data_root>/<model_id>/<lp>/sys_scores.txt``.
    Only directories are visited at both levels; plain files are ignored.

    Args:
        data_root: Directory containing one sub-directory per model.

    Returns:
        A ``pandas.DataFrame`` with one row per (model, language pair), holding
        the ``model_id`` and ``lp`` columns plus one column per metric.
    """
    records = []
    # Path.iterdir() yields entries in arbitrary order; sorting keeps the row
    # order (and so the table and plot order) reproducible across runs.
    for model_id_dir in sorted(d for d in data_root.iterdir() if d.is_dir()):
        model_id = model_id_dir.name
        for lp_dir in sorted(d for d in model_id_dir.iterdir() if d.is_dir()):
            records.append(load_lp(data_root, model_id, lp_dir.name))
    return pd.DataFrame(records)

# Load every score file found under data_root into a single DataFrame
# (one row per model / language pair) and display it as the cell output.
results = load_results(data_root)
results

In [None]:
# COMET-22 per model: one bar per model_id, aggregated over its language pairs.
fig, ax = plt.subplots()
sns.barplot(data=results, x="model_id", y="COMET-22", ax=ax)
# Model ids are long, so draw the tick labels vertically.
ax.tick_params(axis="x", labelrotation=90)
# Zoom the y-axis into the 0.8-0.9 band.
ax.set_ylim(0.8, 0.9)

In [None]:
# Average every metric over the language pairs of each model, best COMET-22 first.
# - numeric_only=True: `results` also holds the string column "lp"; averaging it
#   raises a TypeError on pandas >= 2.0.
# - sort_values returns a new frame, so it is chained into the assignment rather
#   than called on its own line (where its result would be thrown away).
mean_results = (
    results.groupby("model_id")
    .mean(numeric_only=True)
    .sort_values("COMET-22", ascending=False)
)
# Report the COMET metrics on a 0-100 scale.
mean_results["COMET-22"] *= 100
mean_results["COMETKiwi"] *= 100
# Global display option: every float rendered after this point uses two decimals.
pd.options.display.float_format = '{:,.2f}'.format
mean_results

In [None]:
# Rank the models by mean COMET-22 (highest first) and keep the top model id.
ranked_by_comet22 = mean_results.sort_values(by="COMET-22", ascending=False)
best_model = ranked_by_comet22.index[0]
best_model

In [None]:
# Per-language-pair rows of the best model; .copy() detaches them from `results`.
is_best_model = results["model_id"] == best_model
best_model_results = results[is_best_model].copy()

# COMET-22 of the best model, one bar per language pair.
fig, ax = plt.subplots()
sns.barplot(data=best_model_results, x="lp", y="COMET-22", ax=ax)
ax.set_ylim(0.7, 0.9)

In [None]:
# Report table for the best model: short column names, a fixed display label
# for the model, and the COMET metrics on a 0-100 scale.
# The table is rebuilt from `results` instead of mutating the existing frame in
# place, so re-running this cell cannot multiply the scores by 100 a second time.
best_model_id = best_model
best_model_results = (
    results.loc[results["model_id"] == best_model_id]
    .rename(
        columns={
            "COMET-22": "comet22",
            "COMETKiwi": "cometkiwi",
            "model_id": "model",
            "BLEU": "bleu",
            "chrF": "chrf",
            "lp": "lang_pair",
        }
    )
    .assign(
        # Only the model column is relabelled; other columns are left alone.
        model="7B_adapters",
        comet22=lambda df: df["comet22"] * 100,
        cometkiwi=lambda df: df["cometkiwi"] * 100,
    )
)
best_model_results