In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from itertools import chain
from pathlib import Path

# Matplotlib rc overrides layered on top of seaborn's "ticks" style:
# dashed grid lines on every axes and a serif font.
params = {
    'axes.grid' : True,
    "grid.linestyle": '--',
    "font.family": "serif",
    "font.serif": "Times New Roman",
}

# Global plotting theme for the whole notebook: style (with the overrides
# above), "paper" context with fonts scaled by 1.5, and the "Set2" palette.
sns.set_style("ticks", params)
sns.set_context("paper", font_scale=1.5)
sns.set_palette("Set2")

In [None]:
# Root directories of the evaluation results, one per model variant.
# The values are placeholders and must be replaced with real paths before
# running the notebook. Each root is read by `load_results` as
# <root>/<language pair>/<checkpoint>/<instructions>/sys_scores.txt.
pretrained_root_path = Path("<path to 7B pretrained flores results>")
best_adapters_root_path = Path("<path to 7B best adapters model flores results>")
finetune_root_path = Path("<path to 7B best finetuned model flores results>")

def load_scores(scores_file: Path):
    """Parse a scores file into a ``{metric name: score}`` dictionary.

    Every non-blank line must have the form ``<metric>: <number>``. Blank
    lines (for example a trailing empty line) are skipped, and the line is
    split on its *last* ``": "`` so the numeric value is always the final
    field.

    Args:
        scores_file: Path of the text file to read.

    Returns:
        A dict mapping each metric name to its score as a ``float``, in file
        order. If a metric appears more than once, the last value wins.

    Raises:
        ValueError: If a non-blank line has no ``": "`` separator or its
            value cannot be converted to ``float``.
    """
    scores = {}
    for line_number, raw_line in enumerate(scores_file.read_text().splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            # Tolerate empty lines instead of failing on tuple unpacking.
            continue
        key, separator, value = line.rpartition(": ")
        if not separator:
            raise ValueError(
                f"{scores_file}:{line_number}: expected '<metric>: <value>', got {raw_line!r}"
            )
        scores[key] = float(value)
    return scores

def load_lp(data_root: Path, lp: str, instructions, ckpt: str):
    """Load the system-level scores of a single language pair.

    The scores are read from
    ``data_root / lp / ckpt / instructions / "sys_scores.txt"`` via
    ``load_scores``.

    Args:
        data_root: Root directory holding one sub-directory per language pair.
        lp: Language-pair directory name; also stored in the result.
        instructions: Name of the instructions sub-directory.
        ckpt: Name of the checkpoint sub-directory.

    Returns:
        A dict with the key ``"lp"`` followed by every metric returned by
        ``load_scores`` (a metric literally named ``"lp"`` would replace the
        language-pair entry).
    """
    record = {"lp": lp}
    record.update(
        load_scores(data_root.joinpath(lp, ckpt, instructions, "sys_scores.txt"))
    )
    return record


def load_results(data_root: Path, instructions, ckpt: str):
    """Collect the scores of every language pair found under ``data_root``.

    Each immediate sub-directory of ``data_root`` is treated as one language
    pair (its name is the pair identifier) and loaded with ``load_lp``.
    Non-directory entries are ignored.

    Args:
        data_root: Directory containing one sub-directory per language pair.
        instructions: Name of the instructions sub-directory to read.
        ckpt: Name of the checkpoint sub-directory to read.

    Returns:
        A ``pandas.DataFrame`` with one row per language pair, built from the
        dicts returned by ``load_lp``. Rows follow the order in which
        ``Path.iterdir`` yields the directories.
    """
    rows = [
        load_lp(data_root, entry.name, instructions, ckpt)
        for entry in data_root.iterdir()
        if entry.is_dir()
    ]
    return pd.DataFrame(rows)


# Load one frame per (model, prompting context) combination. `.assign` returns
# a labelled copy, so each frame is created and tagged in a single expression
# instead of being mutated after the fact.
pretrained_zero_shot_results = load_results(
    pretrained_root_path, "zero_shot_instructions", "0"
).assign(Model="Pretrained", Context="Zero-Shot")
pretrained_few_shot_results = load_results(
    pretrained_root_path, "few_shot_instructions2", "0"
).assign(Model="Pretrained", Context="Five-Shot")
finetune_results = load_results(
    finetune_root_path, "zero_shot_instructions", "240000"
).assign(Model="Finetuned", Context="Zero-Shot")
best_adapters_results = load_results(
    best_adapters_root_path, "zero_shot_instructions", "20000"
).assign(Model="LoRA", Context="Zero-Shot")

# Stack all runs into one long-format frame. Every input frame is indexed
# 0..n-1, so `ignore_index=True` is needed to give the combined frame unique
# row labels; duplicate labels break index-aligned operations downstream.
results = pd.concat(
    [
        pretrained_zero_shot_results,
        pretrained_few_shot_results,
        finetune_results,
        best_adapters_results,
    ],
    ignore_index=True,
).rename(columns={"lp": "Language Pair"})

# Rescale the two COMET metrics by a factor of 100.
results["COMET-22"] = results["COMET-22"] * 100
results["COMETKiwi"] = results["COMETKiwi"] * 100

# Display orderings shared by the plots and the exported table.
context_order = ["Zero-Shot", "Five-Shot"]
models_order = ["Pretrained", "Finetuned", "LoRA"]
non_eng_lang_order = ["de", "fr", "nl", "pt", "ru", "zh"]
# For every non-English language: the into-English pair first, then the
# out-of-English pair ("de-en", "en-de", "fr-en", ...).
lang_pairs = [
    pair
    for lang in non_eng_lang_order
    for pair in (f"{lang}-en", f"en-{lang}")
]

# Figure: one row of three bar-chart panels, one panel per entry of `metrics`.
_, axes = plt.subplots(1, 3, figsize=(14, 3.5))
metrics = ["COMETKiwi", "BLEU", "chrF"]
# Panel title for each metric column. "COMET-22" is not in `metrics` above,
# so its entry is not used by this figure.
metrics2ylabel = {
    "COMET-22": "COMET",
    "COMETKiwi": "COMETKiwi",
    "BLEU": "BLEU",
    "chrF": "chrF",
}
# Fixed y-axis range for each metric column.
metrics2ylim = {
    "COMET-22": (60, 90),
    "COMETKiwi": (60, 90),
    "BLEU": (0, 50),
    "chrF": (0, 80),
}
# Don't show pretrained zero-shot in the plot as it is poor
plot_results = results[~((results["Model"] == "Pretrained") & (results["Context"] == "Zero-Shot"))].copy()

for i, (m, ax) in enumerate(zip(metrics, axes.flatten())):
    # Only the first panel keeps a legend; the other panels drop theirs.
    plot_legend = i == 0
    # Grouped bars: one group per language pair, one bar per model.
    g = sns.barplot(
        data=plot_results, x="Language Pair", y=m, hue="Model",
        order=lang_pairs,
        #order=["En-XX", "XX-En"],
        hue_order=models_order,
        ax=ax,
    )
    if plot_legend:
        g.legend().set_title("")
    else:
        g.get_legend().remove()
    # The metric name is shown as the panel title, so the y-axis label is cleared.
    g.set_title(metrics2ylabel[m])
    g.set_ylabel("")
    g.set_xticklabels(g.get_xticklabels(),rotation=45)
    g.set_ylim(metrics2ylim[m])
    if plot_legend:
        # Re-anchor the first panel's legend as a single three-column row.
        # NOTE(review): the (1.72, -.5) offset looks hand-tuned to place the
        # legend below the row of panels - confirm if the layout changes.
        sns.move_legend(
            g, "lower center",
            bbox_to_anchor=(1.72, -.5), ncol=3, title=None, frameon=True,
        )
plt.subplots_adjust(wspace=0.2)
#plt.savefig("figures/adapters_vs_finetuning_other_metrics.pdf", bbox_inches="tight", dpi=200)

def sort_func(x):
    """Return the display rank of a language pair, model or context label.

    The value is looked up, in this order, in ``lang_pairs``,
    ``models_order`` and ``context_order``; the result is its position in
    the first of those lists that contains it.

    Args:
        x: A label expected to appear in one of the three ordering lists.

    Returns:
        The zero-based index of ``x`` within the matching list.

    Raises:
        ValueError: If ``x`` is in none of the ordering lists.
    """
    for ordering in (lang_pairs, models_order, context_order):
        if x in ordering:
            return ordering.index(x)
    raise ValueError(f"Unknown value {x}")

# Build the final table in one chain (no in-place mutation of intermediate
# frames): order rows by the display orderings, keep only the reported
# columns, publish "COMET-22" under the name "COMET", and index the rows by
# (language pair, model, context).
results = (
    results
    .sort_values(
        by=["Language Pair", "Model", "Context"],
        # `sort_func` maps every label to its rank in the matching ordering list.
        key=lambda column: column.apply(sort_func),
    )
    .reset_index(drop=True)
    .loc[:, ["Language Pair", "Model", "Context", "COMET-22", "COMETKiwi", "BLEU", "chrF"]]
    .rename(columns={"COMET-22": "COMET"})
    .set_index(["Language Pair", "Model", "Context"])
)

# `to_latex` does not create missing directories, so make sure the output
# folder exists before writing the table.
latex_table_path = Path("tables/adapters_vs_finetuning.tex")
latex_table_path.parent.mkdir(parents=True, exist_ok=True)
results.to_latex(latex_table_path, float_format="%.2f")