In [None]:
import json
from pathlib import Path
import re
import pandas as pd
import plotly.express as px

In [None]:
# Root directory scanned for evaluation results (one sub-directory per run).
BASE_EVAL_FOLDER = "./.eval/"

# Manually added data: checkpoint step -> score, keyed by run name.
extra_data = {
    'en-ja-BERT-GPT2-LoRA': {
        2500: 0.6847,
        5000: 1.046,
        7500: 1.838,
        10000: 2.187,
        12500: 2.648,
        15000: 2.998,
        17500: 2.982,
        20000: 3.290,
        22500: 3.681,
        25000: 3.597,
    },
}

In [None]:
def load_scores(name, base_folder=None):
    """Collect per-checkpoint test scores from every run directory.

    Reads each ``<base_folder>/<run_dir>/<name>`` file. Every file is expected
    to be a JSON object whose keys are checkpoint steps (parsed with ``int``)
    and whose values are objects with a ``"test_score"`` entry.

    Args:
        name: File name of the evaluation result to look for in each run
            directory (e.g. ``"flores_dev.json"``).
        base_folder: Directory that contains the run directories. Defaults to
            the module-level ``BASE_EVAL_FOLDER``.

    Returns:
        dict mapping run directory name -> ``{step: test_score}``, with steps
        in ascending numeric order.
    """
    root = Path(BASE_EVAL_FOLDER if base_folder is None else base_folder)
    data = {}
    # Sorted so the run order (and therefore plot legend order) is the same
    # on every machine; Path.glob makes no ordering promise.
    for fname in sorted(root.glob(f"*/{name}")):
        if "news" in str(fname.relative_to(root)):
            continue  # skip news invalid format
        with fname.open("rb") as fp:
            raw = json.load(fp)
        scores = {int(step): entry["test_score"] for step, entry in raw.items()}
        # JSON keys are strings, so file order may be lexicographic
        # ("10000" before "2500"); order numerically for plotting/`.last()`.
        # The run name is the parent directory, independent of the OS path
        # separator.
        data[fname.parent.name] = dict(sorted(scores.items()))
    return data

def convert_to_df_0(data):
    """Flatten run scores into a long DataFrame for the BERT-GPT2 vs mBART view.

    Args:
        data: Mapping of run name -> ``{step: score}`` (as returned by
            ``load_scores``).

    Returns:
        DataFrame with columns ``model``, ``steps``, ``score``; one row per
        (run, step). Runs whose name matches none of the known patterns are
        dropped. The columns are present even when no run matches, so that
        downstream plotting and ``groupby("model")`` do not fail on a
        column-less frame.
    """

    def label_for(run_name):
        # The first five characters of the run name are used as a tag
        # (presumably the language pair, e.g. "en-ja").
        tag = run_name[:5]
        if run_name.endswith("-BERT-GPT2-xattn"):
            return f"BERT-GPT2 (xattn) [{tag}]"
        if run_name.endswith("-BERT-GPT2-LoRA"):
            return f"BERT-GPT2 (xattn+LoRA) [{tag}]"
        if run_name.endswith("-BERT-GPT2-xattn-LoRA"):
            return f"BERT-GPT2 (xattn->LoRA) [{tag}]"
        # Only the en-ja "+bt-250k" run is shown as the mBART reference here.
        if run_name.endswith("+bt-250k") and run_name.startswith("en-ja"):
            return f"mBART [{tag}]"
        return None

    records = []
    for run_name, checkpoints in data.items():
        label = label_for(run_name)
        if label is None:
            continue
        records.extend(
            {"model": label, "steps": step, "score": score}
            for step, score in checkpoints.items()
        )
    return pd.DataFrame(records, columns=["model", "steps", "score"])

def convert_to_df_1(data):
    """Flatten run scores into a long DataFrame for the mBART variants view.

    Args:
        data: Mapping of run name -> ``{step: score}`` (as returned by
            ``load_scores``).

    Returns:
        DataFrame with columns ``model``, ``steps``, ``score``; one row per
        (run, step). Runs whose name matches none of the known suffixes are
        dropped. The columns are present even when no run matches, so that
        downstream plotting and ``groupby("model")`` do not fail on a
        column-less frame.
    """
    # (run-name suffix, display label), checked in this order.
    # NOTE(review): "+bt-250k" is labelled "base" and "-bt-500k" is labelled
    # "base+BT" -- confirm this mapping matches the run naming scheme.
    suffix_labels = (
        ("-bt-500k", "base+BT"),
        ("+bt-250k", "base"),
        ("-mixed-500k", "extended"),
    )

    records = []
    for run_name, checkpoints in data.items():
        label = next(
            (text for suffix, text in suffix_labels if run_name.endswith(suffix)),
            None,
        )
        if label is None:
            continue
        # The first five characters of the run name are used as a tag
        # (presumably the language pair, e.g. "en-ja").
        model = f"{label} [{run_name[:5]}]"
        records.extend(
            {"model": model, "steps": step, "score": score}
            for step, score in checkpoints.items()
        )
    return pd.DataFrame(records, columns=["model", "steps", "score"])

#### BERT-GPT2 vs mBART

In [None]:
# Score-vs-step curves from flores_dev.json, one line per model label.
data = load_scores("flores_dev.json")
# Optional: overlay the manually entered scores.
# data.update(extra_data)
px.line(
    data_frame=convert_to_df_0(data),
    x="steps",
    y="score",
    color="model",
    range_x=(2500, 25000),
    # range_y=(9, 15)
)

In [None]:
# Score of the final checkpoint per model, read from wmt_vat.json.
# Rows are sorted by step first so `.last()` picks the highest-step checkpoint
# rather than whichever entry happens to come last in the JSON file.
(
    convert_to_df_0(load_scores("wmt_vat.json"))
    .sort_values("steps", kind="stable")
    .groupby("model")
    .last()
)

#### mBART variants (base vs. base+BT vs. extended)

In [None]:
# Score-vs-step curves from flores_dev.json for the mBART variants.
px.line(
    data_frame=convert_to_df_1(load_scores("flores_dev.json")),
    x="steps",
    y="score",
    color="model",
    range_x=(5000, 50000),
    range_y=(9, 15),
)

In [None]:
# For every model label, print the row (model, steps, score) with the
# highest score in flores_dev.json.
data = convert_to_df_1(load_scores("flores_dev.json"))
for model_type in data["model"].unique():
    data1 = data[data["model"] == model_type]
    print(
        dict(
            data1.loc[data1["score"].idxmax()]
        )
    )

In [None]:
# Score of the final checkpoint per model, read from wmt_vat.json.
# Rows are sorted by step first so `.last()` picks the highest-step checkpoint
# rather than whichever entry happens to come last in the JSON file.
(
    convert_to_df_1(load_scores("wmt_vat.json"))
    .sort_values("steps", kind="stable")
    .groupby("model")
    .last()
)