In [23]:
import numpy as np
import pandas as pd
from pathlib import Path

def parse_scores(text):
    """Parse the contents of a ``*.scores.txt`` file.

    Each non-blank line is expected to hold ``<metric name><TAB><value>``.

    Args:
        text: Full text of a scores file.

    Returns:
        Dict mapping each metric name to its value as a float.

    Raises:
        ValueError: If a non-blank line does not contain exactly one tab,
            or if its value cannot be converted to float.
    """
    scores = {}
    for line in text.splitlines():
        # Tolerate blank or trailing empty lines instead of failing on unpack.
        if not line.strip():
            continue
        key, value = line.split("\t")
        scores[key] = float(value)
    return scores


def load_record(score_file):
    """Build one result record from a scores file.

    The language and model are taken from the path: the file's directory is
    the language and that directory's parent is the model, i.e.
    ``.../<model>/<lang>/<name>.scores.txt``.

    Args:
        score_file: Path (or path string) of the scores file.

    Returns:
        Dict with ``"lang"`` and ``"model"`` followed by every parsed metric.
    """
    score_file = Path(score_file)
    lang_dir = score_file.parent
    # Path components instead of splitting on "/" so this also works with
    # Windows path separators.
    record = {"lang": lang_dir.name, "model": lang_dir.parent.name}
    record.update(parse_scores(score_file.read_text()))
    return record


# Sorted so the row order of the results does not depend on the order in
# which the filesystem happens to return matches.
files = sorted((Path().resolve().parent / "experiments").rglob("xlmr_*/**/*.scores.txt"))

records = [load_record(file) for file in files]

In [24]:
# Language code -> tier label ("worst", "bad", "good" or "best").
# Written grouped by tier and flattened into a per-language lookup; the
# resulting dict has the same keys, values and insertion order as a flat
# literal listing every language.
# NOTE(review): what the tiers measure is not stated in this notebook — confirm.
quality_tier = {
    lang: tier
    for tier, langs in (
        ("worst", ("mi", "mni", "pag")),
        ("bad", ("ace", "ban", "ht", "lij", "mai", "nso", "yo", "shn")),
        ("good", ("ks", "lg", "tn", "awa", "rn", "zu")),
        (
            "best",
            (
                "bm", "ee", "fon", "ig", "ln", "ny", "pcm",
                "rw", "sn", "ti", "ts", "tw", "wo",
            ),
        ),
    )
    for lang in langs
}

In [25]:
# One row per scores file, plus a `tier` column looked up from the language
# code. Languages absent from `quality_tier` get None, because `dict.get`
# returns None for unknown keys.
df = pd.DataFrame.from_records(records).assign(
    tier=lambda frame: frame["lang"].apply(quality_tier.get)
)

In [26]:
# Persist the combined results table; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='xlm-r-sib200-results.csv', index=False)

In [27]:
import altair as alt

# Draw one row of panels per tier, in the order the tiers are declared in
# `quality_tier`, so the figure layout does not depend on the row order of
# `df`. Tiers with no rows are skipped; this also drops the None tier that
# languages missing from `quality_tier` receive, which would otherwise
# render as an empty "Tier: None" panel.
tier_order = [
    tier
    for tier in dict.fromkeys(quality_tier.values())
    if (df["tier"] == tier).any()
]

plots = []
for tier in tier_order:
    tier_rows = df[df["tier"] == tier]

    # One bar per model, faceted into one column per language.
    tier_chart = (
        alt.Chart(tier_rows, title=f"Tier: {tier}")
        .mark_bar()
        .encode(
            x=alt.X("model", title=None, axis=alt.Axis(labelAngle=-45)).sort('ascending'),
            # Fixed [0, 1] domain keeps the F1 axis comparable across panels.
            y=alt.Y("f1", title="F1", scale=alt.Scale(domain=[0, 1])),
            column=alt.Column("lang:N", title=None).sort('ascending'),
            # The x axis already names the model, so the colour legend is hidden.
            color=alt.Color("model", legend=None),
        )
    )
    plots.append(tier_chart)

# Stack the per-tier rows vertically under one centred title and export.
plot = (
    alt.vconcat(*plots)
    .properties(
        title="XLM-R results SIB-200",
    )
    .configure_title(
        orient="top",
        anchor="middle",
    )
)
plot.save("xlmr-sib-200.png")

In [28]:
# Scratch arithmetic: the value is neither assigned nor used by any visible cell.
# NOTE(review): presumably the chance-level score for a 7-way classification
# task — confirm, or remove this cell.
1/7

0.14285714285714285

In [23]:
# Show every row for the language code 'yo'.
# NOTE(review): the recorded output below has an `Unnamed: 0` column and no
# `tier` column, and the cell's execution count is out of sequence, so it
# looks like it was produced from a different `df` — re-run to refresh.
df.loc[df['lang'].eq('yo')]

Unnamed: 0,lang,model,precision,recall,f1
1,yo,xlmr_eval_raw_wiki,0.219251,0.401961,0.283736
5,yo,xlmr_eval_pre_filtered,0.102282,0.22549,0.140729
9,yo,xlmr_eval_baseline,0.0625,0.25,0.1
