In [182]:
import pandas as pd

In [183]:
def extract_metadata(model_paths):
    """Parse experiment metadata out of dot-separated model path strings.

    Each path is split on "." and fields are read by fixed position:
    part 1 (text after its last "/") is the base model, part 4 the base
    technique and part 5 the number of examples per class. When a path has
    more than six parts, parts 7-8, 10-11 and 13-14 are joined pairwise
    with "." and parsed as the floats ``fada_a``, ``fada_f`` and ``fada_g``.
    # NOTE(review): presumably paths look like
    # "./<dir>/<base_model>.glue.sst2.<technique>.<n>[.a.X.Y.f.X.Y.g.X.Y]"
    # -- confirm against the result CSVs.

    Args:
        model_paths: Iterable of path strings.

    Returns:
        A list with one dict per input path, in input order. Paths that
        contain an "annotated" part yield an empty dict. Other paths yield
        the keys ``base_model``, ``base_technique``, ``num_per_class``
        (kept as a string), optionally ``fada_a``/``fada_f``/``fada_g``,
        and ``technique`` (the dotted label assembled from those fields).

    Raises:
        IndexError: If a path has fewer parts than the positions read.
        ValueError: If a FADA value pair does not parse as a float.
    """
    # (output key, index of the integer part) for each FADA value; the
    # fractional part sits in the following position.
    fada_fields = (("fada_a", 7), ("fada_f", 10), ("fada_g", 13))

    records = []
    for path in model_paths:
        parts = path.split(".")

        # Placeholder keeps the output aligned one-to-one with the input.
        if "annotated" in parts:
            records.append({})
            continue

        record = {
            "base_model": parts[1].split("/")[-1],
            "base_technique": parts[4],
            "num_per_class": parts[5],
        }
        label = f"{record['base_technique']}.{record['num_per_class']}"

        if len(parts) > 6:
            for key, pos in fada_fields:
                record[key] = float(f"{parts[pos]}.{parts[pos + 1]}")
            # The label uses the parsed floats, so values are normalised by
            # float formatting (e.g. "0.10" becomes "0.1").
            label += f".a.{record['fada_a']}.f.{record['fada_f']}.g.{record['fada_g']}"

        record["technique"] = label
        records.append(record)
    return records

In [184]:
# Row order for the training-results table: (technique label, num_per_class)
# pairs, grouped by the number of examples per class. Within each group the
# two baseline techniques come first, followed by the ten FADA settings.
# NOTE(review): the 2500 group lists "uniform" before "original", unlike the
# 10 and 200 groups. The order is reproduced as-is -- confirm it is intended.
index_order = [
    (label, n)
    for n, baselines in (
        ("10", ("original", "uniform")),
        ("200", ("original", "uniform")),
        ("2500", ("uniform", "original")),
    )
    for label in (
        [f"glue.sst2.{baseline}.{n}" for baseline in baselines]
        + [
            # The f and g values are identical in every listed setting.
            f"glue.sst2.fada.{n}.a.{a}.f.{fg}.g.{fg}"
            for a, fg in (
                ("1.0", "0.0"),
                ("0.9", "0.05"),
                ("0.8", "0.1"),
                ("0.7", "0.15"),
                ("0.6", "0.2"),
                ("0.5", "0.25"),
                ("0.4", "0.3"),
                ("0.3", "0.35"),
                ("0.2", "0.4"),
                ("0.1", "0.45"),
            )
        ]
    )
]

In [185]:
def _load_results_with_metadata(csv_path, model_column):
    """Read a results CSV and append metadata parsed from its model-path column.

    Args:
        csv_path: Path of the CSV file to read.
        model_column: Name of the column holding the model path strings that
            are passed to ``extract_metadata``.

    Returns:
        The CSV rows with the parsed metadata columns added on the right.
        When a column name occurs more than once, only its first occurrence
        is kept, so a column already present in the CSV takes precedence
        over a parsed column of the same name.
    """
    results = pd.read_csv(csv_path)
    metadata = pd.DataFrame(extract_metadata(results[model_column]))
    combined = pd.concat([results, metadata], axis=1)
    return combined.loc[:, ~combined.columns.duplicated()]


# Keys that identify one experimental configuration; rows sharing them are averaged.
GROUP_KEYS = ["technique", "base_model", "num_per_class"]

train_results = _load_results_with_metadata(
    "./eval/results/glue.sst2.training.csv", "trained_model"
)
# numeric_only=True: the frame still carries string columns (e.g. the model
# path), and since pandas 2.0 GroupBy.mean raises on those by default instead
# of dropping them.
train_final = (
    train_results
    .groupby(by=GROUP_KEYS)
    .mean(numeric_only=True)
    .reset_index()
    # One row per (technique, num_per_class), one column per base model.
    .pivot_table("eval_f1", ["technique", "num_per_class"], "base_model")
    .sort_values(["num_per_class", "technique"])
    # Final row order; entries of index_order with no matching row become NaN rows.
    .reindex(index_order)
)

robustness_results = _load_results_with_metadata(
    "./eval/results/glue.sst2.robustness.csv", "fine_tuned_model_name"
)

# Metric columns to report: every F1 column and every attack-success column.
target_cols = [c for c in robustness_results.columns if "_f1" in c or "attack_success_" in c]
robustness_final = (
    robustness_results
    .groupby(by=GROUP_KEYS)
    .mean(numeric_only=True)[target_cols]
    .sort_values(["num_per_class", "technique"])
    # Transposed: metrics as rows, configurations as columns.
    .T
)

In [136]:
# Copy the robustness table to the system clipboard (excel=True writes it in a
# delimited form intended for pasting into a spreadsheet).
# NOTE(review): this cell's recorded execution count (136) is lower than that of
# the cell that builds robustness_final (185), so it may have been run against an
# earlier version of the table -- re-run it after the aggregation cell.
robustness_final.to_clipboard(excel=True)

In [187]:
# Copy the training-results table to the system clipboard (excel=True writes it
# in a delimited form intended for pasting into a spreadsheet). This replaces
# whatever was on the clipboard before.
train_final.to_clipboard(excel=True)