In [1]:
from baseline_utils import create_dataframes_and_run_baseline, SEED_LIST
from sklearn.metrics import accuracy_score, f1_score

INFO:root:Loading true data
INFO:root:Loading prediction data


In [2]:
from collections import defaultdict
import numpy as np
from tqdm import tqdm

### Apply all baselines

In [3]:
# Call create_dataframes_and_run_baseline once per seed, keeping the results
# in SEED_LIST order so df_list[i] belongs to SEED_LIST[i].
df_list = [create_dataframes_and_run_baseline(seed) for seed in SEED_LIST]

INFO:root:Creating dataframes for seed 1
INFO:root:Creating dataframes for seed 2
INFO:root:Creating dataframes for seed 3
INFO:root:Creating dataframes for seed 4


### Save all relevant baseline results
The scores are collected in memory per split (train/val/test), then averaged over the seeds and printed as pseudo-LaTeX table rows.

In [4]:
# Prediction columns to score. 'baseline_len' and 'baseline_combo3' were
# already commented out of this list and remain disabled.
baseline_list = [
    'baseline_gender', 'baseline_letter', 'baseline_letter2',  # 'baseline_len',
    'baseline_combo', 'baseline_combo2',  # 'baseline_combo3',
]

# Class labels, defined once so the per-class, micro and macro F1 calls are
# guaranteed to use the same label set and the same order.
labels = ['en', 'e', 'empty', 'er', 's']

# Maps split name -> one entry per element of df_list. Each entry is a list
# with one array per baseline (in baseline_list order), laid out as
# [per-class F1 in `labels` order..., micro F1, macro F1], all scaled to 0-100.
result_dict = defaultdict(list)
for result in tqdm(df_list):
    # Elements of `result` are paired positionally with the split names.
    # Assumes create_dataframes_and_run_baseline returns (train, val, test)
    # in that order -- TODO confirm.
    for name, df in zip(["train", "val", "test"], result):
        # NOTE(review): the reference labels are read from the `pred_class`
        # column; confirm that this column holds the gold classes.
        true = df.pred_class
        list_of_scores = []
        for baseline in baseline_list:
            pred = df[baseline]
            # average=None returns one F1 value per entry of `labels`.
            f1scores = f1_score(true, pred, average=None, labels=labels)
            micro = 100 * f1_score(true, pred, average='micro', labels=labels)
            macro = 100 * f1_score(true, pred, average='macro', labels=labels)
            list_of_scores.append(
                np.array([100 * f for f in f1scores] + [micro, macro])
            )
        result_dict[name].append(list_of_scores)

100%|██████████| 4/4 [00:11<00:00,  2.94s/it]


In [7]:
# Print, for each split, the seed-averaged scores of every baseline as
# pseudo-LaTeX table rows: one row per class plus micro and macro F1, one
# column per baseline (in baseline_list order).
baseline_amt = len(baseline_list)
seed_amt = len(SEED_LIST)
# Row labels in the same order as the score arrays stored in result_dict.
# The third row is printed as 'z' (it is 'z' in the original row list as well).
row_labels = ['en', 'e', 'z', 'er', 's'] + ['Micro F1', 'Macro F1']
for name in ["train", "val", "test"]:
    print("Results for", name)
    print(*baseline_list, sep=' & ')
    scores = result_dict[name]
    # The average below divides by the number of seeds, so it is only correct
    # when exactly one run per seed was accumulated for this split.
    assert len(scores) == seed_amt, (
        f"expected {seed_amt} runs for split {name!r}, found {len(scores)}"
    )
    # Summing over the run axis leaves one row of scores per baseline.
    mean_scores = np.sum(scores, axis=0) / seed_amt
    for row, label in enumerate(row_labels):
        cells = ['\\' + label] + [
            round(mean_scores[col][row], 1) for col in range(baseline_amt)
        ]
        # Join only the cells with ' & ' and terminate the row with '\\'.
        # Passing '\\' through sep=' & ' produced a trailing ' & \\', i.e. an
        # extra empty column in the LaTeX table.
        print(' & '.join(str(cell) for cell in cells) + ' \\\\')
    print('=' * 10)

Results for train
baseline_gender & baseline_letter & baseline_letter2 & baseline_combo & baseline_combo2
\en & 90.6 & 76.3 & 87.1 & 93.3 & 96.0 & \\
\e & 60.0 & 54.0 & 74.8 & 74.0 & 87.6 & \\
\z & 0.0 & 66.5 & 88.6 & 78.2 & 93.3 & \\
\er & 0.0 & 0.0 & 43.5 & 60.3 & 82.8 & \\
\s & 0.0 & 43.0 & 56.7 & 44.7 & 65.5 & \\
\Micro F1 & 65.7 & 66.2 & 81.6 & 81.9 & 91.3 & \\
\Macro F1 & 30.1 & 48.0 & 70.2 & 70.1 & 85.0 & \\
Results for val
baseline_gender & baseline_letter & baseline_letter2 & baseline_combo & baseline_combo2
\en & 90.3 & 76.1 & 87.2 & 93.4 & 95.6 & \\
\e & 59.8 & 55.8 & 75.0 & 73.6 & 87.3 & \\
\z & 0.0 & 67.5 & 88.3 & 79.0 & 92.5 & \\
\er & 0.0 & 0.0 & 41.1 & 55.6 & 78.6 & \\
\s & 0.0 & 39.1 & 49.1 & 41.2 & 55.2 & \\
\Micro F1 & 65.1 & 66.4 & 81.1 & 81.5 & 90.3 & \\
\Macro F1 & 30.0 & 47.7 & 68.1 & 68.6 & 81.9 & \\
Results for test
baseline_gender & baseline_letter & baseline_letter2 & baseline_combo & baseline_combo2
\en & 90.0 & 76.7 & 87.0 & 93.2 & 94.9 & \\
\e & 60.1 & 54.