In this notebook, the DEV results of the hyperparameter tuning are evaluated. 
Specifically, the following hyperparameters are tuned:
* the learning rate
* the BERT learning rate

Additionally, I report the performance for 5 different seeds

In [16]:
import json
import matplotlib.pyplot as plt
import os

import pandas as pd

## Model training results overview (dev), in terms of LEA score


In [17]:
def get_training_results(model_name):
    with open(f'data/train_logs/{model_name}') as json_file:
        logs = json.load(json_file)
    dev_f1s = [epoch['sl_f1'] for epoch in logs['dev_eval']]
    best_epoch = dev_f1s.index(max(dev_f1s))
    best_epoch_results = logs['dev_eval'][best_epoch]
    return [model_name.replace('.json',''),
            logs['epochs'], 
            logs['learning-rate'], 
            logs['bert-learning-rate'], 
            logs['seed'], 
            best_epoch + 1, 
            round(best_epoch_results['wl_p'],4)* 100,
            round(best_epoch_results['wl_r'],4)* 100,
            round(best_epoch_results['wl_f1'],4) * 100,
            round(best_epoch_results['sl_p'],4)* 100,
            round(best_epoch_results['sl_r'],4)* 100,
            round(best_epoch_results['sl_f1'],4)* 100,
           ]

In [18]:
col_names = ['name', 'epochs', 'learning-rate', 'bert-learning-rate', 'seed', 'best_epoch', 'wl P', 'wl R', 'wl F1', 'sl P', 'sl R', 'sl F1']
def get_training_overview(setting):
    files = [file for file in os.listdir("data/train_logs/") if "xlm" in file and setting in file]
    results = []
    for file in files:
        results.append(get_training_results(file))
    df = pd.DataFrame(results, columns=col_names)
    return df

#### learning rate

In [19]:
lr_df = get_training_overview('xlm_lr')
lr_df

Unnamed: 0,name,epochs,learning-rate,bert-learning-rate,seed,best_epoch,wl P,wl R,wl F1,sl P,sl R,sl F1
0,xlm_lr1e-4,20,0.0001,1e-05,2020,19,56.9,53.28,55.03,54.27,49.85,51.97
1,xlm_lr5e-5,20,5e-05,1e-05,2020,19,60.83,40.27,48.46,58.18,37.73,45.78
2,xlm_lr5e-6,20,5e-06,1e-05,2020,18,57.07,11.06,18.53,52.77,9.95,16.74
3,xlm_lr5e-4,20,0.0005,1e-05,2020,18,54.15,63.04,58.26,51.2,58.62,54.66
4,xlm_lr3e-5,20,3e-05,1e-05,2020,19,62.91,26.55,37.34,60.11,24.82,35.13
5,xlm_lr4e-4,20,0.0004,1e-05,2020,15,55.64,60.19,57.83,52.76,56.01,54.34
6,xlm_lr6e-4,20,0.0006,1e-05,2020,20,53.46,63.69,58.13,50.6,59.07,54.51
7,xlm_lr3e-4,20,0.0003,1e-05,2020,18,55.37,58.62,56.95,52.6,54.66,53.61
8,xlm_lr1e-5,20,1e-05,1e-05,2020,18,68.36,6.37,11.65,63.86,5.75,10.54
9,xlm_lr4e-5,20,4e-05,1e-05,2020,19,61.36,37.57,46.6,58.44,35.05,43.82


#### BERT learning rate

In [20]:
bertlr_df = get_training_overview('xlm_bert')
bertlr_df

Unnamed: 0,name,epochs,learning-rate,bert-learning-rate,seed,best_epoch,wl P,wl R,wl F1,sl P,sl R,sl F1
0,xlm_bertlr3e-5,20,0.0005,3e-05,2020,15,55.13,64.11,59.28,52.04,59.51,55.52
1,xlm_bertlr4e-5,20,0.0005,4e-05,2020,15,55.02,62.17,58.38,52.02,57.89,54.8
2,xlm_bertlr1e-4,20,0.0005,0.0001,2020,17,53.12,63.25,57.74,50.24,58.87,54.21
3,xlm_bertlr5e-5,20,0.0005,5e-05,2020,16,57.04,60.72,58.82,54.09,56.65,55.34
4,xlm_bertlr5e-6,20,0.0005,5e-06,2020,18,54.04,60.99,57.31,51.19,56.6,53.76
5,xlm_bertlr2e-5,20,0.0005,2e-05,2020,15,56.14,61.49,58.69,53.23,57.28,55.18


#### 5 different seeds

In [23]:
seed_df = get_training_overview('xlm_regular')
seed_df

Unnamed: 0,name,epochs,learning-rate,bert-learning-rate,seed,best_epoch,wl P,wl R,wl F1,sl P,sl R,sl F1
0,xlm_regular_123,21,0.0005,3e-05,123,18,52.13,67.43,58.8,49.47,62.9,55.38
1,xlm_regular_2020,21,0.0005,3e-05,2020,15,58.01,59.45,58.72,55.14,55.54,55.34
2,xlm_regular_248,20,0.0005,3e-05,248,18,55.13,63.06,58.83,52.06,58.58,55.13
3,xlm_regular_1234,21,0.0005,3e-05,1234,15,57.45,59.44,58.43,54.5,55.54,55.02
4,xlm_regular_2023,21,0.0005,3e-05,2023,17,57.15,60.97,59.0,54.02,56.77,55.36
