# Imports
Run this block first to import all necessary libraries.

In [1]:
import itertools
import os
import pickle
from functools import partial
from typing import List

import pandas as pd
import torch
from transformers import BertModel, BertTokenizer, RobertaTokenizer, RobertaModel

from utils.Datasets import TwitterDataset
from utils.Functions import train_loop, collate_batch, eval_loop, set_up_deterministic_environment
from utils.Models import TransformerClassifier, SiameseClassifier

# Paths, Variables and Setup
Update paths to point to the correct files if necessary, update variables, and run the setup code blocks.

In [2]:
# Paths
# The three dataset splits live side by side in one directory.
DATASET_SPLITS_DIR = '../data/dataset_splits'
TRAIN_SET_PATH, VAL_SET_PATH, TEST_SET_PATH = (
    f'{DATASET_SPLITS_DIR}/{split}.csv' for split in ('train', 'val', 'test')
)
# Template: the '{}' placeholder is filled with the model identifier via str.format.
HYPERPARAM_RESULTS_PATH = '../data/model_eval/hyperparameters/' + '{}.pkl'

In [3]:
# Variables
# Set for the specific model that should be trained
FREEZE_EMBEDDING_MODEL = True  # True or False
MODEL_NAME = "roberta-base"  # "roberta-base" or "bert-base-uncased"
MODEL_TYPE = "siamese"  # "siamese" or "transformer"
# Fixed variables
RANDOM_SEED = 42
BATCH_SIZE = 16
MAX_EPOCHS = 30
# Hyperparameters to test
LR = [1e-2, 1e-3, 1e-4]
DROPOUT = [0.1, 0.25, 0.5]

# Fail fast on a mistyped setting. The training cell only rejects an unknown
# MODEL_TYPE after the pretrained weights and datasets have been loaded, and the
# results file name treats every MODEL_TYPE other than "siamese" as a
# transformer run, so a typo could otherwise end up under the wrong file name.
if not MODEL_NAME.startswith(("bert", "roberta")):
    raise ValueError(f"Unknown model name: {MODEL_NAME}")
if MODEL_TYPE not in ("siamese", "transformer"):
    raise ValueError(f"Unknown model type: {MODEL_TYPE}")

In [4]:
# Setup: Device
# Use the GPU when torch reports one as available, otherwise run on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device

device(type='cuda')

# Notebook Summary
This notebook contains the training loop for hyperparameter testing. Executing the code blocks in "1. Hyperparameter Testing" creates a model based on the settings above (FREEZE_EMBEDDING_MODEL, MODEL_NAME, MODEL_TYPE). For each combination of LR and DROPOUT, the model is trained for up to MAX_EPOCHS epochs; training stops early if the exact match (EM) score has not improved for 5 epochs. The best-performing model (highest EM) is then evaluated on the test set, and the results are stored in a pickle file. Once all eight model combinations (two values each for MODEL_NAME, MODEL_TYPE and FREEZE_EMBEDDING_MODEL) have been trained, the results can be evaluated by running the code blocks in "2. Model Evaluation".

# 1. Hyperparameter Testing
The code blocks below execute the hyperparameter testing loop for the defined MODEL_NAME, MODEL_TYPE and FREEZE_EMBEDDING_MODEL setting.
We test the impact of dropout and learning rate on the exact match score of the model. The results are stored in a pickle file.

In [5]:
# Derive the results file location from the model settings.
# The identifier is the model family (the text before the first '-'), followed by
# "nofreeze" and/or "siamese" when those settings apply, all joined by underscores.
name_parts = [MODEL_NAME.split('-')[0]]
if not FREEZE_EMBEDDING_MODEL:
    name_parts.append("nofreeze")
if MODEL_TYPE == "siamese":
    name_parts.append("siamese")
model_str = "_".join(name_parts)
HYPERPARAM_RESULTS_FILE = HYPERPARAM_RESULTS_PATH.format(model_str)

In [6]:
# Get tokenizer
# Pick the tokenizer class from the model-name prefix, then load its pretrained files.
if MODEL_NAME.startswith("bert"):
    tokenizer_cls = BertTokenizer
elif MODEL_NAME.startswith("roberta"):
    tokenizer_cls = RobertaTokenizer
else:
    raise ValueError(f"Unknown model name: {MODEL_NAME}")
tokenizer = tokenizer_cls.from_pretrained(MODEL_NAME)

In [7]:
# Hyperparameter search: train and evaluate one classifier per (LR, DROPOUT) pair.
# Each entry appended to `results` is the tuple
# (lr, dropout, train_losses, val_losses, accs, ems, test_loss, acc, em).
set_up_deterministic_environment(RANDOM_SEED)

# Everything below is independent of the hyperparameters, so it is resolved once
# instead of on every iteration. Invalid settings are rejected here, before any
# expensive loading starts.
if MODEL_NAME.startswith("bert"):
    embedding_model_cls = BertModel
elif MODEL_NAME.startswith("roberta"):
    embedding_model_cls = RobertaModel
else:
    raise ValueError(f"Unknown model name: {MODEL_NAME}")

if MODEL_TYPE == "transformer":
    classifier_cls = TransformerClassifier
elif MODEL_TYPE == "siamese":
    classifier_cls = SiameseClassifier
else:
    raise ValueError(f"Unknown model type: {MODEL_TYPE}")

# The datasets depend only on the paths, the tokenizer and the model type.
# NOTE(review): assumes TwitterDataset construction is deterministic and that the
# datasets are not mutated by training, so sharing them across runs is safe — confirm.
split_input = MODEL_TYPE == "siamese"
ds_train = TwitterDataset(TRAIN_SET_PATH, tokenizer, split_input=split_input)
ds_val = TwitterDataset(VAL_SET_PATH, tokenizer, split_input=split_input)
ds_test = TwitterDataset(TEST_SET_PATH, tokenizer, split_input=split_input)

# Padding value handed to collate_batch: 1 for RoBERTa, 0 otherwise
# (presumably the tokenizer's pad-token id — verify against collate_batch).
collate_fn = partial(collate_batch, input_padding=(1 if MODEL_NAME.startswith("roberta") else 0))

# Materialize the grid so its size can be reported in the progress message.
settings = list(itertools.product(LR, DROPOUT))
results = []

for i, (lr, dropout) in enumerate(settings, 1):
    print(f'Now training model ({model_str}): {dropout} dropout, {lr} starting LR. (Model {i}/{len(settings)})')

    # Reload the pretrained embedding model for every run so that no run starts
    # from weights that a previous run may have updated.
    model = embedding_model_cls.from_pretrained(MODEL_NAME)
    cls = classifier_cls(model, dropout=dropout, freeze_embedding_model=FREEZE_EMBEDDING_MODEL).to(device)

    best_model, train_losses, val_losses, accs, ems = train_loop(
        cls, ds_train, ds_val, collate_fn, device,
        batch_size=BATCH_SIZE, max_epochs=MAX_EPOCHS, lr=lr, patience=5,
        name=f'{model_str}_dropout_{dropout}_lr_{lr}',
    )
    # Score the model returned by train_loop on the held-out test split.
    test_loss, acc, em = eval_loop(best_model, ds_test, collate_fn, device, batch_size=BATCH_SIZE)
    results.append((lr, dropout, train_losses, val_losses, accs, ems, test_loss, acc, em))

Epoch Report: Epoch 17/30, Model roberta_siamese_dropout_0.5_lr_0.0001
- Train loss: 0.6258
- Val loss: 0.5364
- Acc: 0.7837
- EM: 0.4762
Optimizing for EM, current best: 0.4881 (No improvement for 4 epochs)




Early stopping.




Test Loss: 0.5353815713897347
Accuracy: 0.8015873015873016
EM: 0.5178571428571429





In [8]:
# Store results
# Create the output directory first: open(..., 'wb') does not create missing
# parent directories, and a failure here would lose the results of the whole search.
os.makedirs(os.path.dirname(HYPERPARAM_RESULTS_FILE) or '.', exist_ok=True)
with open(HYPERPARAM_RESULTS_FILE, 'wb') as f:
    pickle.dump(results, f)

# 2. Model Evaluation
The model evaluation assumes that a results file exists at HYPERPARAM_RESULTS_PATH for every combination of MODEL_NAME, MODEL_TYPE and FREEZE_EMBEDDING_MODEL. The code blocks below define a loader for these files and load the results into separate dataframes for the transformer and Siamese models.

In [9]:
# Function to specifically deal with hyperparameter evaluation results
def load_hyperparam_eval(eval_path: str, models: List[str]) -> pd.DataFrame:
    """Collect the stored hyperparameter-search results of several models in one table.

    Args:
        eval_path: Path template with a ``{}`` placeholder; it is filled with each
            model name (via ``str.format``) to locate that model's pickle file.
        models: Names of the models whose result files should be loaded.

    Returns:
        A DataFrame with one row per stored run and the columns ``model``, ``lr``,
        ``dropout``, ``best_epoch``, ``test_loss``, ``acc`` and ``em``. The last
        three are rounded to three decimals. ``best_epoch`` is the 1-based position
        of the first maximum in the run's EM history.
    """
    columns = ["model", "lr", "dropout", "best_epoch", "test_loss", "acc", "em"]
    rows = []

    for model_name in models:
        # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
        with open(eval_path.format(model_name), 'rb') as handle:
            runs = pickle.load(handle)

        # Each stored run is a 9-tuple; the loss and accuracy histories are not needed here.
        rows.extend(
            (model_name, lr, dropout, ems.index(max(ems)) + 1, test_loss, acc, em)
            for lr, dropout, _train_losses, _val_losses, _accs, ems, test_loss, acc, em in runs
        )

    summary = pd.DataFrame(rows, columns=columns)
    return summary.round({"test_loss": 3, "acc": 3, "em": 3})

In [10]:
# Results of the four transformer-classifier variants, best test EM first.
transformer_model_names = ["bert", "roberta", "bert_nofreeze", "roberta_nofreeze"]
transformer_results = load_hyperparam_eval(HYPERPARAM_RESULTS_PATH, transformer_model_names)
transformer_results.sort_values("em", ascending=False)

Unnamed: 0,model,lr,dropout,best_epoch,test_loss,acc,em
25,bert_nofreeze,0.0001,0.25,5,0.596,0.853,0.613
24,bert_nofreeze,0.0001,0.1,6,0.647,0.827,0.548
26,bert_nofreeze,0.0001,0.5,4,0.45,0.839,0.548
7,bert,0.0001,0.25,17,0.639,0.726,0.321
6,bert,0.0001,0.1,8,0.687,0.72,0.315
3,bert,0.001,0.1,10,0.683,0.718,0.304
5,bert,0.001,0.5,15,0.695,0.712,0.286
8,bert,0.0001,0.5,20,0.661,0.71,0.28
4,bert,0.001,0.25,8,0.706,0.712,0.262
13,roberta,0.001,0.25,14,0.761,0.677,0.262


In [11]:
# Results of the four Siamese-classifier variants, best test EM first.
siamese_model_names = ["bert_siamese", "roberta_siamese", "bert_nofreeze_siamese", "roberta_nofreeze_siamese"]
siamese_results = load_hyperparam_eval(HYPERPARAM_RESULTS_PATH, siamese_model_names)
siamese_results.sort_values("em", ascending=False)

Unnamed: 0,model,lr,dropout,best_epoch,test_loss,acc,em
25,bert_nofreeze_siamese,0.0001,0.25,7,0.637,0.847,0.589
24,bert_nofreeze_siamese,0.0001,0.1,4,0.481,0.839,0.577
26,bert_nofreeze_siamese,0.0001,0.5,2,0.513,0.837,0.565
15,roberta_siamese,0.0001,0.1,21,0.503,0.817,0.548
17,roberta_siamese,0.0001,0.5,13,0.535,0.802,0.518
16,roberta_siamese,0.0001,0.25,17,0.507,0.804,0.512
12,roberta_siamese,0.001,0.1,10,0.55,0.8,0.506
14,roberta_siamese,0.001,0.5,8,0.538,0.784,0.464
13,roberta_siamese,0.001,0.25,8,0.567,0.78,0.446
8,bert_siamese,0.0001,0.5,9,0.606,0.748,0.387
