In [12]:
from utils import *
from utils_training import *
from utils_dataset import *
from utils_metrics import *

In [13]:
# Training configuration: every run in this notebook is driven by the dict below.
#
# Conventions:
# - The first entry of 'metrics_names' is the metric used to decide whether to save the model.
# - 'num_eval' is the number of evaluations performed per epoch.
# - The dataset at 'TRAIN_PATH' is split into a train and a validation set
#   (split ratio given by 'train_frac').
# - 'subsample_train_size' / 'subsample_test_size' are integers (None keeps every sample).

hyperparameters = dict(
    # Device used for training (should be a GPU); CPU is the fallback when CUDA is unavailable.
    device='cuda' if torch.cuda.is_available() else 'cpu',
    # Metrics on which the evaluations are done.
    metrics_names=['Accuracy', 'MatthewCorr', 'F1 Score'],
    # Number of evaluations per epoch.
    num_eval=10,
    # Model to train (by default should be 'bert-base-cased').
    model_name='bert-base-cased',
    # Training seed: a random integer drawn from [0, 10000), converted to a Python int.
    seed=torch.randint(10000, size=(1, 1)).squeeze(0).item(),
    # Name of the dataset ('CoLA', 'RTE', 'QNLI', 'SST-2').
    dataset_name='QNLI',
    # Training batch size.
    batch_size=8,
    # Learning rate.
    lr=5e-4,
    # Path to the training set.
    TRAIN_PATH='data/QNLI/train.tsv',
    # Path to the validation set (used as our test set).
    TEST_PATH='data/QNLI/dev.tsv',
    # Number of epochs.
    num_epochs=20,
    # Type of fine-tuning
    # ('Full', 'BitFit', 'LayerNorm', 'Random', 'Init&BitFit', 'InitBias&BitFit').
    Finetuning='Full',
    # Number of training samples; None means all the samples are used.
    subsample_train_size=None,
    # Number of test samples; None means all the samples are used.
    subsample_test_size=None,
    # Maximum length of a sample (should depend on the model, 512 by default).
    max_length=512,
    # Train/validation split ratio.
    train_frac=0.8,
    # Gradient masks for the random fine-tuning.
    grad_masks=None,
    # Ratio of parameters trained for random fine-tuning.
    ratio_params=None,
)

# Full Fine-Tuning

In [14]:
# Full fine-tuning on QNLI: one training run per seed at a fixed learning rate.

seeds = [7200, 7516, 6665, 3575, 551]
hyperparameters.update({'Finetuning': 'Full', 'lr': 3e-5})

for seed in seeds:
    # Only the seed changes between runs; the shared config dict is updated in place.
    hyperparameters.update(seed=seed)
    training(hyperparameters)



Full_bert-base-cased_QNLI_seed7200_lr3e-05_epochs20:


Full_bert-base-cased_QNLI_seed7200_lr3e-05_epochs20 already trained




Full_bert-base-cased_QNLI_seed7516_lr3e-05_epochs20:


Full_bert-base-cased_QNLI_seed7516_lr3e-05_epochs20 already trained




Full_bert-base-cased_QNLI_seed6665_lr3e-05_epochs20:


Full_bert-base-cased_QNLI_seed6665_lr3e-05_epochs20 already trained




Full_bert-base-cased_QNLI_seed3575_lr3e-05_epochs20:


Full_bert-base-cased_QNLI_seed3575_lr3e-05_epochs20 already trained




Full_bert-base-cased_QNLI_seed551_lr3e-05_epochs20:


Full_bert-base-cased_QNLI_seed551_lr3e-05_epochs20 already trained




# BitFit Fine-Tuning

In [15]:
# BitFit fine-tuning on QNLI: sweep every learning rate over every seed.

hyperparameters['Finetuning'] = 'BitFit'
seeds = [7200, 7516, 6665, 3575, 551]
lrs = [1e-4, 5e-4, 1e-3]

for lr in lrs:
    for seed in seeds:
        # Set seed and learning rate on the shared config dict, then launch the run.
        hyperparameters.update(seed=seed, lr=lr)
        training(hyperparameters)



BitFit_bert-base-cased_QNLI_seed7200_lr0.0001_epochs20:


BitFit_bert-base-cased_QNLI_seed7200_lr0.0001_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed7516_lr0.0001_epochs20:


BitFit_bert-base-cased_QNLI_seed7516_lr0.0001_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed6665_lr0.0001_epochs20:


BitFit_bert-base-cased_QNLI_seed6665_lr0.0001_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed3575_lr0.0001_epochs20:


BitFit_bert-base-cased_QNLI_seed3575_lr0.0001_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed551_lr0.0001_epochs20:


BitFit_bert-base-cased_QNLI_seed551_lr0.0001_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed7200_lr0.0005_epochs20:


BitFit_bert-base-cased_QNLI_seed7200_lr0.0005_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed7516_lr0.0005_epochs20:


BitFit_bert-base-cased_QNLI_seed7516_lr0.0005_epochs20 already trained




BitFit_bert-base-cased_QNLI_seed6665_lr0.0005_epochs20:


BitFit_bert

# LayerNorm Fine-Tuning

In [16]:
# LayerNorm fine-tuning on QNLI: sweep every learning rate over every seed.

hyperparameters['Finetuning'] = 'LayerNorm'
seeds = [7200, 7516, 6665, 3575, 551]
lrs = [1e-4, 5e-4, 1e-3]

for lr in lrs:
    for seed in seeds:
        # Set seed and learning rate on the shared config dict, then launch the run.
        hyperparameters.update(seed=seed, lr=lr)
        training(hyperparameters)



LayerNorm_bert-base-cased_QNLI_seed7200_lr0.0001_epochs20:


LayerNorm_bert-base-cased_QNLI_seed7200_lr0.0001_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed7516_lr0.0001_epochs20:


LayerNorm_bert-base-cased_QNLI_seed7516_lr0.0001_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed6665_lr0.0001_epochs20:


LayerNorm_bert-base-cased_QNLI_seed6665_lr0.0001_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed3575_lr0.0001_epochs20:


LayerNorm_bert-base-cased_QNLI_seed3575_lr0.0001_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed551_lr0.0001_epochs20:


LayerNorm_bert-base-cased_QNLI_seed551_lr0.0001_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed7200_lr0.0005_epochs20:


LayerNorm_bert-base-cased_QNLI_seed7200_lr0.0005_epochs20 already trained




LayerNorm_bert-base-cased_QNLI_seed7516_lr0.0005_epochs20:


LayerNorm_bert-base-cased_QNLI_seed7516_lr0.0005_epochs20 already trained




LayerNorm_bert-base-cased_Q