## Training Task Adapters
Using randomized search, we identify optimal hyperparameters for training task-specific adapters on GLUE tasks.

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on Drive are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd drive/MyDrive/cs7643-deep-learning-summer-2021/

/content/drive/MyDrive/cs7643-deep-learning-summer-2021


In [3]:
# Install/upgrade (-U) the third-party dependencies quietly (-qq).
# NOTE(review): versions are not pinned, so a later re-run may pull different
# releases than the ones that produced the results below - consider pinning.
!pip install -Uqq adapter-transformers datasets

# Reload imported modules automatically before running code, so edits to the
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
from task_utils import TaskModelArguments, TaskDataTrainingArguments, task_to_keys
from task import train_task_adapter
from transformers import (
    MultiLingAdapterArguments,
    TrainingArguments,
)

[K     |████████████████████████████████| 2.5 MB 7.8 MB/s 
[K     |████████████████████████████████| 542 kB 52.2 MB/s 
[K     |████████████████████████████████| 43 kB 2.1 MB/s 
[K     |████████████████████████████████| 895 kB 53.5 MB/s 
[K     |████████████████████████████████| 3.3 MB 51.3 MB/s 
[K     |████████████████████████████████| 243 kB 68.2 MB/s 
[K     |████████████████████████████████| 76 kB 5.9 MB/s 
[K     |████████████████████████████████| 118 kB 73.0 MB/s 
[?25h

### CoLA Task

In [4]:
def initParse(dictionary):
  """Build the argument objects for one hyperparameter configuration.

  Args:
    dictionary: mapping from hyperparameter name to the single value to use
      for this run (one element of the list returned by ``getParams``).
      Keys are looked up with ``dict.get``, so a missing key is passed on
      as ``None`` unless a fallback is noted below.

  Returns:
    A ``(model, data, training, adapter)`` tuple holding the
    ``TaskModelArguments``, ``TaskDataTrainingArguments``,
    ``TrainingArguments`` and ``MultiLingAdapterArguments`` instances.
  """
  model = TaskModelArguments(
      model_name_or_path=dictionary.get('model_name_or_path')
  )

  data = TaskDataTrainingArguments(
      task_name=dictionary.get('task_name'),
      max_seq_length=dictionary.get('max_seq_length'),
      pad_to_max_length=dictionary.get('pad_to_max_length')
  )

  training = TrainingArguments(
    adam_beta1=dictionary.get('adam_beta1'),
    adam_beta2=dictionary.get('adam_beta2'),
    adam_epsilon=dictionary.get('adam_epsilon'),
    learning_rate=dictionary.get('learning_rate'),
    warmup_ratio=dictionary.get('warmup_ratio'),
    warmup_steps=dictionary.get('warmup_steps'),
    weight_decay=dictionary.get('weight_decay'),
    do_train=dictionary.get('do_train'),
    # Read the dedicated 'do_eval' key (it was previously copied from
    # 'do_train'); fall back to 'do_train' only when 'do_eval' is absent so
    # dictionaries without that key behave as before.
    do_eval=dictionary.get('do_eval', dictionary.get('do_train')),
    num_train_epochs=dictionary.get('num_train_epochs'), # CHANGE ME
    overwrite_output_dir=dictionary.get('overwrite_output_dir'),
    # Every configuration of the same task shares this directory.
    output_dir=f"./adapter/task/{dictionary.get('task_name')}",
  )

  adapter = MultiLingAdapterArguments(
      train_adapter=True,
      # Honour the configured adapter type; "pfeiffer" stays the default
      # when the key is not supplied.
      adapter_config=dictionary.get('adapter_config', 'pfeiffer'),
  )

  return model, data, training, adapter

In [30]:
import random
import itertools
import subprocess
import pandas as pd

def getParams(dictionary, limit, seed=None):
  """Return a shuffled list of hyperparameter configurations.

  The full Cartesian product of the candidate values is enumerated, shuffled,
  and optionally truncated, which gives a random sample of the grid.

  Args:
    dictionary: mapping from hyperparameter name to an iterable of candidate
      values.
    limit: maximum number of configurations to return. ``False`` or ``None``
      means no limit; a limit larger than the grid returns the whole grid.
    seed: optional seed for a private random generator so that the sample is
      reproducible. When ``None`` (the default) the module-level ``random``
      state is used.

  Returns:
    A list of dicts, each mapping every key of ``dictionary`` to one of its
    candidate values.

  Raises:
    ValueError: if ``limit`` is negative.
  """
  unlimited = limit is None or limit is False
  if not unlimited and limit < 0:
    # A negative slice bound would silently drop items from the end instead.
    raise ValueError(f"limit must be non-negative, got {limit!r}")

  keys = list(dictionary)
  paramsList = [dict(zip(keys, combo))
                for combo in itertools.product(*dictionary.values())]

  # Use an isolated generator when seeded so global random state is untouched.
  rng = random if seed is None else random.Random(seed)
  rng.shuffle(paramsList)

  if unlimited:
    return paramsList
  # Slicing already caps at the list length.
  return paramsList[:limit]

**Define Dictionary of Hyperparameters**

In [6]:
task = 'cola'
paramDictionary = {'task_name':[task],
                   'model_name_or_path':['roberta-base'],
                   'max_seq_length':[64,128,256],
                   'pad_to_max_length':[True],
                   #'per_device_train_batch_size':[32,64,128],
                   'adam_beta1':[.9,.99,.999],
                   'adam_beta2':[.999, .99, .9],
                   'adam_epsilon':[1e-8,1e-7,1e-6],
                   'learning_rate':[1e-4,1e-4],
                   'warmup_ratio':[0.0],
                   'warmup_steps':[0],
                   'weight_decay':[0.0],
                   'do_train':[True],
                   'do_eval':[True],
                   'num_train_epochs':[1],
                   'overwrite_output_dir':[True],
                   'adapter_config':['pfeiffer']
                   }

**Begin Looping**

In [33]:
limit = 5 #Numerical or False for no limit
paramsList = getParams(paramDictionary,limit)

# One row per sampled configuration: the hyperparameter values followed by
# the training statistics and then the evaluation statistics.
rows = []
header = None
for p in paramsList:
  model, data, training, adapter = initParse(p)
  train_stats, eval_stats = train_task_adapter(
    model_args=model, adapter_args=adapter, training_args=training, data_args=data
    )

  if header is None:
    # Column names come from the first run; building them inside the loop
    # avoids a NameError after the loop when no configuration was sampled.
    header = [*p.keys(), *train_stats.keys(), *eval_stats.keys()]

  rows.append([*p.values(), *train_stats.values(), *eval_stats.values()])

# An empty search yields an empty DataFrame.
output = pd.DataFrame(rows, columns=header if header is not None else [])

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
07/26/2021 21:28:29 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.99,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.NO,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
greater_is_better=None,
group_by_length=False,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.0001,

Step,Training Loss
500,0.5617
1000,0.4774


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

07/26/2021 21:31:01 - INFO - task -   ***** Eval results cola *****
07/26/2021 21:31:01 - INFO - task -     eval_loss = 0.5537135004997253
07/26/2021 21:31:01 - INFO - task -     eval_matthews_correlation = 0.40058872203446944
07/26/2021 21:31:01 - INFO - task -     eval_runtime = 8.4217
07/26/2021 21:31:01 - INFO - task -     eval_samples_per_second = 123.847
07/26/2021 21:31:01 - INFO - task -     eval_steps_per_second = 15.555
07/26/2021 21:31:01 - INFO - task -     epoch = 1.0
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
07/26/2021 21:31:01 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.99,
adam_beta2=0.9,
adam_epsilon=1e-06,
dataloader_drop_last=False,
dataloader_num_workers

Step,Training Loss
500,0.5827
1000,0.5026


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

07/26/2021 21:33:39 - INFO - task -   ***** Eval results cola *****
07/26/2021 21:33:39 - INFO - task -     eval_loss = 0.5378082394599915
07/26/2021 21:33:39 - INFO - task -     eval_matthews_correlation = 0.38828919728832473
07/26/2021 21:33:39 - INFO - task -     eval_runtime = 8.7025
07/26/2021 21:33:39 - INFO - task -     eval_samples_per_second = 119.851
07/26/2021 21:33:39 - INFO - task -     eval_steps_per_second = 15.053
07/26/2021 21:33:39 - INFO - task -     epoch = 1.0
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
07/26/2021 21:33:39 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.999,
adam_beta2=0.9,
adam_epsilon=1e-07,
dataloader_drop_last=False,
dataloader_num_worker

Step,Training Loss
500,0.5929
1000,0.5191


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

07/26/2021 21:36:20 - INFO - task -   ***** Eval results cola *****
07/26/2021 21:36:20 - INFO - task -     eval_loss = 0.5548438429832458
07/26/2021 21:36:20 - INFO - task -     eval_matthews_correlation = 0.34785429910257887
07/26/2021 21:36:20 - INFO - task -     eval_runtime = 8.7829
07/26/2021 21:36:20 - INFO - task -     eval_samples_per_second = 118.754
07/26/2021 21:36:20 - INFO - task -     eval_steps_per_second = 14.915
07/26/2021 21:36:20 - INFO - task -     epoch = 1.0
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
07/26/2021 21:36:20 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.999,
adam_beta2=0.99,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_worke

  0%|          | 0/2 [00:00<?, ?ba/s]

07/26/2021 21:36:25 - INFO - task -   Sample 1824 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 1824, 'input_ids': [0, 100, 5055, 14, 127, 1150, 6, 37, 21, 3229, 25, 41, 37323, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 0, 'sentence': 'I acknowledged that my father, he was tight as an owl.'}.
07/26/2021 21:36:25 - INFO - task -   Sample 409 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 409, 'input_ids': [0, 2709, 123, 7, 109, 14, 74, 28, 10, 5021, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss
500,0.5836
1000,0.5222


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

07/26/2021 21:37:45 - INFO - task -   ***** Eval results cola *****
07/26/2021 21:37:45 - INFO - task -     eval_loss = 0.5568822026252747
07/26/2021 21:37:45 - INFO - task -     eval_matthews_correlation = 0.35742821499738897
07/26/2021 21:37:45 - INFO - task -     eval_runtime = 4.4919
07/26/2021 21:37:45 - INFO - task -     eval_samples_per_second = 232.196
07/26/2021 21:37:45 - INFO - task -     eval_steps_per_second = 29.164
07/26/2021 21:37:45 - INFO - task -     epoch = 1.0
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
07/26/2021 21:37:45 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.999,
adam_beta2=0.999,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_work

  0%|          | 0/2 [00:00<?, ?ba/s]

07/26/2021 21:37:50 - INFO - task -   Sample 1824 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 1824, 'input_ids': [0, 100, 5055, 14, 127, 1150, 6, 37, 21, 3229, 25, 41, 37323, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

Step,Training Loss
500,0.5877
1000,0.5064


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

07/26/2021 21:43:14 - INFO - task -   ***** Eval results cola *****
07/26/2021 21:43:14 - INFO - task -     eval_loss = 0.6077342629432678
07/26/2021 21:43:14 - INFO - task -     eval_matthews_correlation = 0.36704088018637543
07/26/2021 21:43:14 - INFO - task -     eval_runtime = 18.4852
07/26/2021 21:43:14 - INFO - task -     eval_samples_per_second = 56.424
07/26/2021 21:43:14 - INFO - task -     eval_steps_per_second = 7.087
07/26/2021 21:43:14 - INFO - task -     epoch = 1.0


In [34]:
# Show the results table: one row per sampled configuration, with its
# hyperparameters followed by the training and evaluation statistics.
output

Unnamed: 0,task_name,model_name_or_path,max_seq_length,pad_to_max_length,adam_beta1,adam_beta2,adam_epsilon,learning_rate,warmup_ratio,warmup_steps,weight_decay,do_train,do_eval,num_train_epochs,overwrite_output_dir,adapter_config,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss,epoch,eval_loss,eval_matthews_correlation,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch.1
0,cola,roberta-base,128,True,0.9,0.99,1e-08,0.0001,0.0,0,0.0,True,True,1,True,pfeiffer,136.7348,62.537,7.818,828331900000000.0,0.513692,1.0,0.553714,0.400589,8.4217,123.847,15.555,1.0
1,cola,roberta-base,128,True,0.99,0.9,1e-06,0.0001,0.0,0,0.0,True,True,1,True,pfeiffer,143.5196,59.581,7.448,828331900000000.0,0.535596,1.0,0.537808,0.388289,8.7025,119.851,15.053,1.0
2,cola,roberta-base,128,True,0.999,0.9,1e-07,0.0001,0.0,0,0.0,True,True,1,True,pfeiffer,145.8208,58.64,7.331,828331900000000.0,0.549654,1.0,0.554844,0.347854,8.7829,118.754,14.915,1.0
3,cola,roberta-base,64,True,0.999,0.99,1e-08,0.0001,0.0,0,0.0,True,True,1,True,pfeiffer,74.821,114.286,14.287,414166000000000.0,0.548469,1.0,0.556882,0.357428,4.4919,232.196,29.164,1.0
4,cola,roberta-base,256,True,0.999,0.999,1e-08,0.0001,0.0,0,0.0,True,True,1,True,pfeiffer,304.1001,28.119,3.515,1656664000000000.0,0.54212,1.0,0.607734,0.367041,18.4852,56.424,7.087,1.0
