## Training Task Adapters
Using randomized search, we identify optimal hyperparameters for training task-specific adapters on GLUE tasks.

In [None]:
# Make Google Drive visible to the Colab runtime so the project folder can be reached.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
cd drive/MyDrive/cs7643-deep-learning-summer-2021/

In [1]:
# !pip install -Uqq adapter-transformers datasets

%load_ext autoreload
%autoreload 2
from task_utils import TaskModelArguments, TaskDataTrainingArguments, task_to_keys
from task import train_task_adapter
from transformers import (
    MultiLingAdapterArguments,
    TrainingArguments,
)

### CoLA Task

In [2]:
def initParse(dictionary):
  """Build the argument objects for one task-adapter training run.

  Args:
    dictionary: Mapping from hyperparameter name to a single value (one
      point of the search grid). Keys are read with ``dict.get``, so a
      missing key is forwarded as ``None``.

  Returns:
    Tuple ``(model, data, training, adapter)`` holding the
    ``TaskModelArguments``, ``TaskDataTrainingArguments``,
    ``TrainingArguments`` and ``MultiLingAdapterArguments`` instances.
  """
  task_name = dictionary.get('task_name')

  model = TaskModelArguments(
      model_name_or_path=dictionary.get('model_name_or_path')
  )

  data = TaskDataTrainingArguments(
      task_name=task_name,
      max_seq_length=dictionary.get('max_seq_length'),
      pad_to_max_length=dictionary.get('pad_to_max_length')
  )

  training = TrainingArguments(
      adam_beta1=dictionary.get('adam_beta1'),
      adam_beta2=dictionary.get('adam_beta2'),
      adam_epsilon=dictionary.get('adam_epsilon'),
      learning_rate=dictionary.get('learning_rate'),
      warmup_ratio=dictionary.get('warmup_ratio'),
      warmup_steps=dictionary.get('warmup_steps'),
      weight_decay=dictionary.get('weight_decay'),
      do_train=dictionary.get('do_train'),
      # Read the dedicated 'do_eval' key; this previously reused 'do_train'.
      do_eval=dictionary.get('do_eval'),
      per_device_train_batch_size=dictionary.get('per_device_train_batch_size'),
      num_train_epochs=dictionary.get('num_train_epochs'),
      overwrite_output_dir=dictionary.get('overwrite_output_dir'),
      # The directory depends only on the task name, so every configuration
      # of the same task writes to the same location.
      output_dir=f"./adapter/task/{task_name}",
  )

  adapter = MultiLingAdapterArguments(
      train_adapter=True,
      # Honour the grid's 'adapter_config' entry; fall back to the previous
      # hard-coded value when it is absent or None.
      adapter_config=dictionary.get('adapter_config') or "pfeiffer",
  )

  return model, data, training, adapter

In [3]:
import random
import itertools
import subprocess
import pandas as pd

def getParams(dictionary, limit):
  """Return a shuffled sample of the hyperparameter grid.

  Args:
    dictionary: Mapping from hyperparameter name to the list of candidate
      values for that hyperparameter.
    limit: Maximum number of configurations to return. ``False`` or
      ``None`` means no limit.

  Returns:
    List of dicts, each mapping every hyperparameter name to one chosen
    value. The list is the full Cartesian product in random order,
    truncated to ``limit`` entries when a limit is given.
  """
  names = list(dictionary)
  paramsList = [dict(zip(names, combo)) for combo in itertools.product(*dictionary.values())]
  random.shuffle(paramsList)

  # None is accepted as "no limit" as well; min(None, ...) used to raise TypeError.
  if limit is False or limit is None:
    return paramsList

  return paramsList[0:min(limit, len(paramsList))]

**Define Dictionary of Hyperparameters**

In [4]:
# Task names iterated over by the sweep; "mrpc" is currently disabled.
glue_tasks = ["cola", "mnli", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]

In [5]:
# Default task name placed in the grid.
task = 'cola'

# Search grid: each entry maps a hyperparameter name to its candidate values.
# Single-element lists are held fixed; multi-element lists are searched over.
paramDictionary = dict(
    task_name=[task],
    model_name_or_path=['roberta-base'],
    max_seq_length=[64, 128, 256],
    pad_to_max_length=[True],
    per_device_train_batch_size=[8, 16, 32, 64, 128],
    adam_beta1=[0.9],
    adam_beta2=[0.999],
    adam_epsilon=[1e-8, 1e-7, 1e-6],
    learning_rate=[1e-6, 1e-5, 1e-4, 1e-3],
    warmup_ratio=[0.0],
    warmup_steps=[0],
    weight_decay=[0.0],
    do_train=[True],
    do_eval=[True],
    num_train_epochs=[10],
    overwrite_output_dir=[True],
    adapter_config=['pfeiffer'],
)

**Begin Looping**

In [None]:
limit = 15  # Numerical or False for no limit
SEED = 42   # Seeds the shuffle in getParams so the sampled configurations are repeatable

random.seed(SEED)

# One dict per trained configuration. Keying every value by name keeps the columns
# aligned even if train_task_adapter returns different statistic names for
# different tasks (a positional row + single header would mislabel them).
records = []
for data_set in glue_tasks:
    # Build a per-task copy of the grid instead of mutating paramDictionary,
    # so re-running this cell always starts from the same grid.
    task_grid = {**paramDictionary, "task_name": [data_set]}
    for p in getParams(task_grid, limit):
        model, data, training, adapter = initParse(p)
        train_stats, eval_stats = train_task_adapter(
            model_args=model, adapter_args=adapter, training_args=training, data_args=data
        )
        # NOTE(review): assumes hyperparameter, train and eval keys do not collide;
        # on a collision the later mapping wins.
        records.append({**p, **train_stats, **eval_stats})

# Building the frame from records also works when no run was executed
# (the previous header construction raised NameError in that case).
output = pd.DataFrame(records)
header = list(output.columns)

07/26/2021 21:15:30 - INFO - task -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-06,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.NO,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
greater_is_better=None,
group_by_length=False,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.0001,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=True,
logging_dir=./adapter/task/cola/runs/Jul26_21-15-29_alienware-r12,
logging_first_step=False,
logging_steps=500,
logging_strategy=IntervalStrategy.STEPS,
lr_schedul

Step,Training Loss
500,0.5293
1000,0.4232
1500,0.3984


Saving model checkpoint to ./adapter/task/cola/checkpoint-500
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_adapter.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
Configuration saved in ./adapter/task/cola/checkpoint-500/cola/head_config.json
Module weights saved in ./adapter/task/cola/checkpoint-500/cola/pytorch_model_head.bin
tokenizer config file saved in ./adapter/task/cola/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./adapter/task/cola/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./adapter/task/cola/checkpoint-1000
Configuration saved in ./adapter/task/cola/checkpoint-1000/cola/adapter_config.json
Module weights saved in ./adapter/task/cola/checkpoint-1000/cola/pytorch_adapter.bin
Configuration saved in ./adap

In [None]:
# Display the sweep results: one row per trained configuration, holding its
# hyperparameters followed by the training and evaluation statistics.
output