## Training Task Adapters
Using randomized search, we identify optimal hyperparameters to train task-specific adapters on GLUE tasks.

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd drive/MyDrive/cs7643-deep-learning-summer-2021/

In [None]:
# !pip install -Uqq adapter-transformers datasets

%load_ext autoreload
%autoreload 2
from task_utils import TaskModelArguments, TaskDataTrainingArguments, task_to_keys
from task import train_task_adapter
from transformers import (
    MultiLingAdapterArguments,
    TrainingArguments,
)

### CoLA Task

In [None]:
def initParse(dictionary):
  """Build the argument objects for one hyperparameter combination.

  Args:
    dictionary: Mapping of setting name to a single chosen value. Keys that
      are missing are passed on as ``None``, except ``do_eval`` (falls back
      to the ``do_train`` value) and ``adapter_config`` (falls back to
      ``"pfeiffer"``).

  Returns:
    A ``(model, data, training, adapter)`` tuple of ``TaskModelArguments``,
    ``TaskDataTrainingArguments``, ``TrainingArguments`` and
    ``MultiLingAdapterArguments`` instances.
  """
  model = TaskModelArguments(
      model_name_or_path=dictionary.get('model_name_or_path'),
  )

  data = TaskDataTrainingArguments(
      task_name=dictionary.get('task_name'),
      max_seq_length=dictionary.get('max_seq_length'),
      pad_to_max_length=dictionary.get('pad_to_max_length'),
  )

  training = TrainingArguments(
      adam_beta1=dictionary.get('adam_beta1'),
      adam_beta2=dictionary.get('adam_beta2'),
      adam_epsilon=dictionary.get('adam_epsilon'),
      learning_rate=dictionary.get('learning_rate'),
      warmup_ratio=dictionary.get('warmup_ratio'),
      warmup_steps=dictionary.get('warmup_steps'),
      weight_decay=dictionary.get('weight_decay'),
      do_train=dictionary.get('do_train'),
      # Read the dedicated 'do_eval' key; previously 'do_train' was read here,
      # so the 'do_eval' setting was silently ignored. The fallback keeps the
      # old result for dictionaries that only define 'do_train'.
      do_eval=dictionary.get('do_eval', dictionary.get('do_train')),
      per_device_train_batch_size=dictionary.get('per_device_train_batch_size'),
      num_train_epochs=dictionary.get('num_train_epochs'),
      overwrite_output_dir=dictionary.get('overwrite_output_dir'),
      # Every combination of the same task shares this output directory.
      output_dir=f"./adapter/task/{dictionary.get('task_name')}",
  )

  adapter = MultiLingAdapterArguments(
      train_adapter=True,
      # Honour the searched 'adapter_config'; default to the previously
      # hard-coded value when the key is absent.
      adapter_config=dictionary.get('adapter_config', 'pfeiffer'),
  )

  return model, data, training, adapter

In [None]:
import random
import itertools
import subprocess
import pandas as pd

def getParams(dictionary, limit):
  """Return the hyperparameter combinations in random order.

  Args:
    dictionary: Mapping of parameter name to a list of candidate values.
    limit: Maximum number of combinations to return. ``False`` or ``None``
      returns every combination.

  Returns:
    A list of dicts, each mapping every parameter name to one candidate
    value. The full Cartesian product is shuffled in place with the global
    ``random`` state and then truncated to ``limit`` entries.
  """
  names = list(dictionary)
  paramsList = [
      dict(zip(names, combo))
      for combo in itertools.product(*dictionary.values())
  ]
  random.shuffle(paramsList)

  # None is treated like False ("no limit"); previously it raised TypeError
  # inside min(None, len(...)).
  if limit is False or limit is None:
    return paramsList

  # Slicing already clamps to the list length, so no explicit min() is needed.
  return paramsList[:limit]

**Define Dictionary of Hyperparameters**

In [None]:
# GLUE task names covered by the hyperparameter search.
# "mrpc" is currently disabled; add it back to the list to include it.
glue_tasks = ["cola", "mnli", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]

In [None]:
# Task name used to seed the 'task_name' entry of the search space.
task = 'cola'

# Search space: every key maps to the list of candidate values to combine.
# A single-element list keeps that setting fixed across all combinations.
paramDictionary = dict(
    task_name=[task],
    model_name_or_path=['roberta-base'],
    max_seq_length=[64, 128, 256],
    pad_to_max_length=[True],
    per_device_train_batch_size=[8, 16, 32, 64, 128],
    adam_beta1=[0.9],
    adam_beta2=[0.999],
    adam_epsilon=[1e-8, 1e-7, 1e-6],
    learning_rate=[1e-6, 1e-5, 1e-4, 1e-3],
    warmup_ratio=[0.0],
    warmup_steps=[0],
    weight_decay=[0.0],
    do_train=[True],
    do_eval=[True],
    num_train_epochs=[10],
    overwrite_output_dir=[True],
    adapter_config=['pfeiffer'],
)

**Begin Looping**

In [None]:
# Maximum number of sampled combinations per task; use False for no limit.
limit = 15

# Run the randomized search once per GLUE task and write one CSV per task.
for data_set in glue_tasks:
    output = []    # one row per trained combination, reset for every task
    header = None  # column names; stays None until a run has completed
    paramDictionary["task_name"] = [data_set]
    paramsList = getParams(paramDictionary, limit)

    for p in paramsList:
        model, data, training, adapter = initParse(p)
        train_stats, eval_stats = train_task_adapter(
            model_args=model,
            adapter_args=adapter,
            training_args=training,
            data_args=data,
        )

        # Row layout: hyperparameters, then training stats, then eval stats.
        output.append(
            [*p.values(), *train_stats.values(), *eval_stats.values()]
        )
        # Keep the header next to the row it describes instead of relying on
        # loop variables that leak out of (or never enter) the inner loop.
        header = [*p.keys(), *train_stats.keys(), *eval_stats.keys()]

    if header is None:
        # Nothing was trained for this task (e.g. limit == 0). Previously this
        # raised NameError or reused the previous task's column names.
        print(f"No hyperparameter combinations ran for {data_set}; skipping CSV.")
        continue

    output_df = pd.DataFrame(output, columns=header)
    output_df.to_csv(f"./adapter/task/{data_set}_hp_search.csv")

In [None]:
# Display the collected result rows. `output` is reset at the start of every
# task in the search loop, so this shows only the last task that was processed.
output