# CS-7643 - Deep Learning - Summer 2024 - Final Project - `main.ipynb`
# Group - Big Daaata 

This notebook has code for running experiments to compare training and validation performance and complexity of DistilBERT, BART, and Electra on selected subtasks from the SuperGLUE dataset with and without adapters.

# Installs

In [2]:
!pip3 install datasets
!pip3 install evaluate
!pip3 install transformers
!pip3 install adapters
!pip3 install accelerate==0.30

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable




# Imports

In [None]:
import datetime
import logging
import os
import pandas as pd
import plotnine as pn
import random
import sys
import time
from dataclasses import dataclass, field
from typing import Optional

import datasets
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict

import evaluate

import transformers

from transformers import (
    AutoConfig,
    AutoTokenizer,
    DataCollatorWithPadding, 
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,      
    default_data_collator,
    set_seed,
)

from adapters import (
    AdapterArguments,
    AdapterTrainer,
    AutoAdapterModel,
    setup_adapter_training, 
    AdapterConfig,          

    # Pre-defined adapter configurations
    BnConfig,
    PrefixTuningConfig,
    MAMConfig, 
    CompacterPlusPlusConfig, 
    UniPELTConfig, 
    IA3Config,
    LoRAConfig, 
    PromptTuningConfig,
    ConfigUnion
)

from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version           
from transformers.utils.versions import require_version

  from .autonotebook import tqdm as notebook_tqdm


# Helper Functions and Classes

# Task-to-Key Mapping

The SuperGLUE benchmark consists of 8 tasks. 
Each task is mapped to a standard key in the underlying `adapters` and `transformers` library code. 
Each task also has its own inputs and outputs. They do not share fixed `X` and `y` names; instead, the column names differ from task to task.

We build a dictionary here called `task_to_keys` that maps each SuperGLUE task to its underlying name in `transformers` along with the names for its I/Os.

Also added mapping for GLUE tasks since we decided to explore this benchmark as well.

In [None]:
# Maps every supported task name to the dataset column names that hold its
# text inputs. A second entry of None marks a task with a single text column.
task_to_keys = dict(
    # SuperGLUE tasks
    boolq=("question", "passage"),
    cb=("hypothesis", "premise"),            # Swapped based on Ali's recommendation
    copa=("premise", "choice1", "choice2"),  # TODO: doesn't work - debug
    multirc=("paragraph", "question"),       # TODO: doesn't work - debug
    record=("passage", "query"),             # TODO: doesn't work - debug
    rte=("premise", "hypothesis"),
    wic=("sentence1", "sentence2"),
    wsc=("text", None),
    # GLUE tasks
    cola=("sentence", None),
    mnli=("premise", "hypothesis"),
    mrpc=("sentence1", "sentence2"),
    qnli=("question", "sentence"),
    qqp=("question1", "question2"),
    sst2=("sentence", None),
    stsb=("sentence1", "sentence2"),
    wnli=("sentence1", "sentence2"),
)

## `filter_unused_args`

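Basic function that drops arguments we do not want to forward to the argument parser: anything starting with `-f` and any value ending in `.json` or `.py` (for example, the launcher script and connection file that a notebook kernel places in `sys.argv`). Everything else is passed through unchanged.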

In [None]:
def filter_unused_args(args):
    """Return the entries of ``args`` that should reach the argument parser.

    An entry is dropped when it starts with ``-f`` or when it ends with
    ``.json`` or ``.py``; all other entries are kept in their original order.

    Args:
        args: Iterable of argument strings (e.g. ``sys.argv``).

    Returns:
        A new list containing only the entries that were kept.
    """
    ignored_suffixes = (".json", ".py")
    return [
        candidate
        for candidate in args
        if not (candidate.startswith("-f") or candidate.endswith(ignored_suffixes))
    ]

## `DataTrainingArguments`

A data validation class for the inputs that get passed to the `datasets` library for instantiating a `Dataset` object for training with SuperGLUE.

In [None]:
@dataclass
class DataTrainingArguments:
    """
    Arguments describing which data the experiments train and evaluate on.

    The class is handed to `HfArgumentParser`, so every field doubles as a
    parser option. `__post_init__` checks that a usable data source was given.
    """

    # --- which benchmark / task ------------------------------------------
    task_name: Optional[str] = field(
        default='boolq',
        metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())},
    )
    dataset_name: Optional[str] = field(
        default='super_glue',
        metadata={"help": "The name of the dataset to use (via the datasets library)."},
    )
    dataset_config_name: Optional[str] = field(
        default=None,
        metadata={"help": "The configuration name of the dataset to use (via the datasets library)."},
    )

    # --- tokenization ----------------------------------------------------
    max_seq_length: int = field(
        default=128,
        metadata={
            "help": (
                "The maximum total input sequence length after tokenization. Sequences longer "
                "than this will be truncated, sequences shorter will be padded."
            )
        },
    )
    overwrite_cache: bool = field(
        default=False,
        metadata={"help": "Overwrite the cached preprocessed datasets or not."},
    )
    pad_to_max_length: bool = field(
        default=True,
        metadata={
            "help": (
                "Whether to pad all samples to `max_seq_length`. "
                "If False, will pad the samples dynamically when batching to the maximum length in the batch."
            )
        },
    )

    # --- optional sub-sampling of each split -------------------------------
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
    )
    max_predict_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of prediction examples to this "
                "value if set."
            )
        },
    )

    # --- local files, used only when no task or dataset name is given ------
    train_file: Optional[str] = field(
        default=None,
        metadata={"help": "A csv or a json file containing the training data."},
    )
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "A csv or a json file containing the validation data."},
    )
    test_file: Optional[str] = field(
        default=None,
        metadata={"help": "A csv or a json file containing the test data."},
    )

    def __post_init__(self):
        """Normalise `task_name` and validate the chosen data source.

        Raises:
            ValueError: if `task_name` is not a key of `task_to_keys`, or if
                no task, no dataset name and no train/validation file pair
                was supplied.
            AssertionError: if the train/validation files are not csv/json
                or do not share the same extension.
        """
        # A named task wins over every other source.
        if self.task_name is not None:
            self.task_name = self.task_name.lower()
            if self.task_name not in task_to_keys:
                raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys()))
            return

        # A dataset name needs no further checks here.
        if self.dataset_name is not None:
            return

        # Otherwise both local files are required.
        if self.train_file is None or self.validation_file is None:
            raise ValueError("Need either a GLUE/SuperGLUE task, a training/validation file or a dataset name.")

        train_extension = self.train_file.rsplit(".", 1)[-1]
        assert train_extension in ("csv", "json"), "`train_file` should be a csv or a json file."
        validation_extension = self.validation_file.rsplit(".", 1)[-1]
        assert (
            train_extension == validation_extension
        ), "`validation_file` should have the same extension (csv or json) as `train_file`."


## `ModelArguments`

A data validation class for the arguments or configurations used to instantiate a `transformers` `Model` object.

In [5]:
@dataclass
class ModelArguments:
    """
    Arguments selecting the pretrained model, config and tokenizer to load.

    The class is handed to `HfArgumentParser`, so every field doubles as a
    parser option.
    """

    # Checkpoint to start from.
    model_name_or_path: str = field(
        default='distilbert/distilbert-base-uncased',
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"},
    )

    # Optional overrides for the config / tokenizer checkpoints.
    config_name: Optional[str] = field(
        default=None,
        metadata={"help": "Pretrained config name or path if not the same as model_name"},
    )
    tokenizer_name: Optional[str] = field(
        default=None,
        metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"},
    )

    # Download / loading options.
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizers (backed by the tokenizers library) or not."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name, or commit id)."},
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": (
                "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
                "with private models)."
            )
        },
    )
    ignore_mismatched_sizes: bool = field(
        default=False,
        metadata={"help": "Will enable loading a pretrained model whose head dimensions are different."},
    )
    

## `TrainingArguments`

Simple data validation class for arguments passed to instantiate a `Trainer` using the `transformers` library.

In [None]:
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """
    Project-specific defaults layered on top of `transformers.TrainingArguments`.

    Overrides the defaults for the output directory, epoch count, evaluation
    strategy and logging strategy, and adds the `adapter` switch that the rest
    of the notebook reads to choose between adapter training and full
    fine-tuning.
    """

    output_dir: str = field(
        default="./results",
        metadata={"help": "The output directory where the model predictions and checkpoints will be written."}
    )

    num_train_epochs: float = field(
        default=10.0,
        metadata={"help": "Total number of training epochs to perform."},
    )

    # Annotated as bool (it was `str` with a boolean default). With a `str`
    # annotation the argument parser would turn `--adapter False` into the
    # non-empty, and therefore truthy, string "False".
    adapter: bool = field(
        default=True,
        metadata={"help": "Whether you wanna train adapter or fine-tune"}
    )

    # NOTE(review): newer transformers releases appear to rename this option
    # to `eval_strategy` - confirm against the installed version.
    evaluation_strategy: str = field(
        default="epoch",
        metadata={"help": "The evaluation strategy to use during training."},
    )

    logging_strategy: str = field(
        default="epoch",
        metadata={"help": "The logging strategy to use during training."},
    )

## Creating a Parser

We combine all three data validation classes (`Model`, `DataTraining`, `TrainingArguments`) to create a Hugging Face Argument Parser object which accepts, checks, and sanitizes arguments passed through command line.

In [None]:
# One parser covering the three argument dataclasses defined in this notebook.
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
# Strip `-f...`, `*.json` and `*.py` entries from sys.argv before parsing
# (presumably the launcher/kernel arguments when run inside a notebook).
filtered_args = filter_unused_args(sys.argv)
# Returns one populated instance per dataclass, in the order given above.
model_args, data_args, training_args = parser.parse_args_into_dataclasses(args=filtered_args)

## Logger

In [None]:
# Send log records to stdout using a timestamped format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
)
logger = logging.getLogger(__name__)

# INFO verbosity when local_rank is -1 or 0, warnings only otherwise.
if training_args.local_rank in (-1, 0):
    logger.setLevel(logging.INFO)
else:
    logger.setLevel(logging.WARNING)

# Fixed seed so repeated runs are comparable.
set_seed(42)

## Generic Helpers

Expectation is that this set of generic functions for preprocessing data and computing evaluation metrics will generalize across all tasks.

In [None]:
def _limit_samples(dataset, max_samples):
    """Return `dataset` cut down to its first `max_samples` rows (no-op when None)."""
    if max_samples is None:
        return dataset
    return dataset.select(range(min(len(dataset), max_samples)))


def preprocess_data(raw_datasets,data_args,model,is_regression,num_labels,label_list,tokenizer):
    """Tokenize the raw splits and gather what the trainer needs.

    Args:
        raw_datasets: Mapping of split name to dataset; must offer `.map` and
            contain the train, validation and test splits.
        data_args: `DataTrainingArguments` controlling caching, padding and
            optional sub-sampling of each split.
        model: Passed through unchanged.
        is_regression, num_labels, label_list: Accepted for call
            compatibility; not used here.
        tokenizer: Used to pad batches dynamically when
            `data_args.pad_to_max_length` is False.

    Returns:
        Tuple `(train_dataset, eval_dataset, predict_dataset, data_args,
        data_collator, model, metric)`.
    """
    raw_datasets = raw_datasets.map(
        preprocess_function,
        batched=True,
        load_from_cache_file=not data_args.overwrite_cache,
        desc="Running tokenizer on dataset",
    )

    # MNLI names its evaluation/test splits "*_matched".
    is_mnli = data_args.task_name == "mnli"

    train_dataset = _limit_samples(raw_datasets["train"], data_args.max_train_samples)
    eval_dataset = _limit_samples(
        raw_datasets["validation_matched" if is_mnli else "validation"],
        data_args.max_eval_samples,
    )
    predict_dataset = _limit_samples(
        raw_datasets["test_matched" if is_mnli else "test"],
        data_args.max_predict_samples,
    )

    metric = evaluate.load(data_args.dataset_name, data_args.task_name)

    # With pad_to_max_length every example already has the same length, so
    # the default collator is enough. Otherwise the examples are ragged and
    # must be padded per batch (the default collator cannot do that).
    if data_args.pad_to_max_length:
        data_collator = default_data_collator
    else:
        data_collator = DataCollatorWithPadding(tokenizer)

    return train_dataset, eval_dataset, predict_dataset, data_args, data_collator, model, metric

def preprocess_function(examples):
    """Tokenize one batch of examples.

    Reads the module-level `sentence1_key`, `sentence2_key`, `tokenizer`,
    `padding`, `max_seq_length` and `label_to_id` (set by `run_iteration`).

    Args:
        examples: Batch mapping column names to lists of values.

    Returns:
        The tokenizer output; when `label_to_id` is set and the batch has a
        "label" column, labels are remapped through it (-1 is left as -1).
    """
    if sentence2_key is None:
        texts = (examples[sentence1_key],)
    else:
        texts = (examples[sentence1_key], examples[sentence2_key])

    encoded = tokenizer(*texts, padding=padding, max_length=max_seq_length, truncation=True)

    if label_to_id is not None and "label" in examples:
        encoded["label"] = [
            -1 if raw_label == -1 else label_to_id[raw_label]
            for raw_label in examples["label"]
        ]
    return encoded

def compute_metrics(p: EvalPrediction):
    """Turn raw model predictions into a metrics dictionary.

    Reads the module-level `is_regression`, `data_args` and `metric`.

    Args:
        p: Predictions and reference labels for one evaluation pass.

    Returns:
        The task metric's result (plus "combined_score", the mean of all
        values, when it reports more than one); without a task name, either
        {"mse": ...} for regression or {"accuracy": ...}.
    """
    raw_predictions = p.predictions
    if isinstance(raw_predictions, tuple):
        raw_predictions = raw_predictions[0]

    if is_regression:
        preds = np.squeeze(raw_predictions)
    else:
        preds = np.argmax(raw_predictions, axis=1)

    if data_args.task_name is None:
        if is_regression:
            return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
        return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

    scores = metric.compute(predictions=preds, references=p.label_ids)
    if len(scores) > 1:
        scores["combined_score"] = np.mean(list(scores.values())).item()
    return scores

In [None]:
def _build_epoch_history(log_history):
    """Join the per-epoch training and evaluation log entries into one DataFrame."""
    train_rows = [entry for entry in log_history if 'epoch' in entry and 'loss' in entry]
    val_rows = [entry for entry in log_history if 'epoch' in entry and 'eval_loss' in entry]

    train_df = pd.DataFrame(train_rows)
    val_df = pd.DataFrame(val_rows)

    # The explicit evaluate() call made after training is also recorded in the
    # log history, so the final epoch can show up twice among the evaluation
    # rows. Keep the first entry per epoch so the merge yields one row each.
    if 'epoch' in val_df.columns:
        val_df = val_df.drop_duplicates(subset='epoch', keep='first')

    history_df = pd.merge(train_df, val_df, on='epoch')
    return history_df.rename(columns={'loss': 'train_loss', 'eval_loss': 'val_loss'})


def run_model(model,training_args,train_dataset,eval_dataset,tokenizer,data_collator):
    """Train `model`, save it, evaluate it and collect per-epoch losses.

    Args:
        model: Model to train.
        training_args: Training configuration; `training_args.adapter`
            selects `AdapterTrainer` over `Trainer`.
        train_dataset: Tokenized training split.
        eval_dataset: Tokenized evaluation split.
        tokenizer: Tokenizer handed to the trainer.
        data_collator: Batch collator handed to the trainer.

    Returns:
        Tuple `(trainer, train_result, metrics_df)` where `metrics_df` has one
        row per epoch with `train_loss` and `val_loss` columns (plus whatever
        else the trainer logged for that epoch).
    """
    trainer_class = AdapterTrainer if training_args.adapter else Trainer
    trainer = trainer_class(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    # Resume only when explicitly asked to. The previous code looked up the
    # last checkpoint in output_dir but never used it; auto-resuming would be
    # wrong here anyway because successive runs share one output directory.
    start_time = time.time()
    train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
    training_time = time.time() - start_time

    metrics = train_result.metrics
    # `data_args` is the module-level DataTrainingArguments instance.
    max_train_samples = data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
    metrics["train_samples"] = min(max_train_samples, len(train_dataset))
    metrics["training_time"] = training_time

    trainer.save_model()  # Saves the tokenizer too for easy upload

    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()

    # Evaluate and log metrics after training
    eval_metrics = trainer.evaluate()
    trainer.log_metrics("eval", eval_metrics)
    trainer.save_metrics("eval", eval_metrics)

    metrics_df = _build_epoch_history(trainer.state.log_history)

    return trainer, train_result, metrics_df

In [None]:
def run_iteration(model_string,data_args,training_args):
    """Run one full experiment: load data and model, train, evaluate.

    Sets the module-level names that `preprocess_function` and
    `compute_metrics` read (`sentence1_key`, `sentence2_key`, `padding`,
    `label_to_id`, `tokenizer`, `max_seq_length`, `is_regression`, `metric`).

    Args:
        model_string: Key into `string_to_config` naming the adapter
            configuration; stored on `training_args.adapter_type`.
        data_args: `DataTrainingArguments` for this run.
        training_args: `TrainingArguments` for this run; `adapter` selects
            adapter training versus full fine-tuning.

    Returns:
        Tuple `(results, metrics_df)`:
        - `results` is a dict with the keys `model_string` (the pretrained
          model name), `dataset_name`, `task_name`, `used_adapter`,
          `adapter_type`, `train_result` and `eval_metrics`, i.e. the shape
          `get_experiment_results_summary` consumes.
        - `metrics_df` is the per-epoch loss history from `run_model` with the
          same metadata columns attached, as expected by
          `get_experiment_results_epoch_summary`.
        `adapter_type` is recorded as "none" when no adapter is trained.
    """
    global sentence1_key, sentence2_key, padding, label_to_id, tokenizer, max_seq_length, is_regression, metric
    training_args.adapter_type = model_string
    raw_datasets = load_dataset(
        data_args.dataset_name,
        data_args.task_name,
        cache_dir=model_args.cache_dir,
        use_auth_token=model_args.use_auth_token
    )

    is_regression = data_args.task_name == "stsb"

    if is_regression:
        # No label names for regression; define the name so later uses
        # (e.g. the preprocess_data call) do not fail.
        label_list = None
        num_labels = 1
    else:
        label_list = raw_datasets["train"].features["label"].names
        num_labels = len(label_list)

    config = AutoConfig.from_pretrained(
        model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
        use_auth_token=model_args.use_auth_token,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        use_auth_token=model_args.use_auth_token
    )

    if training_args.adapter:
        model = AutoAdapterModel.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            use_auth_token=model_args.use_auth_token,
            ignore_mismatched_sizes=model_args.ignore_mismatched_sizes
        )
        model.add_classification_head(
            data_args.dataset_name,
            num_labels=num_labels,
            id2label={i: v for i, v in enumerate(label_list)} if not is_regression else None,
        )
        model.add_adapter(training_args.adapter_type, config=string_to_config[training_args.adapter_type])
        if training_args.adapter_type == 'prefix_tuning':
            # NOTE(review): ejecting before training is kept from the original
            # code - confirm this is intended rather than a post-training step.
            model.eject_prefix_tuning("prefix_tuning")
        model.train_adapter([training_args.adapter_type])
        model.set_active_adapters(training_args.adapter_type)
    else:
        model = transformers.AutoModelForSequenceClassification.from_pretrained(
            model_args.model_name_or_path,
            config=config,
            cache_dir=model_args.cache_dir,
            use_auth_token=model_args.use_auth_token,
            ignore_mismatched_sizes=model_args.ignore_mismatched_sizes
        )

    sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    padding = "max_length" if data_args.pad_to_max_length else False

    label_to_id = None
    if (
        model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id
        and data_args.task_name is not None
        and not is_regression
    ):
        # The checkpoint carries its own label names: align them with the dataset.
        label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
        if sorted(label_name_to_id) == sorted(label_list):
            label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
        else:
            # Single format string with lazy arguments; the previous call passed
            # extra positional strings that the logger could not format.
            logger.warning(
                "Your model seems to have been trained with labels, but they don't match the dataset: "
                "model labels: %s, dataset labels: %s."
                "\nIgnoring the model labels as a result.",
                sorted(label_name_to_id),
                sorted(label_list),
            )
    elif data_args.task_name is None and not is_regression:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    if label_to_id is not None:
        model.config.label2id = label_to_id
        model.config.id2label = {idx: label for label, idx in config.label2id.items()}
    elif data_args.task_name is not None and not is_regression:
        model.config.label2id = {label: idx for idx, label in enumerate(label_list)}
        # Exact inverse of the mapping assigned above, independent of whether
        # `config` and `model.config` are the same object.
        model.config.id2label = dict(enumerate(label_list))

    max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)

    train_dataset, eval_dataset, predict_dataset, data_args, data_collator, model, metric = \
        preprocess_data(raw_datasets, data_args, model, is_regression, num_labels, label_list, tokenizer)

    # run_model returns three values (trainer, train result, epoch history).
    trainer, train_result, metrics_df = run_model(
        model, training_args, train_dataset, eval_dataset, tokenizer, data_collator
    )

    eval_metrics = trainer.evaluate(eval_dataset=eval_dataset)

    run_metadata = {
        'model_string': model_args.model_name_or_path,
        'dataset_name': data_args.dataset_name,
        'task_name': data_args.task_name,
        'used_adapter': training_args.adapter,
        'adapter_type': training_args.adapter_type if training_args.adapter else "none",
    }

    # Tag every epoch row so histories from different runs can be stacked.
    for column, value in run_metadata.items():
        metrics_df[column] = value

    results = {**run_metadata, 'train_result': train_result, 'eval_metrics': eval_metrics}
    return results, metrics_df

In [None]:
def get_experiment_results_summary(results_dict, user_name = 'saad', output_dir = "./"):
    """
    Collect the results of several runs into one DataFrame and save it as CSV.

    Expects `results_dict` to be a dictionary of dictionaries whose values each
    provide the keys `model_string`, `dataset_name`, `task_name`,
    `used_adapter`, `adapter_type`, `train_result` (an object with a `metrics`
    dict) and `eval_metrics` (a dict).

    Args
    - `results_dict` (dict): One entry per run, as described above
    - `user_name` (str): Name of the user who generated the results
    - `output_dir` (str): Directory the summary CSV is written to

    Returns
    - DataFrame with one row per run: `user`, `generated_on`, the run metadata,
      then the training and evaluation metrics. A metric name present in both
      metric dicts (e.g. `epoch`) is kept once, taking the training value.

    Raises
    - ValueError: if `results_dict` is empty
    """
    if not results_dict:
        raise ValueError("results_dict is empty; there are no experiment results to summarise.")

    per_run_frames = []
    for idx, curr_results_dict in enumerate(results_dict.values()):
        metadata_df = pd.DataFrame({
            'model_name': curr_results_dict['model_string'],
            'dataset_name': curr_results_dict['dataset_name'],
            'task_name': curr_results_dict['task_name'],
            'used_adapter': curr_results_dict['used_adapter'],
            'adapter_type': curr_results_dict['adapter_type']
        }, index=[idx])

        train_metrics_df = pd.DataFrame(curr_results_dict['train_result'].metrics, index=[idx])
        eval_metrics_df = pd.DataFrame(curr_results_dict['eval_metrics'], index=[idx])
        curr_results_df = pd.concat([metadata_df, train_metrics_df, eval_metrics_df], axis=1)

        # Both metric dicts may carry the same key; duplicated column names
        # would be repeated again by the column re-ordering below.
        curr_results_df = curr_results_df.loc[:, ~curr_results_df.columns.duplicated()]
        per_run_frames.append(curr_results_df)

    all_results_df = pd.concat(per_run_frames, axis=0)

    # Put the provenance columns first.
    original_columns = all_results_df.columns.tolist()
    all_results_df['user'] = user_name
    all_results_df['generated_on'] = str(datetime.datetime.now())
    all_results_df = all_results_df[['user', 'generated_on'] + original_columns]

    # The file name is derived from the last run's metadata; path separators
    # are stripped from the model name so it is usable inside a file name.
    str_model_name = metadata_df['model_name'].unique()[0].replace('/', '').replace('\\', '')
    str_dataset_name = metadata_df['dataset_name'].unique()[0]
    str_task_name = metadata_df['task_name'].unique()[0]
    str_adapter_name = metadata_df['used_adapter'].unique()[0]

    file_path_str = f'{output_dir}/summary_{str_dataset_name}_{str_task_name}_{str_model_name}_{str_adapter_name}_{str(datetime.datetime.now())}.csv'
    print(f"Attempting to save file to {file_path_str}")

    all_results_df.to_csv(file_path_str)

    return all_results_df

In [None]:
def get_experiment_results_epoch_summary(histories_dict, user_name='saad', cols_to_get=('train_loss', 'val_loss'), output_dir="./"):
    """
    Stack the per-epoch histories of several runs and save them as one CSV.

    Args
    - `histories_dict` (dict): One DataFrame per run; each must contain the
      columns `model_string`, `dataset_name`, `task_name` and `used_adapter`
    - `user_name` (str): Name of the user who generated the results
    - `cols_to_get`: Currently unused; kept so existing calls keep working
    - `output_dir` (str): Directory the CSV is written to

    Returns
    - DataFrame with `user_name` and `generated_on` followed by the original
      history columns. The input frames are left unmodified.

    Raises
    - ValueError: if `histories_dict` is empty
    """
    if not histories_dict:
        raise ValueError("histories_dict is empty; there are no epoch histories to summarise.")

    # pd.concat always builds a new frame, so the provenance columns added
    # below never leak into the caller's DataFrames (the previous code wrote
    # them into the input when only a single history was given).
    all_epochs_results_df = pd.concat(list(histories_dict.values()), axis=0)

    # Put the provenance columns first.
    original_columns = all_epochs_results_df.columns.tolist()
    all_epochs_results_df['user_name'] = user_name
    all_epochs_results_df['generated_on'] = str(datetime.datetime.now())
    all_epochs_results_df_final = all_epochs_results_df[['user_name', 'generated_on'] + original_columns]

    # Path separators are stripped from the model name so it is usable inside
    # a file name.
    str_model_name = all_epochs_results_df['model_string'].unique()[0].replace('/', '').replace('\\', '')
    str_dataset_name = all_epochs_results_df['dataset_name'].unique()[0]
    str_task_name = all_epochs_results_df['task_name'].unique()[0]
    str_adapter_name = all_epochs_results_df['used_adapter'].unique()[0]

    file_path_str = f'{output_dir}/epochs_{str_dataset_name}_{str_task_name}_{str_model_name}_{str_adapter_name}_{str(datetime.datetime.now())}.csv'
    all_epochs_results_df_final.to_csv(file_path_str)

    return all_epochs_results_df_final

In [None]:
def plot_model_learning_curves(learning_curves_df, fig_size = (8, 8), output_dir = "./"):
    """
    Plot train/validation loss per epoch, one panel per adapter type, save the
    figure as a PNG under `output_dir` and print it.

    Args
    - `learning_curves_df` (DataFrame): Epoch-level results containing the
      metadata columns listed in `id_cols` below plus `train_loss`/`val_loss`
    - `fig_size` (tuple): Figure width and height
    - `output_dir` (str): Directory the PNG is written to
    """
    id_cols = ['user_name', 'generated_on', 'model_string', 'dataset_name', 'task_name', 'used_adapter', 'adapter_type', 'epoch']
    loss_cols = ['train_loss', 'val_loss']

    # The first row's values label the plot and the output file.
    dataset_name = learning_curves_df['dataset_name'].unique()[0]
    task_name = learning_curves_df['task_name'].unique()[0]
    model_name = learning_curves_df['model_string'].unique()[0]

    learning_curves_df = learning_curves_df[id_cols + loss_cols]
    adapter_flag = learning_curves_df['used_adapter'].unique()[0]

    # Long format: one row per (epoch, metric) so both losses share an axis.
    long_df = learning_curves_df.melt(id_vars = id_cols, var_name = 'metric', value_name = 'value')

    curves_plot = pn.ggplot(long_df, pn.aes(x = 'epoch', y = 'value', color = 'metric'))
    curves_plot = curves_plot + pn.geom_line()
    curves_plot = curves_plot + pn.geom_point()
    curves_plot = curves_plot + pn.facet_wrap('~  adapter_type', ncol = 3)
    curves_plot = curves_plot + pn.labs(title = f'Learning Curves - {dataset_name} - {task_name} - {model_name}', x = 'Epoch', y = 'Loss')
    curves_plot = curves_plot + pn.theme(figure_size = fig_size)

    # Path separators are stripped from the model name for use in the file name.
    safe_model_name = model_name.replace('/', '').replace('\\', '')

    file_path_str = f"{output_dir}/learning_curve_{dataset_name}_{task_name}_{safe_model_name}_{adapter_flag}_{str(datetime.datetime.now())}.png"
    width, height = fig_size[0], fig_size[1]
    curves_plot.save(file_path_str, width = width, height = height)
    print(curves_plot)

## Adapter Configuration Mapping

The `adapters` library provides several versions of adapter objects that can be added to any transformer-based architecture. 

Here, we build a mapping between these adapters and names that we can use to access pre-instantiated adapters with reasonable defaults for configurations.

In [None]:
# Adapter name -> pre-built `adapters` configuration object. The experiment
# loop iterates over these keys in order; the "seq_bn" key is also the one
# used for the run without adapters.
# NOTE(review): "seq_bn" here enables both mh_adapter and output_adapter -
# confirm this matches the configuration the name is meant to describe.
string_to_config = {
    "seq_bn": BnConfig(
        mh_adapter=True,
        output_adapter=True,
        reduction_factor=16,
        non_linearity="relu",
    ),
    "prefix_tuning": PrefixTuningConfig(flat=False, prefix_length=30),
    "mam_adapter": MAMConfig(),
    "compacter_plusplus": CompacterPlusPlusConfig(),
    "unipelt": UniPELTConfig(),
    "lora": LoRAConfig(r=8, alpha=16),
    # Two bottleneck adapters with different reduction factors (16 and 2).
    "seq_bn_16_2_relu": ConfigUnion(
        BnConfig(mh_adapter=True, output_adapter=False, reduction_factor=16, non_linearity="relu"),
        BnConfig(mh_adapter=False, output_adapter=True, reduction_factor=2, non_linearity="relu"),
    ),
    # Same layout as above with tanh instead of relu.
    "seq_bn_16_2_tanh": ConfigUnion(
        BnConfig(mh_adapter=True, output_adapter=False, reduction_factor=16, non_linearity="tanh"),
        BnConfig(mh_adapter=False, output_adapter=True, reduction_factor=2, non_linearity="tanh"),
    ),
    "ia3": IA3Config(),
    "prompt_tuning": PromptTuningConfig(prompt_length=10),
}

## Experiment Configuration

For our project, an experiment consists of 
1. choosing a task from the SuperGLUE dataset
2. choosing one of three transformer models
3. configuring whether or not to use an adapter
4. iterating over all specified adapters in `string_to_config` mapping
5. recording results

In [None]:
### Set Hyperparams Here
# These assignments override the values produced by the argument parser.
# Model names used in the project:
# valid_model_names = ['distilbert/distilbert-base-uncased', 'google/electra-large-discriminator', 'facebook/bart-base']
model_args.model_name_or_path = 'facebook/bart-base'
data_args.dataset_name = 'super_glue'
# Must be one of the keys of `task_to_keys`.
data_args.task_name = 'cb'
# False -> full fine-tuning with `Trainer`; True -> adapter training.
training_args.adapter = False

In [None]:
# Filled by the experiment loop, keyed by the adapter name of each run.
accuracies = {}
histories = {}

In [None]:
# If the GPU cannot allocate memory, reset the session and list here the
# adapter names (keys of `string_to_config`) that have already been run;
# the experiment loop skips them when adapter training is enabled.
models_to_exclude = []

In [None]:
# Adapter training: run every configured adapter except the excluded ones.
# Full fine-tuning: run exactly once, using 'seq_bn' as a placeholder key.
for model_string in string_to_config:
    if training_args.adapter:
        if model_string in models_to_exclude:
            continue
        print(f"Running with model_string: {model_string}")
    else:
        if model_string != 'seq_bn':
            continue
        print(f"Running with dummy model_string: {model_string} and adapter False")

    accuracies[model_string], histories[model_string] = run_iteration(model_string,data_args,training_args)

In [None]:
# Combine the collected run results into one DataFrame; the helper also
# writes it as a CSV file under /content/.
my_results_df = get_experiment_results_summary(accuracies, output_dir="/content/")
# Bare expression so the notebook displays the frame.
my_results_df

In [None]:
# Stack the per-epoch histories of all runs; the helper also writes the
# result as a CSV file under /content/.
my_histories_df = get_experiment_results_epoch_summary(histories, output_dir="/content/")
# Bare expression so the notebook displays the frame.
my_histories_df

In [None]:
# Draw the train/validation loss curves and save the figure under /content/.
plot_model_learning_curves(my_histories_df, fig_size=(12, 12), output_dir="/content/")

In [None]:
from google.colab import files

# Download every CSV/PNG artefact found in the output directory.
files_output_dir = "/content/"
files_to_download = [
    fname
    for fname in os.listdir(files_output_dir)
    if fname.endswith((".csv", ".png"))
]
for fname in files_to_download:
    # os.listdir returns bare file names; join them with the directory so the
    # download does not depend on the current working directory.
    files.download(os.path.join(files_output_dir, fname))