In [None]:
# Imports
# Need: pip install accelerate -U
import os
from collections import namedtuple
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
from adapters import AutoAdapterModel

In [None]:
def select_device_name(cuda_available, mps_available):
    """Return the torch device string for the best available backend.

    Args:
        cuda_available: True when a CUDA device can be used.
        mps_available: True when the Apple Metal (MPS) backend can be used.

    Returns:
        "cuda:0" if CUDA is available, otherwise "mps" if MPS is available,
        otherwise "cpu".
    """
    if cuda_available:
        # A Colab GPU runtime is still addressed as a CUDA device; "gpu" is not
        # a device type that torch.device() accepts.
        return "cuda:0"
    if mps_available:
        return "mps"
    return "cpu"


cuda_available = torch.cuda.is_available()
# torch.backends.mps is looked up defensively in case the installed torch build lacks it.
mps_backend = getattr(torch.backends, "mps", None)
mps_available = mps_backend is not None and mps_backend.is_available()

# COLAB_GPU is reportedly set in Colab runtimes (unverified); it is only used for this message.
if cuda_available and 'COLAB_GPU' in os.environ:
    print("colab environment")

device_name = select_device_name(cuda_available, mps_available)
device = torch.device(device_name)
print(device_name)

In [None]:
# GLUE task to run; must be one of the keys of `task_configs` below.
task_param = "cola"

# Per-task settings:
#   sentence_type - "one" or "two" text columns per example
#   class_type    - "BC", "MC" or "R" (presumably binary / multi-class / regression)
#   num_classes   - number of classes recorded for the task
#   col_names     - dataset columns that hold the text
TaskConfig = namedtuple("TaskConfig", ["sentence_type", "class_type", "num_classes", "col_names"])

task_configs = {
    "cola": TaskConfig(sentence_type="one", class_type="BC", num_classes=1, col_names=['sentence']),
    "sst2": TaskConfig(sentence_type="one", class_type="BC", num_classes=1, col_names=['sentence']),
    "mrpc": TaskConfig(sentence_type="two", class_type="BC", num_classes=1, col_names=['sentence1', 'sentence2']),
    "stsb": TaskConfig(sentence_type="two", class_type="R", num_classes=1, col_names=['sentence1', 'sentence2']),
    "qqp": TaskConfig(sentence_type="two", class_type="BC", num_classes=1, col_names=['question1', 'question2']),
    "mnli_matched": TaskConfig(sentence_type="two", class_type="MC", num_classes=3, col_names=['premise', 'hypothesis']),
    "mnli_mismatched": TaskConfig(sentence_type="two", class_type="MC", num_classes=3, col_names=['premise', 'hypothesis']),
    "qnli": TaskConfig(sentence_type="two", class_type="BC", num_classes=1, col_names=['question', 'sentence']),
    "rte": TaskConfig(sentence_type="two", class_type="BC", num_classes=1, col_names=['sentence1', 'sentence2']),
    "wnli": TaskConfig(sentence_type="two", class_type="BC", num_classes=1, col_names=['sentence1', 'sentence2']),
}

# Raises KeyError for a task name that is not in the table.
task_config = task_configs[task_param]

# Both MNLI variants load the same "mnli" subset and differ only in the suffix
# of their validation/test split names; every other task uses the plain names.
if task_param in ("mnli_matched", "mnli_mismatched"):
    glue_subset = "mnli"
    split_suffix = task_param[len("mnli"):]  # "_matched" or "_mismatched"
else:
    glue_subset = task_param
    split_suffix = ""

data = load_dataset("glue", glue_subset)
val_key = "validation" + split_suffix
test_key = "test" + split_suffix

data

In [None]:
# Process dataset
from transformers import RobertaTokenizer
tokenizer = RobertaTokenizer.from_pretrained('distilroberta-base')

max_len = 80 #512 # TODO: how was this decided?

def tokenize(examples):
    """Tokenize a batch of single-sentence examples.

    Args:
        examples: batch passed by `data.map(..., batched=True)`; indexed by
            column name. Only the first column in `task_config.col_names` is read.

    Returns:
        The tokenizer output, padded/truncated to `max_len` tokens.
    """
    return tokenizer(examples[task_config.col_names[0]],
                     add_special_tokens=True,
                     padding='max_length',
                     truncation=True,
                     max_length=max_len,
                     return_tensors='pt')

def tokenize_double(examples):
    """Tokenize a batch of sentence-pair examples.

    Args:
        examples: batch passed by `data.map(..., batched=True)`; indexed by
            column name. The first two columns in `task_config.col_names`
            are passed to the tokenizer as its first and second text inputs.

    Returns:
        The tokenizer output, padded/truncated to `max_len` tokens.
    """
    return tokenizer(examples[task_config.col_names[0]],
                     examples[task_config.col_names[1]],
                     add_special_tokens=True,
                     padding='max_length',
                     truncation=True,
                     max_length=max_len,
                     return_tensors='pt')

# Pick the tokenizer function that matches the task: pair tasks must encode both
# text columns, otherwise the second sentence would be silently dropped.
tokenize_fn = tokenize_double if task_config.sentence_type == "two" else tokenize

# Encode the input data
data = data.map(tokenize_fn, batched=True)
# The transformers model expects the target class column to be named "labels"
data = data.rename_column(original_column_name="label", new_column_name="labels")
# Transform to pytorch tensors and only output the required columns
data.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

In [None]:
# Model setup
from transformers import RobertaConfig
from adapters import AutoAdapterModel

# Single place for the pretrained checkpoint name used by both config and model.
BASE_CHECKPOINT = "distilroberta-base"

# Load the checkpoint's configuration with num_labels set to 2, then build the
# adapter-capable model from the same checkpoint using that configuration.
config = RobertaConfig.from_pretrained(BASE_CHECKPOINT, num_labels=2)
model = AutoAdapterModel.from_pretrained(BASE_CHECKPOINT, config=config)

In [None]:
# Adapter setup
from adapters import ConfigUnion, PrefixTuningConfig, ParBnConfig

adapter_config = PrefixTuningConfig(flat=False, prefix_length=30)
# Alternative that was tried (prefix tuning combined with a parallel bottleneck adapter):
# adapter_config = ConfigUnion(
#     PrefixTuningConfig(prefix_length=20),
#     ParBnConfig(reduction_factor=4),
# )

# Add a new adapter, named after the task
model.add_adapter(task_param, config=adapter_config)

# Size the head from the task table instead of hard-coding 2:
#   "BC" tasks -> 2 outputs (the table stores 1 for them, but the accuracy metric
#                 in this notebook takes an argmax over the outputs, as before)
#   "MC" tasks -> task_config.num_classes (3 for MNLI)
#   "R" tasks  -> task_config.num_classes (1)
# NOTE(review): a 1-output head is presumably treated as regression by the
# adapters library - confirm before running STS-B.
head_num_labels = 2 if task_config.class_type == "BC" else task_config.num_classes

# Add a matching classification head
model.add_classification_head(
    task_param,
    num_labels=head_num_labels,
)

# Activate the adapter
model.train_adapter(task_param)
model.set_active_adapters(task_param) # Possibly redundant

In [None]:
# Trainer setup
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer
from transformers import default_data_collator

# NOTE(review): output_dir still carries an "amazon-polarity" name although this
# notebook loads GLUE tasks; left unchanged so existing checkpoints stay where they are.
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="adapter-roberta-base-amazon-polarity",
    overwrite_output_dir=True,
    remove_unused_columns=False, # Ensures dataset labels are properly passed to the model
)

def compute_accuracy(p: EvalPrediction):
    """Compute accuracy from an evaluation result.

    Args:
        p: evaluation output; `p.predictions` is reduced with an argmax over
            axis 1 and compared element-wise against `p.label_ids`.

    Returns:
        A dict with one entry, "acc": the fraction of predictions equal to the labels.
    """
    preds = np.argmax(p.predictions, axis=1)
    return {"acc": (preds == p.label_ids).mean()}

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    # Use the task-specific validation split name chosen when the dataset was
    # loaded; the MNLI variants have no split called plain "validation".
    eval_dataset=data[val_key],
    compute_metrics=compute_accuracy,
    data_collator=default_data_collator,
)


In [None]:
# Train loop
# Runs training on the `trainer` object built in the trainer-setup cell. As the
# last expression of the cell, the value returned by train() is shown as the cell output.
trainer.train()