In [None]:
# !pip install -Uqq adapter-transformers datasets

%load_ext autoreload
%autoreload 1
%aimport adapter_utils
from adapter_utils import get_model, get_tokenizer, adapt_model, get_test_data

### Load and Adapt a Model
I've written helper functions (in `adapter_utils`) that abstract away the loading of the model and the tokenizer.

In [None]:
# Build the tokenizer through the adapter_utils helper; which checkpoint it
# loads is decided inside get_tokenizer(), not in this notebook.
tokenizer = get_tokenizer()

In [None]:
# Load the base model through the adapter_utils helper; the checkpoint and
# model class are decided inside get_model(), not in this notebook.
model = get_model()

In [None]:
# Attach the "qa/squad1@ukp" adapter with the "houlsby" architecture to `model`.
# NOTE(review): the return value is discarded, so this presumably modifies the
# model in place and activates the adapter — confirm in adapter_utils.adapt_model.
adapt_model(model=model, adapter_name="qa/squad1@ukp", adapter_arch="houlsby")

### Test that it's working
Test that things are working by running the Q&A example from Adapter Hub's [sample notebook](https://github.com/Adapter-Hub/adapter-transformers/blob/master/notebooks/02_Adapter_Inference.ipynb).

In [None]:
from transformers import QuestionAnsweringPipeline

# Wrap the adapted model and the tokenizer in a question-answering pipeline.
qa = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

# Passage (a description of AdapterHub) that the questions below are answered against.
context = """
The current modus operandi in NLP involves downloading and fine-tuning pre-trained models consisting of millions or billions of parameters.
Storing and sharing such large trained models is expensive, slow, and time-consuming, which impedes progress towards more general and versatile NLP methods that learn from and for many tasks.
Adapters -- small learnt bottleneck layers inserted within each layer of a pre-trained model -- ameliorate this issue by avoiding full fine-tuning of the entire model.
However, sharing and integrating adapter layers is not straightforward.
We propose AdapterHub, a framework that allows dynamic "stitching-in" of pre-trained adapters for different tasks and languages.
The framework, built on top of the popular HuggingFace Transformers library, enables extremely easy and quick adaptations of state-of-the-art pre-trained models (e.g., BERT, RoBERTa, XLM-R) across tasks and languages.
Downloading, sharing, and training adapters is as seamless as possible using minimal changes to the training scripts and a specialized infrastructure.
Our framework enables scalable and easy access to sharing of task-specific models, particularly in low-resource scenarios.
AdapterHub includes all recent adapter architectures and can be found at AdapterHub.ml.
"""

In [None]:
# ignore all FutureWarnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

In [None]:
def answer_questions(questions):
    """Print the pipeline's answer to each question.

    Every question is passed to the notebook-level ``qa`` pipeline together
    with the notebook-level ``context`` passage. The question and the
    ``"answer"`` field of the result are printed, followed by a blank line.

    Args:
        questions: Iterable of question strings.
    """
    for query in questions:
        prediction = qa(question=query, context=context)
        answer = prediction["answer"]
        # One print call emits the question line, the answer line and the blank separator.
        print(f"❔ {query}", f"💡 {answer}", "", sep="\n")

# Sample questions about the AdapterHub passage, answered in order.
sample_questions = [
    "What are Adapters?",
    "What do Adapters avoid?",
    "What is proposed?",
    "What does AdapterHub allow?",
    "Where can I find AdapterHub?",
]
answer_questions(sample_questions)

### List the datasets that exist at HuggingFace
HuggingFace makes it easy to access public NLP datasets. The cell below lists the ones whose name contains "glue".

In [None]:
import datasets

# Fetch the list of dataset names once, then print only the entries whose
# name mentions "glue" (case-insensitive).
hf_data = datasets.list_datasets()
glue_matches = [name for name in hf_data if "glue" in name.lower()]
for name in glue_matches:
    print(name)

### Test some data
Test things with the rotten tomatoes example data

In [None]:
# Load the example data through the adapter_utils helper. Later cells read the
# "train" and "validation" splits from this object.
dataset = get_test_data()

### Test training
Try the training procedure from Adapter Hub's training notebook: https://github.com/Adapter-Hub/adapter-transformers/blob/master/notebooks/01_Adapter_Training.ipynb

In [None]:
from transformers import RobertaConfig, RobertaModelWithHeads

# Values shared by the config, the adapter and its classification head.
BASE_CHECKPOINT = "roberta-base"
ADAPTER_NAME = "rotten_tomatoes"
NUM_LABELS = 2

# Two-label configuration for the pre-trained checkpoint.
config = RobertaConfig.from_pretrained(BASE_CHECKPOINT, num_labels=NUM_LABELS)

# NOTE: this rebinds `model`, replacing the model created earlier in the notebook.
model = RobertaModelWithHeads.from_pretrained(BASE_CHECKPOINT, config=config)

# Register a new adapter and a classification head under the same name.
model.add_adapter(ADAPTER_NAME)
model.add_classification_head(
    ADAPTER_NAME,
    num_labels=NUM_LABELS,
    id2label={0: "👎", 1: "👍"},
)

# Activate the adapter for training.
model.train_adapter(ADAPTER_NAME)

In [None]:
import numpy as np
from transformers import TrainingArguments, Trainer, EvalPrediction

training_args = TrainingArguments(
    # Output location; an already existing directory may be reused.
    output_dir="./training_output",
    overwrite_output_dir=True,
    # Optimisation schedule.
    num_train_epochs=6,
    learning_rate=1e-4,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Log every 200 steps.
    logging_steps=200,
    # Important: keep all dataset columns so the labels are passed to the model.
    remove_unused_columns=False,
)

def compute_accuracy(p: "EvalPrediction"):
    """Compute classification accuracy for a Trainer evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores of shape
            ``(n_examples, n_classes)``, or a tuple whose first element is
            that array) and ``label_ids`` (the integer gold labels).

    Returns:
        ``{"acc": value}`` where ``value`` is the fraction of examples whose
        highest-scoring class equals the gold label.
    """
    # Models that return several outputs hand the Trainer a tuple; by Hugging
    # Face convention the logits are its first element.
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(logits, axis=1)
    return {"acc": (preds == p.label_ids).mean()}

# Wire the model, the hyper-parameters, the data splits and the metric callback together.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_accuracy,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
)

In [None]:
# Run the training loop with the settings from training_args (6 epochs).
trainer.train()

In [None]:
# Evaluate on dataset["validation"]; the metrics are computed by compute_accuracy.
trainer.evaluate()

In [None]:
from transformers import TextClassificationPipeline

# `training_args.device.index` is None when training ran on the CPU, but
# pipelines identify the CPU with -1 (older releases fail on None), so map
# None to -1 and pass GPU ordinals through unchanged.
device_index = training_args.device.index

# NOTE(review): `tokenizer` was created earlier by get_tokenizer(); confirm it
# matches the "roberta-base" checkpoint that `model` was loaded from.
classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=-1 if device_index is None else device_index,
)

classifier("This is awesome!")

In [None]:
# Write the "rotten_tomatoes" adapter to ./final_adapter.
model.save_adapter("./final_adapter", "rotten_tomatoes")

# Shell escape: list the contents of the output directory.
!ls -lh final_adapter

### The arXiv dataset is going to take more prep work
You have to manually download and extract this dataset, and even then I haven't been able to get it to work yet. There's some nuance in how you have to tell HuggingFace to split the data, and I haven't figured that part out yet.

In [None]:
from datasets import load_dataset, SplitInfo
# `split` takes the name of a split (or a datasets.Split), not a SplitInfo
# metadata object; request the "train" split by name.
# NOTE(review): data_dir must point at the manually downloaded and extracted
# files — confirm the expected layout against the dataset card.
arvix_data = load_dataset("arxiv_dataset", data_dir="./arvix_data/", split="train")

### Training a GLUE Adapter

Using `run_glue_alt.py` from the Adapter Transformers repo, we can easily create an adapter for one of the GLUE tasks. Here's an example from their documentation:

```
export TASK_NAME=mrpc

python run_glue_alt.py \
  --model_name_or_path bert-base-uncased \
  --task_name $TASK_NAME \
  --do_train \
  --do_eval \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --learning_rate 1e-4 \
  --num_train_epochs 10.0 \
  --output_dir /tmp/$TASK_NAME \
  --overwrite_output_dir \
  --train_adapter \
  --adapter_config pfeiffer
```

For convenience, I've created a shell script, `cola_adapter.sh`, that launches the training process to create an adapter for the CoLA task. Its contents are:

```
#!/bin/bash
export TASK_NAME=cola

python run_glue_alt.py \
  --model_name_or_path roberta-base \
  --task_name $TASK_NAME \
  --do_train \
  --do_eval \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --learning_rate 1e-5 \
  --num_train_epochs 10.0 \
  --output_dir ./adapter/$TASK_NAME \
  --overwrite_output_dir \
  --train_adapter \
  --adapter_config pfeiffer
```

You can run the script by running the next cell.

In [None]:
# Shell escape: run the CoLA adapter training script from the current working directory.
!sh ./cola_adapter.sh

### Train an adapter for The Stanford Sentiment Treebank dataset
1. Create `sst_adapter.sh` using `cola_adapter.sh` as an example, but train for the SST task.
1. Push the newly created `sst_adapter.sh` file into GitLab
1. Run the script (in the cell below) to train the adapter.
1. Download the trained adapter from ./adapter/sst and store it in our Google Drive

In [None]:
# Shell escape: run the SST adapter training script. sst_adapter.sh has to be
# created first (see the steps listed above this cell).
!sh ./sst_adapter.sh