This notebook is adjusted from https://adapterhub.ml (fusion tutorial). 

The adapters used for fusion are: https://adapterhub.ml/adapters/ukp/bert-base-uncased_sentiment_sst-2_pfeiffer/ and my finetuned sst-adapter. 

In [10]:
!pip install -U adapter-transformers
!pip install datasets



In [11]:
import torch
torch.cuda.is_available()

True

In [12]:
from google.colab import drive
drive.mount("/content/gdrive/", force_remount=True)

Mounted at /content/gdrive/


In [13]:
import sys
sys.path.append('/content/gdrive/MyDrive/master_hpi/NLP_Project/code/')

In [14]:
path = "/content/gdrive/MyDrive/master_hpi/NLP_Project/code/"


In [15]:
path


'/content/gdrive/MyDrive/master_hpi/NLP_Project/code/'

In [16]:
from datasets import load_dataset

dataset = load_dataset("sst")
dataset.num_rows

No config specified, defaulting to: sst/default
Reusing dataset sst (/root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff)


{'test': 2210, 'train': 8544, 'validation': 1101}

In [17]:
dataset['train'].features


{'label': Value(dtype='float32', id=None),
 'sentence': Value(dtype='string', id=None),
 'tokens': Value(dtype='string', id=None),
 'tree': Value(dtype='string', id=None)}

In [18]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["sentence"], max_length=80, truncation=True, padding="max_length")

# Encode the input data
dataset = dataset.map(encode_batch, batched=True)

# Isa work:
def label_mapping(batch):
  batch["labels"] = round(batch["label"])
  return batch
dataset = dataset.map(label_mapping)
# # The transformers model expects the target class column to be named "labels"
# dataset.rename_column_("isa", "labels")

# end of Isa work

# Transform to pytorch tensors and only output the required columns
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

dataset["train"]["labels"]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-4589f93a18dce3fd.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-229cdc0eafef1853.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-9f6af83bc8a11a6b.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-1102ab2b49aad756.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-829c754b2609346c.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8

tensor([1, 1, 1,  ..., 1, 0, 0])

In [19]:
from transformers import BertConfig, BertModelWithHeads


config = BertConfig.from_pretrained(
    "bert-base-uncased",
    num_labels=2,


)
model = BertModelWithHeads.from_pretrained(
    "bert-base-uncased",
    config=config,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [20]:
from transformers.adapters.composition import Fuse

# Load the pre-trained adapters we want to fuse
model.load_adapter("sentiment/sst-2@ukp", with_head=False)
model.load_adapter(path +"models/sst/", with_head=False)
print(model.config.adapters.adapters)


model.add_adapter_fusion(Fuse("sst-2", "sst-2"))
model.set_active_adapters(Fuse("sst-2", "sst-2"))

# Add a classification head for our target task
model.add_classification_head("sentiment/sst-2@ukp", num_labels=2, id2label={ 0: "👎", 1: "👍"})

Overwriting existing adapter 'sst-2'.


{'sst-2': '9ed5b5a29de19b71'}


In [21]:
# Unfreeze and activate fusion setup
adapter_setup = Fuse("sst-2", "sst-2")
model.train_adapter_fusion(adapter_setup)

In [22]:
import numpy as np
from transformers import TrainingArguments, Trainer, EvalPrediction

training_args = TrainingArguments(
    learning_rate=5e-5,
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_accuracy,
)

Start the training 🚀 (this will take a while)

In [23]:
trainer.train()

***** Running training *****
  Num examples = 8544
  Num Epochs = 5
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1335


Step,Training Loss
200,0.2322
400,0.1934
600,0.2221
800,0.1992
1000,0.1901
1200,0.1902


Saving model checkpoint to ./training_output/checkpoint-500
Configuration saved in ./training_output/checkpoint-500/sst-2/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/sst-2/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/sst-2,sst-2/adapter_fusion_config.json
Module weights saved in ./training_output/checkpoint-500/sst-2,sst-2/pytorch_model_adapter_fusion.bin
Saving model checkpoint to ./training_output/checkpoint-1000
Configuration saved in ./training_output/checkpoint-1000/sst-2/adapter_config.json
Module weights saved in ./training_output/checkpoint-1000/sst-2/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-1000/sst-2,sst-2/adapter_fusion_config.json
Module weights saved in ./training_output/checkpoint-1000/sst-2,sst-2/pytorch_model_adapter_fusion.bin


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=1335, training_loss=0.20217717059989102, metrics={'train_runtime': 703.1395, 'train_samples_per_second': 60.756, 'train_steps_per_second': 1.899, 'total_flos': 2711270814566400.0, 'train_loss': 0.20217717059989102, 'epoch': 5.0})

In [24]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 1101
  Batch size = 32


{'epoch': 5.0,
 'eval_acc': 0.8646684831970936,
 'eval_loss': 0.3806009888648987,
 'eval_runtime': 8.4092,
 'eval_samples_per_second': 130.928,
 'eval_steps_per_second': 4.162}

In [25]:
print(model.active_adapters)

model.save_adapter_fusion(path +"models/fusion3/", "sst-2,sst-2")
model.save_all_adapters(path +"models/fusion3/")

print(model.active_adapters)


!ls -l saved

Configuration saved in /content/gdrive/MyDrive/master_hpi/NLP_Project/code/models/fusion3/adapter_fusion_config.json


Fuse[sst-2, sst-2]


Module weights saved in /content/gdrive/MyDrive/master_hpi/NLP_Project/code/models/fusion3/pytorch_model_adapter_fusion.bin
Configuration saved in /content/gdrive/MyDrive/master_hpi/NLP_Project/code/models/fusion3/sst-2/adapter_config.json
Module weights saved in /content/gdrive/MyDrive/master_hpi/NLP_Project/code/models/fusion3/sst-2/pytorch_adapter.bin


Fuse[sst-2, sst-2]
ls: cannot access 'saved': No such file or directory
