# Using adapters for Slovak boolq

In [1]:
%pip install -q --user transformers==4.35.2
%pip install -q --user datasets
%pip install -q --user adapters

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [16]:
from datasets import load_dataset
from transformers import AutoTokenizer, XmodConfig

from adapters import XmodAdapterModel, AdapterConfig, Stack
from adapters.composition import Fuse

import torch

In [2]:
dataset = load_dataset("crabz/boolq_sk")
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'passage', 'answer'],
        num_rows: 4735
    })
    validation: Dataset({
        features: ['question', 'passage', 'answer'],
        num_rows: 1700
    })
})

In [3]:
dataset["train"].features

{'question': Value(dtype='string', id=None),
 'passage': Value(dtype='string', id=None),
 'answer': Value(dtype='bool', id=None)}

In [4]:
device = "cuda"
adapter_model_name_or_path = "AdapterHub/xmod-base"
model_name_or_path = "facebook/xmod-base"
tokenizer_name_or_path = "facebook/xmod-base"

label_column = "answer"
max_length = 128
lr = 1e-3
num_epochs = 3
batch_size = 32
id2label = {0: "no", 1: "yes"}
label2id = {"no": 0, "yes": 1}

In [6]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path)

def preprocess_function(examples):
    inputs = []

    keys = list(examples.keys())[:-1]
    values = list(zip(*[examples[key] for key in keys]))


    for value_set in values:
        inputs.append(", ".join(f"{key}: {value}" for key, value in zip(keys, value_set)))

    
    targets = examples[label_column]
    model_inputs = tokenizer(inputs, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt")
    labels = [1 if tl else 0 for tl in targets]
    model_inputs["labels"] = labels
    return model_inputs


# Encode the input data
dataset = dataset.map(preprocess_function, batched=True, remove_columns=["question", "passage", "answer"])
print(dataset)

Map:   0%|          | 0/4735 [00:00<?, ? examples/s]

Map:   0%|          | 0/1700 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 4735
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1700
    })
})


In [7]:
dataset["train"].features

{'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'labels': Value(dtype='int64', id=None)}

In [21]:
config = XmodConfig.from_pretrained(
    model_name_or_path,
    id2label=id2label,
)
model = XmodAdapterModel.from_pretrained(
    adapter_model_name_or_path,
    config=config,
)

# Load the pre-trained adapters we want to fuse
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
model.load_adapter("AdapterHub/xmod-base-sk_SK", load_as="sk", source="hf", config=lang_adapter_config)
model.add_adapter("boolq", set_active=True, config=AdapterConfig.load("pfeiffer"))
model.train_adapter(["boolq"])
model.active_adapters = Stack("sk", "boolq")
model.add_classification_head("boolq", num_labels=len(label2id), id2label=id2label, use_pooler=True)

print("Active adapters:", model.active_adapters)


Some weights of XmodAdapterModel were not initialized from the model checkpoint at AdapterHub/xmod-base and are newly initialized: ['heads.default.2.weight', 'heads.default.0.bias', 'heads.default.2.bias', 'heads.default.3.bias', 'heads.default.0.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Active adapters: Stack[sk, boolq]


In [22]:
loss_function = torch.nn.BCEWithLogitsLoss()
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [{"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], \
                                 "weight_decay": 1e-4,}, \
                                {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], \
                                 "weight_decay": 0.0,},]
optimizer = torch.optim.AdamW(params=optimizer_grouped_parameters, lr=1e-3)

In [23]:
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer

training_args = TrainingArguments(
    learning_rate=5e-5,
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_accuracy,
)

In [24]:
trainer.train()

Step,Training Loss
200,0.6677
400,0.6683
600,0.664


TrainOutput(global_step=740, training_loss=0.6659518061457453, metrics={'train_runtime': 94.8229, 'train_samples_per_second': 249.676, 'train_steps_per_second': 7.804, 'total_flos': 1728547893580800.0, 'train_loss': 0.6659518061457453, 'epoch': 5.0})

In [26]:
trainer.evaluate()

{'eval_loss': 0.6649482250213623,
 'eval_acc': 0.6123529411764705,
 'eval_runtime': 3.375,
 'eval_samples_per_second': 503.701,
 'eval_steps_per_second': 16.0,
 'epoch': 5.0}