# Fine-tuning with Low-Rank Adaptation (LoRA)

In [18]:
# Mount Google Drive at /content/drive so the CSV files read by later cells are reachable.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install wandb python-dotenv datasets peft

In [20]:
import wandb
from dotenv import load_dotenv

from peft import LoraConfig, TaskType, get_peft_model
import transformers
from transformers import BertForSequenceClassification, TrainingArguments, Trainer, AutoTokenizer
from datasets import load_dataset
import numpy as np
from sklearn.metrics import f1_score

# Restrict transformers' own logging to error-level messages to keep cell output readable.
transformers.logging.set_verbosity_error()

In [21]:
# Load variables from a .env file into the process environment first, then log in to
# Weights & Biases (presumably the W&B API key comes from that file - confirm).
load_dotenv()
wandb.login()



True

In [22]:
import os

# Weights & Biases settings passed through environment variables:
# the project name runs are grouped under, and the model-logging mode.
os.environ.update({
    "WANDB_PROJECT": "semeval-21-task-7",
    "WANDB_LOG_MODEL": "checkpoint",
})

## Dataset loading

In [23]:
# Read the three CSV splits from the mounted Drive folder.
dataset = load_dataset(
    'csv',
    data_files={
        'train': 'drive/MyDrive/data/train.csv',
        'dev': 'drive/MyDrive/data/dev.csv',
        'test': 'drive/MyDrive/data/test.csv',
    },
)
# Drop the columns that are not used for the binary humor task.
dataset = dataset.remove_columns(
    ['id', 'humor_rating', 'humor_controversy', 'offense_rating', 'sentence_length']
)
# The Trainer expects the target column to be called 'label'.
dataset = dataset.rename_column('is_humor', 'label')

## Model setup

In [24]:
# Tokenizer that belongs to the 'bert-base-uncased' checkpoint.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')


def tokenize(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Every sequence is truncated and padded to exactly 150 tokens, so all
    encoded examples have the same length.

    Args:
        examples: Mapping with a ``'text'`` entry, as passed by ``Dataset.map``.

    Returns:
        The tokenizer output for ``examples['text']``.
    """
    encoded = tokenizer(
        examples['text'],
        max_length=150,
        truncation=True,
        padding="max_length",
    )
    return encoded

In [25]:
# Encode every split in batches, then discard the raw text column,
# which is no longer needed once the token ids exist.
dataset = dataset.map(tokenize, batched=True)
dataset = dataset.remove_columns(['text'])

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

In [26]:
# Pretrained BERT encoder with a sequence-classification head configured for two labels.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

## Evaluation metric

In [27]:
def compute_metrics(eval_pred):
    """Compute the weighted F1 score for a batch of evaluation predictions.

    Args:
        eval_pred: Pair ``(logits, labels)`` as supplied by the ``Trainer``.

    Returns:
        Dict with a single key ``'f1'`` holding the weighted-average F1 score.
    """
    scores, gold_labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted_labels = np.argmax(scores, axis=-1)
    return {'f1': f1_score(gold_labels, predicted_labels, average='weighted')}


## LoRA configuration and training

In [None]:
# Sweep the LoRA rank over r = 2, 4, 8, 16, 32 and fine-tune one adapter per rank,
# logging each run to Weights & Biases under its own run name.
for i in range(1, 6):
    rank = 2 ** i

    # get_peft_model injects the adapter layers into the model it is given (in place).
    # Reusing one base model across iterations would therefore wrap an already
    # adapted - and already trained - model, so the runs would not be comparable.
    # Reload the pretrained weights for every rank; seeding first makes the newly
    # initialised classification head identical across runs.
    transformers.set_seed(42)
    base_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=rank,
        lora_alpha=4 ** i,  # alpha / r == 2 ** i, i.e. the LoRA scaling grows with the rank
        lora_dropout=0.1,
        bias="none"
    )

    peft_model = get_peft_model(base_model, lora_config)
    trainable_params, all_param = peft_model.get_nb_trainable_parameters()

    print("------------------------------")
    print(f"r: {rank} || trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param} ")

    trainer = Trainer(
        model=peft_model,
        args=TrainingArguments(
            # One sub-directory per rank so checkpoints of different runs
            # (checkpoint-200, checkpoint-400, ...) do not overwrite each other.
            output_dir=f'bert-lora-humor-detection/r{rank}',
            evaluation_strategy="steps",
            eval_steps=200,
            logging_steps=200,
            save_steps=200,
            num_train_epochs=2,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            report_to=["wandb"],
            run_name=f"bert-lora-humor-{i}",
            adam_epsilon=1e-08,
            learning_rate=2e-5
        ),
        train_dataset=dataset['train'],
        eval_dataset=dataset['dev'],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()
    # Close the W&B run so the next rank is logged as a separate run.
    wandb.finish()

------------------------------
r: 2 || trainable params: 75266 || all params: 109559044 || trainable%: 0.06869902953881196 
{'loss': 0.6546, 'learning_rate': 1e-05, 'epoch': 1.0}


[34m[1mwandb[0m: Adding directory to artifact (./bert-lora-humor-detection/checkpoint-200)... Done. 0.0s


{'eval_loss': 0.6381162405014038, 'eval_f1': 0.4861974868995849, 'eval_runtime': 7.0957, 'eval_samples_per_second': 112.744, 'eval_steps_per_second': 3.523, 'epoch': 1.0}


In [None]:
# Test-set evaluation, currently disabled. `trainer` here is whichever Trainer the
# sweep loop created last, so the metrics would describe that final run only.
# NOTE(review): the metrics shown below were presumably produced by an earlier
# execution of this line - confirm before relying on them.
# trainer.predict(dataset['test']).metrics

{'test_loss': 0.3577665686607361,
 'test_f1': 0.8537736197348548,
 'test_runtime': 37.9098,
 'test_samples_per_second': 21.103,
 'test_steps_per_second': 2.638}