# Attach a LoRA adapter and train

## Setup & Imports

In [2]:
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
from twitter_emoji_reaction_lora.data import load_emoji_dataset, tokenize_and_format
from twitter_emoji_reaction_lora.model import build_base_model, build_peft_model
from twitter_emoji_reaction_lora.train import get_weighted_trainer
from twitter_emoji_reaction_lora.utils import print_trainable_parameters, compute_metrics
from uuid import uuid4
import os
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
load_dotenv()

login(token=os.getenv("HUGGINGFACE_TOKEN"))

os.environ["WANDB_API_KEY"] = os.getenv("WANDB_API_KEY")
os.environ["WANDB_PROJECT"] = "Emoji-reaction-coach-with-lora"
os.environ["WANDB_NOTES"] = "Fine tune model with low rank adaptation for an emoji reaction coach"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use only one GPU

## Load & Tokenize  
We pull in the same `load_emoji_dataset` and `tokenize_and_format` functions we used in the 01_explore_tokenize notebook.


In [4]:
# load raw splits
ds = load_emoji_dataset()

# apply our shared tokenization routine
ds_tok, tok = tokenize_and_format(ds, max_length=128)


## 3. Model Initialization & LoRA  
Here we use `build_base_model` to get a `RobertaForSequenceClassification`, then wrap it in LoRA via `build_peft_model`.


In [5]:
model = build_base_model()
print_trainable_parameters(model)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 124661012 || all params: 124661012 || trainable%: 100.00


In [6]:
lora_model = build_peft_model(model)
print_trainable_parameters(lora_model)

trainable params: 2375444 || all params: 127036456 || trainable%: 1.87


## 4. Trainer Setup  
Then we prepare the model for training!


In [None]:
model_id = "roberta-base-with-tweet-eval-emoji"

training_args = TrainingArguments(
    output_dir=model_id,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    num_train_epochs=4,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=256,
    max_grad_norm=0.5,
    label_smoothing_factor=0.1,
    save_total_limit=3,
    logging_steps=30,
    fp16=True,
    push_to_hub=False,
    report_to="wandb",
    run_name = f"copmuter-emoji-{uuid4().hex[:8]}",
    label_names=["labels"],
)
data_collator = DataCollatorWithPadding(tok, pad_to_multiple_of=8)

In [None]:
trainer = get_weighted_trainer(
    model=lora_model,
    args=training_args,
    ds_tok=ds_tok,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Top3 Accuracy
1,2.1401,2.417112,0.2224,0.220445,0.4152
2,2.1157,2.372503,0.2412,0.2463,0.4376
3,2.0143,2.351388,0.2508,0.255062,0.4448
4,1.9346,2.357327,0.2594,0.263203,0.45


0,1
eval/accuracy,▁▅▆█
eval/f1,▁▅▇█
eval/loss,█▃▁▂
eval/runtime,▃█▅▁
eval/samples_per_second,▆▁▄█
eval/steps_per_second,▆▁▄█
eval/top3_accuracy,▁▆▇█
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▁▂▅▄▆▆▄▃▆▄▄▅▄▅▆█▆▅▅▆▄▆▆▅▇▆▆▆▇▅▇▆▆▇██▆█▆█

0,1
eval/accuracy,0.2594
eval/f1,0.2632
eval/loss,2.35733
eval/runtime,10.4631
eval/samples_per_second,477.869
eval/steps_per_second,1.911
eval/top3_accuracy,0.45
total_flos,1.217029238784e+16
train/epoch,4.0
train/global_step,1408.0


In [139]:
metrics = trainer.evaluate(ds_tok["test"])
print(metrics)

# save weights and tokenizer
lora_model.save_pretrained(model_id)
tok.save_pretrained(model_id)

{'eval_loss': 1.9591624736785889, 'eval_accuracy': 0.4286, 'eval_f1': 0.33462876199193464, 'eval_top3_accuracy': 0.65028, 'eval_runtime': 104.9624, 'eval_samples_per_second': 476.361, 'eval_steps_per_second': 1.867, 'epoch': 4.0}


('roberta-base-with-tweet-eval-emoji-full/tokenizer_config.json',
 'roberta-base-with-tweet-eval-emoji-full/special_tokens_map.json',
 'roberta-base-with-tweet-eval-emoji-full/vocab.json',
 'roberta-base-with-tweet-eval-emoji-full/merges.txt',
 'roberta-base-with-tweet-eval-emoji-full/added_tokens.json',
 'roberta-base-with-tweet-eval-emoji-full/tokenizer.json')

In [None]:
wandb.finish()

In [140]:
!ls -lh {model_id}

total 14M
-rw-r--r-- 1 root root  870 Jun 23 18:44 adapter_config.json
-rw-r--r-- 1 root root 9.1M Jun 23 18:44 adapter_model.safetensors
drwxr-xr-x 2 root root 4.0K Jun 23 18:32 checkpoint-1056
drwxr-xr-x 2 root root 4.0K Jun 23 16:59 checkpoint-1408
drwxr-xr-x 2 root root 4.0K Jun 23 17:03 checkpoint-2112
-rw-r--r-- 1 root root 446K Jun 23 18:44 merges.txt
-rw-r--r-- 1 root root 5.0K Jun 23 18:44 README.md
-rw-r--r-- 1 root root  280 Jun 23 18:44 special_tokens_map.json
-rw-r--r-- 1 root root 1.3K Jun 23 18:44 tokenizer_config.json
-rw-r--r-- 1 root root 3.4M Jun 23 18:44 tokenizer.json
-rw-r--r-- 1 root root 780K Jun 23 18:44 vocab.json


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
