# Lightweight Fine-Tuning Project

TODO: In this cell, describe your choices for each of the following

* PEFT technique: 
* Model: 
* Evaluation approach: 
* Fine-tuning dataset: 

## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

Load the datasdet dair-air/emotion and explore the data

In [1]:
from datasets import load_dataset

ds = load_dataset("dair-ai/emotion", "split")
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [None]:
import random

# print some random featues and the labels
print("Features:")
indices = random.sample(range(len(ds["train"])), 10)
for i in indices:
    print("{} : {}".format(ds["train"]['text'][i], ds["train"]['label'][i]))

print("\nLabels: {}".format(ds["train"].features["label"].names))

Features:
i mentioned previously it has only been over two months i am feeling hopeful that if i am having more positive thought i might be able to forgive her : 1
i feel i might have been too gloomy about it : 0
ive been wrestling with feeling jealous envious of my gfs other bf since hes been staying with her for a while : 3
i was measuring a week big and that was enough to just make me feel lousy about myself : 0
i can however tell you that it will hurt you will be humiliated and you will feel wonderful afterwards : 1
i feel so blessed and beyond thankful for the opportunity to paint for my readers its been the best : 1
im feeling paranoid im well aware of the governments tactics and if they put it on the books they want to use it : 4
i have spent the majority of my life trying to change how i look in order to feel accepted by others to feel loved by other to feel better than people around me because in my mind my physicality is the only thing that i have to offer : 2
i dunno how els

In [None]:
# create data structures for further processing

# names of the splits
splits=list(ds.keys())
# number of classes
num_classes=len(ds["train"].features["label"].names)

# Dictionairies to translate between label string and label number
id2label = dict(zip(range(num_classes), ds['train'].features['label'].names))
label2id = dict(zip(ds['train'].features['label'].names, range(num_classes)))
print(id2label)
print(label2id)

{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}
{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}


In [16]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Use GPT-2 as a small base model
# Create a variant with classification head
device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
model_id = "openai-community/gpt2"
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, 
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
    device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Add tokens to the dataset
tokenized_ds = {}
for split in splits:
    tokenized_ds[split] = ds[split].map(
        lambda x: tokenizer(x["text"], truncation=True), batched=True
    )

for param in model.base_model.parameters():
    param.requires_grad = False

# Add the padding token which is missing in GPT-2
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))
    model.config.pad_token_id = model.config.eos_token_id
    print("Padding token: {}".format(tokenizer.pad_token))

# metric function
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Padding token: [PAD]


In [17]:
import numpy as np
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis",
        learning_rate=2e-3,
        per_device_train_batch_size=200,
        per_device_eval_batch_size=200,
        num_train_epochs=5,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.251168,0.5265
2,No log,1.151271,0.568
3,No log,1.143506,0.573
4,No log,1.129165,0.5815
5,No log,1.112125,0.5795


TrainOutput(global_step=400, training_loss=1.3240565490722656, metrics={'train_runtime': 74.0448, 'train_samples_per_second': 1080.427, 'train_steps_per_second': 5.402, 'total_flos': 2440865182924800.0, 'train_loss': 1.3240565490722656, 'epoch': 5.0})

In [18]:
# Evaluate the model
original_performance=trainer.evaluate()
print(original_performance)

{'eval_loss': 1.112125277519226, 'eval_accuracy': 0.5795, 'eval_runtime': 1.3327, 'eval_samples_per_second': 1500.697, 'eval_steps_per_second': 7.503, 'epoch': 5.0}


## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [19]:
from peft import LoraConfig, TaskType, get_peft_model

torch.cuda.empty_cache()

# Use Lora for PEFT
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    task_type=TaskType.TOKEN_CLS,
    fan_in_fan_out=True,
)
model_lora = get_peft_model(model, peft_config)
model_lora.print_trainable_parameters()

trainable params: 594,432 || all params: 125,039,616 || trainable%: 0.4754


In [20]:
trainer_lora = Trainer(
    model=model_lora,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis_lora",
        learning_rate=2e-3,
        per_device_train_batch_size=100,
        per_device_eval_batch_size=100,
        num_train_epochs=5,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer_lora.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.267075,0.9105
2,No log,0.199314,0.9275
3,No log,0.158047,0.9255
4,0.252300,0.122503,0.9305
5,0.252300,0.115791,0.9335




TrainOutput(global_step=800, training_loss=0.20162654399871827, metrics={'train_runtime': 144.1922, 'train_samples_per_second': 554.815, 'train_steps_per_second': 5.548, 'total_flos': 2325225977856000.0, 'train_loss': 0.20162654399871827, 'epoch': 5.0})

###  ⚠️ IMPORTANT ⚠️

Due to workspace storage constraints, you should not store the model weights in the same directory but rather use `/tmp` to avoid workspace crashes which are irrecoverable.
Ensure you save it in /tmp always.

In [24]:
# Saving the model
model_save_lora="/tmp/gpt2_lora"
model_lora.save_pretrained(model_save_lora, save_embedding_layers=True)

## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [25]:
from peft import PeftModelForTokenClassification

# loading the model
model_loaded = PeftModelForTokenClassification.from_pretrained(model, model_save_lora)

In [27]:
trainer_evaluate = Trainer(
    model=model_loaded,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis_lora_evaluate",
        per_device_train_batch_size=100,
        per_device_eval_batch_size=100,
        do_train=False,
        do_eval=True,
    ),
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

fine_tuned_performance=trainer_evaluate.evaluate()

In [29]:

print("Original Model:  ", original_performance)
print("Fine-Tuned Model:", fine_tuned_performance)

Original Model:   {'eval_loss': 1.112125277519226, 'eval_accuracy': 0.5795, 'eval_runtime': 1.3327, 'eval_samples_per_second': 1500.697, 'eval_steps_per_second': 7.503, 'epoch': 5.0}
Fine-Tuned Model: {'eval_loss': 0.1157907024025917, 'eval_model_preparation_time': 0.002, 'eval_accuracy': 0.9335, 'eval_runtime': 1.4146, 'eval_samples_per_second': 1413.79, 'eval_steps_per_second': 14.138}


In [None]:
import torch
import numpy as np
from accelerate import Accelerator
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments

from datasets import load_dataset

ds = load_dataset("dair-ai/emotion", "split")
splits=list(ds.keys())
num_classes=len(ds["train"].features["label"].names)
id2label = dict(zip(range(num_classes), ds['train'].features['label'].names))
label2id = dict(zip(ds['train'].features['label'].names, range(num_classes)))

# deviice
#device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
#print("Device = {}".format(device))

# base model
model_id = "openai-community/gpt2"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model4b = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
    torch_dtype="auto")


tokenizer = AutoTokenizer.from_pretrained(model_id)

tokenized_ds = {}
for split in splits:
    tokenized_ds[split] = ds[split].map(
        lambda x: tokenizer(x["text"], truncation=True), batched=True
    )

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model4b.resize_token_embeddings(len(tokenizer))
    model4b.config.pad_token_id = model4b.config.eos_token_id
    print(tokenizer.pad_token)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

for param in model4b.base_model.parameters():
    param.requires_grad = False

# peft model
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    task_type=TaskType.TOKEN_CLS,
    fan_in_fan_out=True,
)

model4bl = get_peft_model(model4b, peft_config)
model4bl.print_trainable_parameters()

trainer = Trainer(
    model=model4bl,
    args=TrainingArguments(
        output_dir="./data/gpt2_lora_q4",
        learning_rate=2e-3,
        per_device_train_batch_size=100,
        per_device_eval_batch_size=100,
        num_train_epochs=3,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        fp16=True
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()
validation_lora_q4 = trainer.evaluate()

In [None]:
import torch
import numpy as np
from accelerate import Accelerator
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments

from datasets import load_dataset

ds = load_dataset("dair-ai/emotion", "split")
splits=list(ds.keys())
num_classes=len(ds["train"].features["label"].names)
id2label = dict(zip(range(num_classes), ds['train'].features['label'].names))
label2id = dict(zip(ds['train'].features['label'].names, range(num_classes)))

# deviice
#device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
#print("Device = {}".format(device))

# base model
model_id = "openai-community/gpt2"
#quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model4b = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    #quantization_config=quantization_config,
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
    torch_dtype="auto")


tokenizer = AutoTokenizer.from_pretrained(model_id)

tokenized_ds = {}
for split in splits:
    tokenized_ds[split] = ds[split].map(
        lambda x: tokenizer(x["text"], truncation=True), batched=True
    )

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model4b.resize_token_embeddings(len(tokenizer))
    model4b.config.pad_token_id = model4b.config.eos_token_id
    print(tokenizer.pad_token)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

for param in model4b.base_model.parameters():
    param.requires_grad = False

# peft model
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    task_type=TaskType.TOKEN_CLS,
    fan_in_fan_out=True,
)

model4bl = get_peft_model(model4b, peft_config)
model4bl.print_trainable_parameters()

trainer = Trainer(
    model=model4bl,
    args=TrainingArguments(
        output_dir="./data/gpt2_lora_q4",
        learning_rate=2e-3,
        per_device_train_batch_size=100,
        per_device_eval_batch_size=100,
        num_train_epochs=3,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()
validation_lora_q8 = trainer.evaluate()

In [None]:
print("Validation Lora 4byte = {}".format(validation_lora_q4))
print("Validation Lora 8byte = {}".format(validation_lora_q8))