# Lightweight Fine-Tuning Project

TODO: In this cell, describe your choices for each of the following

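* PEFT technique: LoRA (rank 16, alpha 32), plus a 4-bit quantized (QLoRA) variant and a small sweep over LoRA rank, learning rate, dropout and weight decay
* Model: GPT-2 (`openai-community/gpt2`) with a sequence-classification head
* Evaluation approach: accuracy on the test split via `Trainer.evaluate`, comparing a head-only baseline (frozen GPT-2 body) with the LoRA fine-tuned model
* Fine-tuning dataset: `dair-ai/emotion` (`split` configuration: 16,000 train / 2,000 validation / 2,000 test examples, six emotion labels)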

## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

### Load the dataset dair-ai/emotion and explore the data

In [1]:
from datasets import load_dataset

# Fetch the "split" configuration of the emotion corpus.
ds = load_dataset(path="dair-ai/emotion", name="split")
# Leave the DatasetDict as the last expression so the notebook renders its summary.
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [2]:
import random

# Print a few random training examples together with their label ids.
print("Features:")
train_split = ds["train"]
indices = random.sample(range(len(train_split)), 10)
for i in indices:
    # Index the row rather than the column: depending on the installed
    # `datasets` version, `train_split['text']` can materialize the entire
    # column on every iteration just to read a single value.
    example = train_split[i]
    print("{} : {}".format(example['text'], example['label']))

print("\nLabels: {}".format(train_split.features["label"].names))

Features:
i feel more and more dissatisfied with each passing weekend : 3
i feel slightly naughty holding this cd seeing as it doesnt officially release until tuesday : 2
i was feeling bouncy so i added a few of my go to tangles around it i rather like the spiraling effect achieved : 1
i feel like he should have waited for a girl who was less messy : 0
i can look back likely years from now realize the impact of several lessons learned through the course of a season that just had that feel of something special and know that even if nothing in my tenure comes close to this again i will always have : 1
i still feel a little bit listless but im coping with it by getting as much work done as possible to distract myself and trying not to overthink anything : 0
i spent a lot of time earlier this year feeling stressed out about capacity and resistant to stretching it because it felt like stretching me : 0
i hurt their feelings for refusing to listen to their spiteful hurtful sniping at others 

In [3]:
# Lookup tables shared by the rest of the notebook.

# Names of the available splits, in the order the DatasetDict provides them.
splits = list(ds)

# Class names as declared by the label feature; their count is the number of classes.
label_names = ds["train"].features["label"].names
num_classes = len(label_names)

# Two-way mapping between class index and class name.
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in enumerate(label_names)}
print(id2label)
print(label2id)

{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}
{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}


Create a base model with added padding token

In [None]:
from transformers import AutoModelForSequenceClassification
import torch

# Device type that models are loaded onto.
# `torch.accelerator.current_accelerator()` may return None when no accelerator
# is present, and older torch builds have no `torch.accelerator` namespace at
# all, so only use it when an accelerator is actually available. Otherwise fall
# back to CUDA if it is usable, and to the CPU as a last resort.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build a sequence-classification variant of a pretrained checkpoint
def create_base_model(model_id):
    """Load `model_id` with a classification head sized for this dataset.

    The number of labels and both label mappings come from the notebook-level
    `num_classes`, `id2label` and `label2id`; `device` is passed as the
    `device_map`. If the loaded config has no padding token id, the
    end-of-sequence id is used in its place.

    Args:
        model_id: Checkpoint name or path handed to `from_pretrained`.

    Returns:
        The loaded classification model.
    """
    load_kwargs = dict(
        num_labels=num_classes,
        id2label=id2label,
        label2id=label2id,
        device_map=device,
    )
    classifier = AutoModelForSequenceClassification.from_pretrained(model_id, **load_kwargs)

    cfg = classifier.config
    if cfg.pad_token_id is None:
        cfg.pad_token_id = cfg.eos_token_id

    return classifier

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification

# GPT-2 serves as the small base model.
model_id = "openai-community/gpt2"
model = create_base_model(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)


def _tokenize_batch(batch):
    """Tokenize the `text` column of a batch with truncation enabled."""
    return tokenizer(batch["text"], truncation=True)


# Tokenize every split; no padding is applied at this stage.
tokenized_ds = {split: ds[split].map(_tokenize_batch, batched=True) for split in splits}

# Freeze the base model so that only the classification head stays trainable.
model.base_model.requires_grad_(False)

# GPT-2 has no padding token; reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Accuracy metric handed to the Trainer
def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Pair of (scores, labels). The scores are reduced with an
            argmax over axis 1 to obtain the predicted class per example.

    Returns:
        Dict with a single key, "accuracy": the fraction of predictions that
        equal their label.
    """
    scores, gold = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    hits = predicted_ids == gold
    return {"accuracy": hits.mean()}

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
import numpy as np
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments
import os

# Trainer checkpoints go below /tmp, the final weights below ./data.
temp_path = "/tmp"
save_path = "./data"

model_name = "gpt2_classification"
checkpoint_dir, save_dir_base = (
    os.path.join(root, model_name) for root in (temp_path, save_path)
)

# Evaluate and checkpoint once per epoch, then restore the best checkpoint.
# NOTE(review): the test split is the per-epoch evaluation set and therefore
# also drives best-checkpoint selection; the validation split is never used.
baseline_args = TrainingArguments(
    output_dir=checkpoint_dir,
    learning_rate=2e-3,
    per_device_train_batch_size=100,
    per_device_eval_batch_size=100,
    num_train_epochs=4,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Pads each batch to its longest sequence.
padding_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=baseline_args,
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=padding_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.298004,0.4965
2,No log,1.24204,0.538
3,No log,1.225004,0.5415
4,1.314000,1.219356,0.543


TrainOutput(global_step=640, training_loss=1.2838828802108764, metrics={'train_runtime': 58.6693, 'train_samples_per_second': 1090.86, 'train_steps_per_second': 10.909, 'total_flos': 1848843351244800.0, 'train_loss': 1.2838828802108764, 'epoch': 4.0})

In [7]:
# Score the baseline on the trainer's evaluation set and keep the metrics
# for the comparison further down.
original_performance = trainer.evaluate()
print(original_performance)

# Store the baseline weights below `save_dir_base`.
model.save_pretrained(save_directory=save_dir_base, save_embedding_layers=True)

{'eval_loss': 1.2193557024002075, 'eval_accuracy': 0.543, 'eval_runtime': 1.3184, 'eval_samples_per_second': 1517.005, 'eval_steps_per_second': 15.17, 'epoch': 4.0}


## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [8]:
from peft import LoraConfig, TaskType, get_peft_model

# Release cached, unused GPU memory before building the LoRA model.
torch.cuda.empty_cache()

# LoRA configuration for parameter-efficient fine-tuning.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    # The wrapped model is a sequence classifier (one label per text), so the
    # matching task type is SEQ_CLS rather than TOKEN_CLS (one label per token).
    task_type=TaskType.SEQ_CLS,
    # GPT-2's Conv1D layers store their weights as (fan_in, fan_out).
    fan_in_fan_out=True,
)

# Adding PEFT modifies the base model in place,
# so it should be saved for restoring the PEFT model later.
model_lora = get_peft_model(model, peft_config)
model_lora.print_trainable_parameters()

# Separate checkpoint and save directories for the LoRA run.
model_name = "gpt2_classification_lora"
checkpoint_dir = os.path.join(temp_path, model_name)
save_dir = os.path.join(save_path, model_name)

trainable params: 594,432 || all params: 125,038,848 || trainable%: 0.4754


In [9]:
# Same schedule as the baseline run, applied to the LoRA-wrapped model.
# NOTE(review): the test split is the per-epoch evaluation set and therefore
# also drives best-checkpoint selection; the validation split is never used.
lora_args = TrainingArguments(
    output_dir=checkpoint_dir,
    learning_rate=2e-3,
    per_device_train_batch_size=100,
    per_device_eval_batch_size=100,
    num_train_epochs=4,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Pads each batch to its longest sequence.
lora_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer_lora = Trainer(
    model=model_lora,
    args=lora_args,
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=lora_collator,
    compute_metrics=compute_metrics,
)

trainer_lora.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.195741,0.9215
2,No log,0.183972,0.9175
3,No log,0.126496,0.9285
4,0.255800,0.122841,0.9285


TrainOutput(global_step=640, training_loss=0.22506903409957885, metrics={'train_runtime': 113.8406, 'train_samples_per_second': 562.19, 'train_steps_per_second': 5.622, 'total_flos': 1861763687424000.0, 'train_loss': 0.22506903409957885, 'epoch': 4.0})

###  ⚠️ IMPORTANT ⚠️

Due to workspace storage constraints, you should not store the model weights in the same directory but rather use `/tmp` to avoid workspace crashes which are irrecoverable.
Ensure you save it in /tmp always.

In [10]:
# Write the trained LoRA adapter to `save_dir` so it can be reloaded for inference.
model_lora.save_pretrained(save_directory=save_dir, save_embedding_layers=True)

## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [14]:
from peft import PeftModelForTokenClassification

# Rebuild a fresh base model and attach the saved adapter to it.
# (Alternative: reload the stored baseline with
#  AutoModelForSequenceClassification.from_pretrained(save_dir_base).)
model_id = "openai-community/gpt2"
model_base = create_base_model(model_id)

model_loaded = PeftModelForTokenClassification.from_pretrained(
    model=model_base,
    model_id=save_dir,
)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# Evaluation-only trainer around the reloaded PEFT model (no training data).
evaluate_args = TrainingArguments(
    output_dir="./data/sentiment_analysis_lora_evaluate",
    per_device_train_batch_size=100,
    per_device_eval_batch_size=100,
    do_train=False,
    do_eval=True,
)

trainer_evaluate = Trainer(
    model=model_loaded,
    args=evaluate_args,
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# Metrics of the fine-tuned model on the test split.
fine_tuned_performance = trainer_evaluate.evaluate()

In [16]:

# Full metric dictionaries of both models.
for caption, metrics in (
    ("Original Model:  ", original_performance),
    ("Fine-Tuned Model:", fine_tuned_performance),
):
    print(caption, metrics)

# Accuracy only, for a direct comparison.
for caption, metrics in (
    ("Original Model accurcy:   ", original_performance),
    ("Fine-Tuned Model accurcy: ", fine_tuned_performance),
):
    print(caption, metrics['eval_accuracy'])

Original Model:   {'eval_loss': 1.2193557024002075, 'eval_accuracy': 0.543, 'eval_runtime': 1.3184, 'eval_samples_per_second': 1517.005, 'eval_steps_per_second': 15.17, 'epoch': 4.0}
Fine-Tuned Model: {'eval_loss': 0.12284120172262192, 'eval_model_preparation_time': 0.002, 'eval_accuracy': 0.9285, 'eval_runtime': 1.3973, 'eval_samples_per_second': 1431.381, 'eval_steps_per_second': 14.314}
Original Model accurcy:    0.543
Fine-Tuned Model accurcy:  0.9285


### Use 4-bit quantization: QLoRA

In [None]:
from transformers import BitsAndBytesConfig

# Release cached, unused GPU memory before loading another model.
torch.cuda.empty_cache()

model_id = "openai-community/gpt2"
temp_path = "/tmp"
save_path = "./data"

# Dedicated directories for the quantized run. The adapter gets its own save
# location so that it cannot overwrite the adapter already stored in `save_dir`.
model_name = "gpt2_classification_4bit_lora"
checkpoint_dir_q4 = os.path.join(temp_path, model_name)
save_dir_q4 = os.path.join(save_path, model_name)

# Load the base model with 4-bit quantization.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model4b = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
    torch_dtype="auto")

# Take the padding id from this model's own config instead of relying on the
# unrelated notebook-level `model`.
if model4b.config.pad_token_id is None:
    model4b.config.pad_token_id = model4b.config.eos_token_id

# Freeze the quantized base model.
for param in model4b.base_model.parameters():
    param.requires_grad = False

# LoRA adapter on top of the quantized model.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    # The wrapped model is a sequence classifier (one label per text), so the
    # matching task type is SEQ_CLS rather than TOKEN_CLS (one label per token).
    task_type=TaskType.SEQ_CLS,
    # GPT-2's Conv1D layers store their weights as (fan_in, fan_out).
    fan_in_fan_out=True,
)

model4bl = get_peft_model(model4b, peft_config)
model4bl.print_trainable_parameters()

# A separate trainer name keeps the baseline `trainer` intact.
trainer_q4 = Trainer(
    model=model4bl,
    args=TrainingArguments(
        output_dir=checkpoint_dir_q4,
        learning_rate=2e-3,
        per_device_train_batch_size=100,
        per_device_eval_batch_size=100,
        num_train_epochs=5,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        fp16=True
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    processing_class=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer_q4.train()
validation_lora_q4 = trainer_q4.evaluate()
model4bl.save_pretrained(save_dir_q4, save_embedding_layers=True)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 594,432 || all params: 125,038,848 || trainable%: 0.4754




Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.24846,0.9105


In [None]:
# Accuracy of the baseline, the LoRA model and the 4-bit LoRA model.
for caption, metrics in (
    ("Original Model accuracy:         ", original_performance),
    ("Fine-Tuned Model accuracy:       ", fine_tuned_performance),
    ("Fine-Tuned Model 4 bit accuracy: ", validation_lora_q4),
):
    print(caption, metrics['eval_accuracy'])

### Experiment with different LoRA parameters

In [None]:
from peft import PeftModelForTokenClassification, LoraConfig, TaskType, get_peft_model
import pandas as pd

# Release cached, unused GPU memory before the hyperparameter experiments.
torch.cuda.empty_cache()


def create_lora_config(r, lora_alpha, lora_dropout):
    """Build the LoRA configuration for one experiment.

    Args:
        r: LoRA rank.
        lora_alpha: LoRA scaling factor.
        lora_dropout: Dropout probability applied in the LoRA layers.

    Returns:
        A LoraConfig for sequence classification.
    """
    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        # The models built in this notebook are sequence classifiers (one
        # label per text), so the task type is SEQ_CLS rather than TOKEN_CLS.
        task_type=TaskType.SEQ_CLS,
        # GPT-2's Conv1D layers store their weights as (fan_in, fan_out).
        fan_in_fan_out=True,
    )

    return peft_config

def create_lora_model(peft_config):
    """Wrap a newly loaded base model with the given PEFT configuration.

    The base model is created by `create_base_model` from the notebook-level
    `model_id`.

    Args:
        peft_config: PEFT configuration passed to `get_peft_model`.

    Returns:
        The PEFT-wrapped model.
    """
    return get_peft_model(create_base_model(model_id), peft_config)
    
def create_trainer(model, learning_rate, weight_decay):
    """Assemble the Trainer for one hyperparameter experiment.

    Every experiment trains for 4 epochs with a batch size of 50, writes its
    checkpoints to '/tmp', evaluates and saves once per epoch and reloads the
    best checkpoint at the end.

    Args:
        model: Model to train.
        learning_rate: Learning rate forwarded to TrainingArguments.
        weight_decay: Weight decay forwarded to TrainingArguments.

    Returns:
        A Trainer that trains on the tokenized train split and evaluates on
        the tokenized test split with `compute_metrics`.
    """
    experiment_args = TrainingArguments(
        output_dir='/tmp',
        per_device_train_batch_size=50,
        per_device_eval_batch_size=50,
        num_train_epochs=4,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    )

    return Trainer(
        model=model,
        args=experiment_args,
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["test"],
        processing_class=tokenizer,
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
        compute_metrics=compute_metrics,
    )

def evaluate_model(model):
    """Run `model.evaluate()` and return its result.

    Args:
        model: Any object exposing an `evaluate()` method. Despite the
            parameter name, in this notebook that is a Trainer.

    Returns:
        Whatever `model.evaluate()` returns.
    """
    return model.evaluate()

# Accumulates one record per hyperparameter experiment.
results = []

In [None]:
# Sweep the LoRA rank. Alpha follows the rank (alpha = 2 * r); the remaining
# hyperparameters stay fixed, so they are set once outside the loop.
dropout = 0.1
learning_rate = 2e-3
weight_decay = 0.01

for r in [8, 4, 2]:
    alpha = 2 * r
    # Experiment-specific names keep the notebook-level `model` and `trainer` intact.
    sweep_model = create_lora_model(create_lora_config(r, alpha, dropout))
    sweep_trainer = create_trainer(sweep_model, learning_rate, weight_decay)

    print("Start training a model with R={}, alpha={} droptout={}".format(r, alpha, dropout))

    sweep_trainer.train()
    # Named `eval_metrics` so that the builtin `eval` is not shadowed.
    eval_metrics = sweep_trainer.evaluate()

    results.append({
        'r': r,
        'alpha': alpha,
        'dropout': dropout,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'accuracy': eval_metrics['eval_accuracy'],
    })
    # Show the running results table after every experiment.
    df_results = pd.DataFrame(results)
    print(df_results)

In [None]:
# Sweep the learning rate. The remaining hyperparameters stay fixed, so they
# are set once outside the loop.
dropout = 0.1
r = 8
weight_decay = 0.01
alpha = 2 * r

for learning_rate in [2e-4, 2e-3, 2e-2]:
    # Experiment-specific names keep the notebook-level `model` and `trainer` intact.
    sweep_model = create_lora_model(create_lora_config(r, alpha, dropout))
    sweep_trainer = create_trainer(sweep_model, learning_rate, weight_decay)

    # Include the swept value, otherwise every run logs the same line.
    print("Start training a model with R={}, alpha={} dropout={} learning_rate={}".format(
        r, alpha, dropout, learning_rate))

    sweep_trainer.train()
    # Named `eval_metrics` so that the builtin `eval` is not shadowed.
    eval_metrics = sweep_trainer.evaluate()

    results.append({
        'r': r,
        'alpha': alpha,
        'dropout': dropout,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'accuracy': eval_metrics['eval_accuracy'],
    })
    # Show the running results table after every experiment.
    df_results = pd.DataFrame(results)
    print(df_results)

In [None]:
# Sweep the LoRA dropout. The remaining hyperparameters stay fixed, so they
# are set once outside the loop.
learning_rate = 2e-3
r = 8
weight_decay = 0.01
alpha = 2 * r

for dropout in [0.01, 0.1, 0.5]:
    # Experiment-specific names keep the notebook-level `model` and `trainer` intact.
    sweep_model = create_lora_model(create_lora_config(r, alpha, dropout))
    sweep_trainer = create_trainer(sweep_model, learning_rate, weight_decay)

    print("Start training a model with R={}, alpha={} droptout={}".format(r, alpha, dropout))

    sweep_trainer.train()
    # Named `eval_metrics` so that the builtin `eval` is not shadowed.
    eval_metrics = sweep_trainer.evaluate()

    results.append({
        'r': r,
        'alpha': alpha,
        'dropout': dropout,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'accuracy': eval_metrics['eval_accuracy'],
    })
    # Show the running results table after every experiment.
    df_results = pd.DataFrame(results)
    print(df_results)

In [None]:
# Sweep the weight decay. The remaining hyperparameters stay fixed, so they
# are set once outside the loop.
learning_rate = 2e-3
dropout = 0.1
r = 8
alpha = 2 * r

for weight_decay in [0.001, 0.01, 0.1]:
    # Experiment-specific names keep the notebook-level `model` and `trainer` intact.
    sweep_model = create_lora_model(create_lora_config(r, alpha, dropout))
    sweep_trainer = create_trainer(sweep_model, learning_rate, weight_decay)

    # Include the swept value, otherwise every run logs the same line.
    print("Start training a model with R={}, alpha={} dropout={} weight_decay={}".format(
        r, alpha, dropout, weight_decay))

    sweep_trainer.train()
    # Named `eval_metrics` so that the builtin `eval` is not shadowed.
    eval_metrics = sweep_trainer.evaluate()

    results.append({
        'r': r,
        'alpha': alpha,
        'dropout': dropout,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'accuracy': eval_metrics['eval_accuracy'],
    })
    # Show the running results table after every experiment.
    df_results = pd.DataFrame(results)
    print(df_results)