# Apply Lightweight Fine-Tuning to Foundation Model

## Objectives

1. Load a pre-trained model and evaluate its performance
2. Perform parameter-efficient fine-tuning using the pre-trained model
3. Perform inference using the fine-tuned model and compare its performance to the original model

## Import Packages

In [15]:
import numpy as np
import pandas as pd

from datasets import load_dataset

from transformers import (AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments)

from peft import LoraConfig, get_peft_model, TaskType
import evaluate

# Instantiate Metrics
# Load the "accuracy" metric from the `evaluate` library once, up front; the
# compute_metrics() defined in the LoRA training section calls
# metric.compute() on this object.
metric = evaluate.load("accuracy")

## Load a Pre-trained model and evaluate its performance.

- Dataset: IMDB (https://huggingface.co/datasets/imdb)
- Model: BERT-base-uncased (https://huggingface.co/google-bert/bert-base-uncased)

In [16]:
# Download the Datasets
# Fetch the IMDB train and test splits and key them by split name
splits = ["train", "test"]
ds = dict(zip(splits, load_dataset("imdb", split=splits)))

# Shuffle each split with a fixed seed so the row order is reproducible.
# No rows are dropped here: both splits keep all of their examples.
for split in splits:
    ds[split] = ds[split].shuffle(seed=42)

# Show the dataset
ds

{'train': Dataset({
     features: ['text', 'label'],
     num_rows: 25000
 }),
 'test': Dataset({
     features: ['text', 'label'],
     num_rows: 25000
 })}

In [17]:
# Pre Process Data
# Get the Tokenizer
# Uses the "bert-base-uncased" checkpoint name, the same one that is passed to
# AutoModelForSequenceClassification.from_pretrained() in the cells below.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Preprocess Function
def preprocess_function(examples):
    """Tokenize the ``text`` field of a batch of IMDB examples.

    The texts are truncated and padded with ``padding='max_length'``, so
    every example comes back with the same sequence length.

    Args:
        examples: Mapping with a ``'text'`` entry; this function is applied
            through ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the given texts.
    """
    texts = examples['text']
    return tokenizer(texts, truncation=True, padding='max_length')

# Tokenize every split in batches and collect the results by split name
tokenized_ds = {
    split_name: ds[split_name].map(preprocess_function, batched=True)
    for split_name in splits
}

# Sanity check: print the token ids of the first tokenized training example
print(tokenized_ds["train"][0]["input_ids"])

[101, 2045, 2003, 2053, 7189, 2012, 2035, 2090, 3481, 3771, 1998, 6337, 2099, 2021, 1996, 2755, 2008, 2119, 2024, 2610, 2186, 2055, 6355, 6997, 1012, 6337, 2099, 3504, 15594, 2100, 1010, 3481, 3771, 3504, 4438, 1012, 6337, 2099, 14811, 2024, 3243, 3722, 1012, 3481, 3771, 1005, 1055, 5436, 2024, 2521, 2062, 8552, 1012, 1012, 1012, 3481, 3771, 3504, 2062, 2066, 3539, 8343, 1010, 2065, 2057, 2031, 2000, 3962, 12319, 1012, 1012, 1012, 1996, 2364, 2839, 2003, 5410, 1998, 6881, 2080, 1010, 2021, 2031, 1000, 17936, 6767, 7054, 3401, 1000, 1012, 2111, 2066, 2000, 12826, 1010, 2000, 3648, 1010, 2000, 16157, 1012, 2129, 2055, 2074, 9107, 1029, 6057, 2518, 2205, 1010, 2111, 3015, 3481, 3771, 3504, 2137, 2021, 1010, 2006, 1996, 2060, 2192, 1010, 9177, 2027, 9544, 2137, 2186, 1006, 999, 999, 999, 1007, 1012, 2672, 2009, 1005, 1055, 1996, 2653, 1010, 2030, 1996, 4382, 1010, 2021, 1045, 2228, 2023, 2186, 2003, 2062, 2394, 2084, 2137, 1012, 2011, 1996, 2126, 1010, 1996, 5889, 2024, 2428, 2204, 1998, 6

In [18]:
# Baseline model: the pre-trained checkpoint plus a 2-way classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    # Label maps let predictions be rendered as strings
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)

# Freeze the base model by disabling gradients on all of its parameters
model.base_model.requires_grad_(False)

# Display the classification head
model.classifier

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Linear(in_features=768, out_features=2, bias=True)

In [20]:
# Evaluate Pre-Trained Model
def compute_metrics(eval_pred):
    """Compute classification accuracy for an evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores with the classes along axis 1; ``labels`` holds
            the reference class ids.

    Returns:
        A dict with a single ``"accuracy"`` entry: the fraction of examples
        whose highest-scoring class equals the label.
    """
    scores, references = eval_pred
    predicted_labels = np.argmax(scores, axis=1)
    accuracy = (predicted_labels == references).mean()
    return {"accuracy": accuracy}

# Create The Trainer Class
# This Trainer is only used for evaluation in this cell: trainer.evaluate()
# is called below and trainer.train() is not, so the training-related
# arguments (learning rate, epochs, weight decay, ...) are configured but not
# exercised here.
# NOTE(review): `evaluation_strategy` is the argument name accepted by the
# transformers version this notebook was run with; newer releases may expect
# `eval_strategy` instead -- confirm against the installed version.
trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis",
        learning_rate=2e-3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=2,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],  # the split passed as evaluation data
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,  # supplies the "accuracy" entry
)

# Evaluate the model with the frozen base; the metrics dict is shown below.
trainer.evaluate()

{'eval_loss': 0.45447051525115967,
 'eval_accuracy': 0.79056,
 'eval_runtime': 754.9461,
 'eval_samples_per_second': 33.115,
 'eval_steps_per_second': 8.279}

## Perform parameter-efficient fine-tuning using the pre-trained model

- Use PEFT library (https://huggingface.co/docs/peft/en/index)

In [21]:
# Get the Pre-Trained Model
# Load a fresh copy of the pre-trained model to use as the base for LoRA
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},  # For converting predictions to strings
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)
# Unlike the baseline cell, the base model is NOT frozen here: requires_grad
# is set to True on every base-model parameter.
# NOTE(review): the print_trainable_parameters() output further down reports
# only 38,402 trainable parameters after get_peft_model(), so this flag does
# not appear to carry over to the LoRA-wrapped model -- confirm whether this
# loop is needed at all.
for param in model.base_model.parameters():
    param.requires_grad = True

# Display the classification head
model.classifier

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Linear(in_features=768, out_features=2, bias=True)

In [22]:
# LoRA adapter settings for the sequence-classification task
config = LoraConfig(
    r=1,  # rank of the low-rank update matrices
    lora_alpha=1,  # LoRA scaling factor
    lora_dropout=0.1,  # dropout probability inside the LoRA layers
    task_type=TaskType.SEQ_CLS,
)

In [23]:
# Wrap the freshly loaded base model with the LoRA configuration; the returned
# PEFT model is the one that is trained and saved in the cells below.
lora_model = get_peft_model(model, config)

In [24]:
# Report the trainable parameter count, the total parameter count, and the
# trainable percentage for the LoRA-wrapped model.
lora_model.print_trainable_parameters()

trainable params: 38,402 || all params: 109,522,180 || trainable%: 0.035063217331868304


In [25]:
def compute_metrics(eval_pred):
    """Score an evaluation pass with the module-level ``metric`` object.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            example is the argmax of its logits over the last axis.

    Returns:
        Whatever ``metric.compute`` returns for the predicted and reference
        labels (``metric`` is loaded as "accuracy" at the top of the file).
    """
    scores, references = eval_pred
    predicted_labels = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_labels, references=references)

In [26]:
# Create The Trainer Class
# Fine-tune the LoRA-wrapped model on the tokenized training split for two
# epochs, evaluating and saving a checkpoint once per epoch.
# NOTE(review): eval_dataset is the test split, so the per-epoch validation
# numbers (and best-checkpoint selection via load_best_model_at_end) are
# computed on the same data that the final comparison below is reported on --
# confirm this is intended, or hold out a separate validation split.
trainer = Trainer(
    model=lora_model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis",
        learning_rate=2e-3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=2,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        # NOTE(review): no metric_for_best_model is set, so "best" presumably
        # falls back to the library default -- confirm which metric is used.
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# Run the training loop; the per-epoch results are shown below.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2801,0.247465,0.913
2,0.1912,0.233487,0.9298


TrainOutput(global_step=12500, training_loss=0.30033203857421875, metrics={'train_runtime': 4936.2621, 'train_samples_per_second': 10.129, 'train_steps_per_second': 2.532, 'total_flos': 1.31614513152e+16, 'train_loss': 0.30033203857421875, 'epoch': 2.0})

In [27]:
# Save the trained PEFT model to the local "bert-base-lora" directory so it
# can be reloaded from that path in the inference section.
lora_model.save_pretrained("bert-base-lora")

## Perform inference using the fine-tuned model and compare its performance to the original model.

In [28]:
# Load Saved PEFT Model
from peft import AutoPeftModelForSequenceClassification

# Reload the model from the "bert-base-lora" directory written by
# save_pretrained() earlier in the notebook.
# NOTE(review): loading prints a "newly initialized" warning for the
# classifier weights, yet the evaluation below reproduces the 0.9298 accuracy
# from training, so the trained head is presumably restored after that
# warning is emitted -- confirm.
loaded_lora_model = AutoPeftModelForSequenceClassification.from_pretrained("bert-base-lora")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
# Evaluate PEFT Fine-Tuned Model
def compute_metrics(eval_pred):
    """Return the accuracy of the argmax predictions against the labels.

    Args:
        eval_pred: A ``(predictions, labels)`` pair, where ``predictions``
            contains one row of class scores per example.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the mean of the
        element-wise comparison between predicted and reference labels.
    """
    class_scores, expected = eval_pred
    chosen = np.argmax(class_scores, axis=1)
    matches = chosen == expected
    return {"accuracy": matches.mean()}

# Create The Trainer Class
# Wrap the reloaded LoRA model in a Trainer purely to evaluate it:
# trainer.evaluate() is called below and trainer.train() is not, so the
# training-related arguments are configured but not exercised here.
# The arguments and datasets match the baseline Trainer earlier in the
# notebook, so the two evaluation results are directly comparable.
trainer = Trainer(
    model=loaded_lora_model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis",
        learning_rate=2e-3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=2,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],  # same split the baseline was scored on
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# Evaluate the fine-tuned model; the metrics dict is shown below.
trainer.evaluate()

{'eval_loss': 0.23348669707775116,
 'eval_accuracy': 0.9298,
 'eval_runtime': 771.6494,
 'eval_samples_per_second': 32.398,
 'eval_steps_per_second': 8.1}