# Parameter-Efficient Fine-Tuning with PEFT

This notebook demonstrates how to use the PEFT library for efficient model adaptation using the GPT-2 model on a sequence classification task.

In [None]:
import os
from collections import Counter
from contextlib import nullcontext

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    TaskType,
    get_peft_model,
)
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support
from tqdm.auto import tqdm
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

## Loading and Evaluating the Foundation Model

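First, we load the app-reviews dataset and split it into stratified training and validation sets. We then load the GPT-2 foundation model and its tokenizer, and tokenize both splits so they are ready for training.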

In [None]:
# Seed the RNGs the notebook relies on so that weight initialisation (the new
# classification head and LoRA matrices) and dropout are repeatable across runs.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)  # seeds the CPU generator and all CUDA devices

# Speed (not memory) settings: allow TF32 matmuls, which only takes effect on
# GPUs that support TF32, and let cuDNN auto-tune its kernels when a GPU exists.
torch.backends.cuda.matmul.allow_tf32 = True
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True

# Load dataset with improved error handling and progress tracking
def load_app_reviews():
    """Load ``app_reviews.csv`` and print a short summary of its contents.

    The CSV is read with ``datasets.load_dataset``; up to three attempts are
    made and the exception from the final attempt is re-raised unchanged.
    After loading, the example count, feature schema, first entry and the
    share of each 1-5 star rating are printed.

    Returns:
        The loaded ``train`` split.

    Raises:
        ValueError: If the file loads but contains no rows.
        Exception: Whatever ``load_dataset`` raised on the last failed attempt.
    """
    max_attempts = 3
    print("\nLoading app reviews dataset...\n")
    for attempt in range(1, max_attempts + 1):
        try:
            print(f"Attempt {attempt}/{max_attempts} to load dataset...")
            dataset = load_dataset(
                "csv",
                data_files="app_reviews.csv",
                split="train"
            )
            break
        except Exception as e:
            if attempt == max_attempts:
                raise  # bare raise preserves the original traceback
            print(f"Attempt {attempt} failed: {str(e)}\nRetrying...")

    total = len(dataset)
    if total == 0:
        # Fail early with a clear message instead of an IndexError on
        # dataset[0] or a ZeroDivisionError in the percentage below.
        raise ValueError("app_reviews.csv was loaded but contains no examples")

    print(f"Dataset loaded successfully with {total} examples!")
    print(f"Dataset features: {dataset.features}")
    print(f"\nSample entry: {dataset[0]}")

    # Count all ratings in a single pass rather than re-scanning per star value
    rating_counts = Counter(dataset['rating'])
    print("\nRating distribution:")
    for rating in range(1, 6):
        count = rating_counts.get(rating, 0)
        print(f"{rating} stars: {count} reviews ({count/total*100:.1f}%)")

    return dataset

# Load the full review dataset once; the statements below split it into train/eval subsets.
dataset = load_app_reviews()

# Stratified split to maintain rating distribution
def stratified_split(dataset, train_ratio=0.8, seed=42):
    """Split ``dataset`` into train/eval subsets, preserving the rating mix.

    Row indices are grouped by their ``'rating'`` value, each group is shuffled
    and then cut at ``train_ratio``, so every rating contributes roughly the
    same fraction to both subsets.

    Args:
        dataset: Object exposing ``dataset['rating']`` (one rating per row) and
            ``dataset.select(indices)``.
        train_ratio: Fraction of each rating group placed in the train subset;
            must lie in ``[0, 1]``.
        seed: Seed for the shuffle. The default makes the split reproducible;
            pass ``None`` for a different split on every call.

    Returns:
        ``(train_dataset, eval_dataset)`` as produced by ``dataset.select``.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    # A local generator keeps the split independent of the global NumPy RNG state
    rng = np.random.default_rng(seed)

    # Group by whatever rating values actually occur, so an unexpected value
    # (e.g. 0 or 6) forms its own group instead of raising KeyError.
    indices_by_rating = {}
    for idx, rating in enumerate(dataset['rating']):
        indices_by_rating.setdefault(rating, []).append(idx)

    train_indices = []
    eval_indices = []
    for rating_indices in indices_by_rating.values():
        rng.shuffle(rating_indices)  # in-place shuffle of the Python list
        split_idx = int(len(rating_indices) * train_ratio)
        train_indices.extend(rating_indices[:split_idx])
        eval_indices.extend(rating_indices[split_idx:])

    return dataset.select(train_indices), dataset.select(eval_indices)

# Split with the default train_ratio (0.8) and print both sizes as a quick sanity check.
train_dataset, eval_dataset = stratified_split(dataset)
print(f"\nSplit complete. Train size: {len(train_dataset)}, Val size: {len(eval_dataset)}")

In [None]:
# Initialize tokenizer and sequence-classification model
model_name = "gpt2"
num_labels = 5  # 1-5 star ratings

tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 ships without a padding token; reuse EOS so batches can be padded.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Weights stay in the default float32. The Trainer below enables fp16 mixed
# precision, whose gradient scaler expects full-precision trainable weights;
# loading the model in bfloat16 conflicts with it.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    device_map='auto'
)

# The GPT-2 classification head pools the last non-padding token, which it finds
# via config.pad_token_id. Without it the model refuses batches larger than 1.
model.config.pad_token_id = tokenizer.pad_token_id

# Enable gradient checkpointing for memory efficiency; the key/value cache is
# incompatible with checkpointing and is not needed for classification.
model.gradient_checkpointing_enable()
model.config.use_cache = False

# Tokenization function: encodes review text and attaches 0-based class labels
def tokenize_function(examples):
    """Tokenize a batch of reviews and add the training labels.

    Args:
        examples: Batch mapping with a ``"review"`` list of strings and,
            optionally, a ``"rating"`` list of 1-5 star values.

    Returns:
        The tokenizer output (plain Python lists, since ``return_tensors`` is
        ``None``), truncated to 512 tokens. When ``"rating"`` is present a
        ``"labels"`` list is added with each rating shifted to the 0-4 class
        id the model expects.
    """
    encoded = tokenizer(
        examples["review"],
        # Pads only to the longest review in *this* map batch, so different
        # map batches can end up with different sequence lengths.
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors=None
    )
    if "rating" in examples:
        # The Trainer reads the target from a "labels" column; star ratings
        # 1-5 map to class ids 0-4 (predict_rating adds the 1 back).
        encoded["labels"] = [int(rating) - 1 for rating in examples["rating"]]
    return encoded

# Run the tokenizer over both splits in batches; `desc` labels each progress bar
print("\nTokenizing datasets...")
_map_options = dict(function=tokenize_function, batched=True)
train_dataset = train_dataset.map(desc="Tokenizing train dataset", **_map_options)
eval_dataset = eval_dataset.map(desc="Tokenizing eval dataset", **_map_options)

## Performing Parameter-Efficient Fine-Tuning

In [None]:
# Count the base model's parameters *before* wrapping: get_peft_model injects
# the LoRA layers into `model` in place, which would inflate the count.
base_param_count = sum(p.numel() for p in model.parameters())

# Create PEFT (LoRA) config
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=16,  # LoRA rank
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["c_attn", "c_proj"],  # GPT-2 attention / projection layers
    # GPT-2 implements these layers as Conv1D, which stores weights as
    # (fan_in, fan_out); LoRA must be told so explicitly.
    fan_in_fan_out=True,
)

# Create PEFT model
peft_model = get_peft_model(model, peft_config)
print("\nModel configuration:")
print(f"Base model parameters: {base_param_count:,}")
# print_trainable_parameters() prints its own summary and returns None, so it
# must be called directly rather than interpolated into an f-string.
peft_model.print_trainable_parameters()

In [None]:
# Enhanced metrics computation
def compute_metrics(pred):
    """Compute evaluation metrics and save a confusion-matrix heatmap.

    Passed to the ``Trainer`` as its ``compute_metrics`` callback.

    Args:
        pred: Evaluation output exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the last axis indexes classes).

    Returns:
        dict with ``accuracy`` and weighted ``f1``, ``precision``, ``recall``.

    Side effects:
        Overwrites ``confusion_matrices.png`` in the working directory.
    """
    labels = pred.label_ids
    scores = pred.predictions
    preds = scores.argmax(-1)
    # Fix the class list from the model output width so the confusion matrix
    # keeps the same shape even when a class is missing from this eval run.
    class_ids = list(range(scores.shape[-1]))

    # zero_division=0 scores a never-predicted class as 0 without a warning
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    # Confusion matrix, labelled in stars (class id i corresponds to i + 1 stars)
    cm = confusion_matrix(labels, preds, labels=class_ids)
    star_labels = [class_id + 1 for class_id in class_ids]
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=star_labels,
        yticklabels=star_labels,
        ax=ax,
    )
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    fig.savefig('confusion_matrices.png')
    plt.close(fig)  # release the figure; this runs once per evaluation

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Training configuration
training_args = TrainingArguments(
    output_dir="peft_model",
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",  # must match the evaluation strategy for load_best_model_at_end
    load_best_model_at_end=True,
    push_to_hub=False,
    remove_unused_columns=True,
    fp16=torch.cuda.is_available(),  # Enable mixed precision training
    gradient_accumulation_steps=2,  # effective train batch = 16 * 2 per device
    logging_steps=50,
    warmup_ratio=0.1
)

# Pad every batch to its own longest sequence. Without an explicit collator the
# Trainer uses the default one, which cannot stack examples whose token lists
# have different lengths (tokenization only pads within each map batch).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Initialize trainer
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train the model
print("\nStarting training...")
train_result = trainer.train()
print(f"\nTraining completed! Final metrics: {train_result.metrics}")

## Model Evaluation and Comparison

In [None]:
def predict_rating(model, text):
    """Predict the star rating of a single review.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        text: One review string; it is truncated to 512 tokens.

    Returns:
        ``(predicted_rating, confidence)`` where ``predicted_rating`` is the
        arg-max class id plus one (class ids 0-4 map to 1-5 stars) and
        ``confidence`` is the softmax probability of that class as a float.
    """
    # Follow the model's own device instead of assuming the default CUDA device
    device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Autocast only makes sense on GPU; on CPU use a no-op context
    amp_context = torch.autocast(device_type="cuda") if device.type == "cuda" else nullcontext()

    # Inference must run in eval mode so dropout is disabled; restore the
    # previous mode afterwards so a training run is not silently altered.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad(), amp_context:
            logits = model(**inputs).logits
    finally:
        model.train(was_training)

    # Softmax in float32 for a stable confidence value regardless of autocast dtype
    probabilities = torch.nn.functional.softmax(logits.float(), dim=-1)
    confidence, predicted_class = probabilities[0].max(dim=-1)
    return predicted_class.item() + 1, confidence.item()

# Example reviews for testing
test_reviews = [
    "Great app! Works perfectly and has all the features I need.",
    "Terrible experience. Crashes constantly and lost my data.",
    "Pretty good but could use some improvements in the UI.",
    "Average app, nothing special but gets the job done.",
    "Mostly positive experience with minor bugs."
]

print("\nComparing base model vs PEFT model predictions:\n")
print("{:<60} {:<15} {:<15} {:<15} {:<15}".format(
    "Review", "Base Rating", "Base Conf.", "PEFT Rating", "PEFT Conf."
))
print("-" * 120)

for review in test_reviews:
    # get_peft_model wrapped `model` in place, so calling `model` directly would
    # still run the LoRA layers and just repeat the PEFT prediction. Temporarily
    # disabling the adapter yields the real base-model output.
    with peft_model.disable_adapter():
        base_pred, base_conf = predict_rating(peft_model, review)
    peft_pred, peft_conf = predict_rating(peft_model, review)

    # Only add an ellipsis when the review is actually cut to fit the column
    shown_review = review if len(review) <= 60 else review[:57] + "..."
    print("{:<60} {:<15d} {:<15.2f} {:<15d} {:<15.2f}".format(
        shown_review,
        base_pred,
        base_conf * 100,
        peft_pred,
        peft_conf * 100
    ))