# Fine-tuning BERT for Sentiment Analysis

This notebook demonstrates how to fine-tune a pre-trained BERT model for sentiment analysis on the IMDB dataset.

## What is Fine-tuning?

Fine-tuning is the process of taking a pre-trained model and further training it on a specific task or dataset. This approach leverages the knowledge learned during pre-training while adapting the model to your specific needs.

## Why Fine-tune?

1. **Better Performance**: Fine-tuned models typically outperform generic pre-trained models on specific tasks
2. **Less Data Required**: Pre-training provides a strong starting point, so less task-specific data is needed
3. **Faster Training**: Starting from pre-trained weights converges faster than training from scratch

In [None]:
# Import required libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    AdamW,
    get_linear_schedule_with_warmup
)
from datasets import load_dataset
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Seed PyTorch and NumPy with the same value so reruns draw the same random numbers
for seed_fn in (torch.manual_seed, np.random.seed):
    seed_fn(42)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

## Load and Prepare the IMDB Dataset

In [None]:
# Load IMDB dataset
print('Loading IMDB dataset...')
# Using smaller subset for faster training in this demo.
# The IMDB splits are ordered by label, so a plain `train[:2000]` /
# `test[:500]` slice would contain reviews of a single class only. Shuffle
# each split with a fixed seed first, then keep the first N rows, so both
# classes are represented and the subset is reproducible.
train_dataset = load_dataset('imdb', split='train').shuffle(seed=42).select(range(2000))
test_dataset = load_dataset('imdb', split='test').shuffle(seed=42).select(range(500))

print(f'Training samples: {len(train_dataset)}')
print(f'Test samples: {len(test_dataset)}')

# Show sample
print(f'\nSample review:')
print(f"Text: {train_dataset[0]['text'][:200]}...")
print(f"Label: {'Positive' if train_dataset[0]['label'] == 1 else 'Negative'}")

## Load Pre-trained BERT Model and Tokenizer

In [None]:
# Load BERT tokenizer and model
model_name = 'bert-base-uncased'
print(f'Loading {model_name}...')

tokenizer = BertTokenizer.from_pretrained(model_name)

# Two output classes (negative / positive); attention maps and hidden states
# are not requested from the model.
classifier_options = dict(
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)
model = BertForSequenceClassification.from_pretrained(model_name, **classifier_options).to(device)

parameter_count = sum(p.numel() for p in model.parameters())
print(f'Model parameters: {parameter_count:,}')

## Create Custom Dataset Class

In [None]:
class IMDBDataset(Dataset):
    """Map-style dataset that tokenizes one review each time it is indexed.

    Args:
        dataset: Indexable collection whose items expose a ``'text'`` entry
            and a ``'label'`` entry.
        tokenizer: Callable tokenizer; it is invoked with the review text and
            must return a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        max_length: Length every review is padded or truncated to.
    """

    def __init__(self, dataset, tokenizer, max_length=512):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the tokenized review and its label for position ``idx``.

        Returns:
            dict: ``'input_ids'`` and ``'attention_mask'`` flattened to 1-D,
            plus ``'label'`` as a ``torch.long`` scalar tensor.
        """
        item = self.dataset[idx]

        # Call the tokenizer directly: `encode_plus` is deprecated in favour
        # of the tokenizer's `__call__`, which takes the same arguments.
        encoding = self.tokenizer(
            item['text'],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            # flatten() drops the leading batch dimension of the encoding
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(item['label'], dtype=torch.long),
        }

# Wrap both splits with the same tokenizer and a 256-token length limit
train_data, test_data = (
    IMDBDataset(split, tokenizer, max_length=256)
    for split in (train_dataset, test_dataset)
)

# Batch the examples; only the training batches are shuffled
batch_size = 16
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

print(f'Training batches: {len(train_loader)}')
print(f'Test batches: {len(test_loader)}')

## Setup Training Configuration

In [None]:
# Training hyperparameters
epochs = 3
learning_rate = 2e-5

# Setup optimizer.
# Use PyTorch's own AdamW: the `transformers.AdamW` class is deprecated in
# favour of `torch.optim.AdamW`. weight_decay=0.0 is passed explicitly because
# torch defaults to 0.01, whereas the transformers class defaulted to 0.0.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    eps=1e-8,
    weight_decay=0.0,
)

# Setup learning rate scheduler: linear decay over every optimizer step,
# with no warmup phase
total_steps = len(train_loader) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

print(f'Total training steps: {total_steps}')

## Training Loop

In [None]:
def train_epoch(model, dataloader, optimizer, scheduler, device):
    """Run one optimisation pass over ``dataloader``.

    Each batch must provide ``'input_ids'``, ``'attention_mask'`` and
    ``'label'`` tensors. Gradients are clipped to a norm of 1.0 before every
    optimizer step, and the scheduler is stepped once per batch.

    Returns:
        The sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    running_loss = 0

    batches = tqdm(dataloader, desc='Training')
    for batch in batches:
        # Put the three tensors of this batch on the target device
        ids, mask, targets = (
            batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'label')
        )

        # Clear stale gradients, then compute the loss for this batch
        model.zero_grad()
        loss = model(input_ids=ids, attention_mask=mask, labels=targets).loss
        running_loss += loss.item()

        # Backpropagate and cap the gradient norm before updating
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        batches.set_postfix({'loss': loss.item()})

    return running_loss / len(dataloader)

def evaluate(model, dataloader, device):
    """Score ``model`` on every batch of ``dataloader`` without gradients.

    The predicted class of each example is the index of its largest logit.

    Returns:
        tuple: ``(accuracy, predictions, true_labels)`` where the two lists
        hold one entry per evaluated example, in iteration order.
    """
    model.eval()
    all_preds, all_targets = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Evaluating'):
            ids, mask, targets = (
                batch[key].to(device)
                for key in ('input_ids', 'attention_mask', 'label')
            )

            # No labels are passed, so only the logits are read here
            logits = model(input_ids=ids, attention_mask=mask).logits
            batch_preds = logits.argmax(dim=1)

            all_preds.extend(batch_preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    return accuracy_score(all_targets, all_preds), all_preds, all_targets

## Train the Model

In [None]:
# Per-epoch metrics collected during fine-tuning
train_losses, train_accuracies, test_accuracies = [], [], []

print('Starting fine-tuning...\n')

for epoch_number in range(1, epochs + 1):
    print(f'Epoch {epoch_number}/{epochs}')
    print('-' * 50)

    # One optimisation pass over the training batches
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, device)
    train_losses.append(train_loss)

    # Accuracy on the training batches first, then on the test batches
    train_acc = evaluate(model, train_loader, device)[0]
    train_accuracies.append(train_acc)

    test_acc = evaluate(model, test_loader, device)[0]
    test_accuracies.append(test_acc)

    print(f'Train Loss: {train_loss:.4f}')
    print(f'Train Accuracy: {train_acc:.4f}')
    print(f'Test Accuracy: {test_acc:.4f}\n')

print('Fine-tuning completed!')

## Visualize Training Progress

In [None]:
# Plot training curves: loss on the left, accuracies on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
epoch_ticks = range(1, epochs + 1)

# Loss curve
ax1.plot(epoch_ticks, train_losses, marker='o', label='Training Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Training Loss Over Time')

# Accuracy curves
ax2.plot(epoch_ticks, train_accuracies, marker='o', label='Train Accuracy')
ax2.plot(epoch_ticks, test_accuracies, marker='s', label='Test Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.set_title('Accuracy Over Time')

# Legend and grid are configured the same way on both panels
for panel in (ax1, ax2):
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

## Final Evaluation

In [None]:
# Final evaluation
print('Final Evaluation on Test Set:\n')
test_acc, predictions, true_labels = evaluate(model, test_loader, device)

print(f'Test Accuracy: {test_acc:.4f}\n')
print('Classification Report:')
# Pin the label ids so each name maps to the intended class (0 -> Negative,
# 1 -> Positive). Without `labels`, scikit-learn infers the classes from the
# data and raises ValueError when only one class is present, because the
# number of classes no longer matches the two target_names.
print(classification_report(
    true_labels,
    predictions,
    labels=[0, 1],
    target_names=['Negative', 'Positive'],
))

## Test on Custom Examples

In [None]:
def predict_sentiment(text, model, tokenizer, device, max_length=256):
    """Predict the sentiment of a single piece of text.

    Args:
        text: The review text to classify.
        model: Classifier that accepts ``input_ids`` and ``attention_mask``
            keyword arguments and returns an object with a ``logits``
            attribute.
        tokenizer: Callable tokenizer returning ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        device: Device the encoded tensors are moved to before the forward
            pass.
        max_length: Length the text is padded or truncated to. Defaults to
            256.

    Returns:
        tuple: ``(sentiment, confidence)``. ``sentiment`` is ``'Positive'``
        when class index 1 has the largest logit and ``'Negative'``
        otherwise; ``confidence`` is the softmax probability of the
        predicted class.
    """
    model.eval()

    # Call the tokenizer directly: `encode_plus` is deprecated in favour of
    # the tokenizer's `__call__`, which takes the same arguments.
    encoding = tokenizer(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
        probs = torch.softmax(logits, dim=1)
        prediction = torch.argmax(logits, dim=1).item()

    sentiment = 'Positive' if prediction == 1 else 'Negative'
    # Row 0 is the only example passed through the model
    confidence = probs[0][prediction].item()

    return sentiment, confidence

# Hand-written reviews used to try out the fine-tuned model
test_reviews = [
    "This movie was absolutely fantastic! I loved every minute of it.",
    "Terrible film. Waste of time and money. Very disappointed.",
    "The acting was good but the plot was confusing and boring.",
    "One of the best movies I've ever seen. Highly recommended!",
    "Not great, not terrible. Just an average movie."
]

print('Predictions on Custom Reviews:\n')
for position, review_text in enumerate(test_reviews, start=1):
    result = predict_sentiment(review_text, model, tokenizer, device)
    label, score = result
    print(f"Review {position}: {review_text}")
    print(f"Predicted: {label} (confidence: {score:.2%})")
    print("-" * 80)

## Save the Fine-tuned Model

In [None]:
# Write the fine-tuned model and its tokenizer into one directory
output_dir = './finetuned_bert_sentiment'

print(f'Saving model to {output_dir}...')
# Model first, then tokenizer; both are saved through save_pretrained
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)
print('Model saved successfully!')

# To load the model later:
# model = BertForSequenceClassification.from_pretrained(output_dir)
# tokenizer = BertTokenizer.from_pretrained(output_dir)

## Key Takeaways

1. **Fine-tuning Process**: Start with a pre-trained model and adapt it to your specific task
2. **Data Efficiency**: Fine-tuning requires less data than training from scratch
3. **Learning Rate**: Use a small learning rate (2e-5) to avoid catastrophic forgetting
4. **Layer Freezing**: You can optionally freeze early layers and only train the later layers
5. **Performance**: Fine-tuned models typically achieve higher accuracy on specific tasks

## Comparison: Pre-trained vs Fine-tuned

- **Pre-trained models**: General-purpose, work reasonably well across many tasks
- **Fine-tuned models**: Task-specific, achieve better performance on the target task

## Next Steps

- Experiment with different learning rates and batch sizes
- Try freezing some layers and only fine-tuning the top layers
- Use different pre-trained models (RoBERTa, DistilBERT, etc.)
- Apply fine-tuning to other tasks (NER, QA, multi-class text classification, etc.)