# Classic Fine-tuning Example

This notebook demonstrates full fine-tuning of a pre-trained language model (DistilBERT) for binary sentiment classification on a small subset of the IMDB movie-review dataset.

**Requirements:**
- GPU Runtime (T4 or better)
- ~8GB VRAM for this example

In [None]:
# Install required libraries
!pip install -q transformers datasets accelerate evaluate

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import load_dataset
import numpy as np
from evaluate import load

# Select the compute device: CUDA when a GPU is visible, otherwise the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda") if has_cuda else torch.device("cpu")
print(f"Using device: {device}")

# Report which GPU was found and how much memory it offers.
if has_cuda:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_properties = torch.cuda.get_device_properties(0)
    print(f"Memory: {gpu_properties.total_memory / 1e9:.2f} GB")

## 1. Load and Prepare Dataset

In [None]:
# Fetch the IMDB movie-review corpus used here for sentiment analysis.
dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed, then keep only the leading rows:
# a small sample keeps the demo fast, and the seed keeps the sample repeatable.
shuffled_train = dataset["train"].shuffle(seed=42)
shuffled_test = dataset["test"].shuffle(seed=42)
train_dataset = shuffled_train.select(range(2000))
eval_dataset = shuffled_test.select(range(500))

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")
print(f"\nExample: {train_dataset[0]}")

## 2. Load Pre-trained Model and Tokenizer

In [None]:
# DistilBERT (uncased) is the checkpoint fine-tuned in this example.
model_name = "distilbert-base-uncased"

# Tokenizer matching the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Label maps for the two sentiment classes. The reverse map is derived from
# the forward one so the two can never disagree.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: index for index, name in id2label.items()}

# Load the checkpoint with a sequence-classification head (binary) and place
# it on the selected device in one step.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
).to(device)

# Tally parameter counts in a single pass over the model.
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_counts)
trainable_params = sum(count for count, is_trainable in param_counts if is_trainable)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Percentage trainable: {100 * trainable_params / total_params:.2f}%")

## 3. Tokenize Dataset

In [None]:
def tokenize_function(examples):
    """Tokenize a batch of reviews, truncating each to at most 256 tokens.

    No padding is applied here on purpose. This notebook hands a
    ``DataCollatorWithPadding`` to the ``Trainer``, which pads each batch when
    it is assembled; padding every review to a fixed 256 tokens up front would
    make that collator a no-op and spend compute on padding for short reviews.

    Args:
        examples: A batch passed in by ``Dataset.map(..., batched=True)``;
            its ``"text"`` entry holds the raw review strings.

    Returns:
        The tokenizer's output for the batch (truncated, unpadded sequences).
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=256,
    )

# Run the tokenizer over both splits; batched=True hands tokenize_function
# many rows per call instead of one at a time.
tokenized_train, tokenized_eval = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, eval_dataset)
)

# Collator that turns lists of tokenized rows into padded tensor batches.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

## 4. Define Training Arguments

In [None]:
# Mixed precision needs a CUDA device. This notebook falls back to the CPU when
# no GPU is found, so only request fp16 when a GPU is actually available.
use_fp16 = torch.cuda.is_available()

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`.
# TrainingArguments is a dataclass, so its declared fields tell us which
# spelling the installed version accepts.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    eval_steps=100,
    save_strategy="steps",
    # Checkpoint at every evaluation. With the previous save_steps=500 and only
    # ~375 training steps in total, none of the evaluated steps was ever
    # checkpointed, so load_best_model_at_end had no best checkpoint to restore.
    save_steps=100,
    save_total_limit=2,  # Cap disk usage; the best checkpoint is still kept
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    fp16=use_fp16,  # Mixed precision training (GPU only)
    gradient_checkpointing=False,
    report_to="none",
    **{eval_strategy_kwarg: "steps"},
)

# Optimizer steps per epoch, rounded up because a final partial batch still
# counts as a step. Assumes a single device and no gradient accumulation.
steps_per_epoch = -(-len(tokenized_train) // training_args.per_device_train_batch_size)
print(f"Total training steps: {steps_per_epoch * training_args.num_train_epochs}")

## 5. Define Metrics

In [None]:
# Accuracy scorer from the `evaluate` library.
accuracy = load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the accuracy metric.

    Args:
        eval_pred: A (model outputs, reference labels) pair; this function is
            handed to the ``Trainer`` as its ``compute_metrics`` callback.

    Returns:
        The result of ``accuracy.compute`` for the arg-max class of each row.
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    return accuracy.compute(predictions=predicted_classes, references=labels)

## 6. Initialize Trainer and Train

In [None]:
# Initialize trainer.
# The tokenizer is deliberately not passed to Trainer: that keyword is
# deprecated in newer transformers releases (superseded by `processing_class`),
# and batching is already covered by the explicit data_collator. The tokenizer
# is saved explicitly next to the final model below instead.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Start training
print("Starting training...")
train_result = trainer.train()

# Save the final model together with its tokenizer so that the directory can
# be reloaded on its own with from_pretrained().
trainer.save_model("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

# Print training results
print("\nTraining completed!")
print(f"Training loss: {train_result.training_loss:.4f}")

## 7. Evaluate Model

In [None]:
# Score the trained model on the held-out evaluation subset given to the Trainer.
eval_results = trainer.evaluate()
final_eval_loss = eval_results["eval_loss"]
final_eval_accuracy = eval_results["eval_accuracy"]

print("Evaluation Results:")
print(f"Loss: {final_eval_loss:.4f}")
print(f"Accuracy: {final_eval_accuracy:.4f}")

## 8. Test Model with Examples

In [None]:
def predict_sentiment(text):
    """Classify a single piece of text with the fine-tuned model.

    Args:
        text: The review to classify (one string).

    Returns:
        A ``(label, confidence)`` tuple: the label name looked up in
        ``model.config.id2label`` and the softmax probability of that class.
    """
    # Make inference independent of which cell ran last: in training mode
    # dropout stays active and predictions become noisy and non-repeatable.
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    # Send the inputs to wherever the model's weights currently live.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        logits = model(**inputs).logits

    # One input row in, so take row 0 of the (1, num_labels) probabilities.
    probabilities = torch.nn.functional.softmax(logits, dim=-1)[0]
    predicted_class = torch.argmax(probabilities, dim=-1).item()

    label = model.config.id2label[predicted_class]
    confidence = probabilities[predicted_class].item()

    return label, confidence

# Test examples
test_texts = [
    "This movie was absolutely fantastic! I loved every minute of it.",
    "Terrible film. Waste of time and money.",
    "The plot was interesting but the execution could have been better.",
    "Outstanding performance by the lead actor. Highly recommend!",
    "I fell asleep halfway through. So boring."
]

for text in test_texts:
    label, confidence = predict_sentiment(text)
    # Show at most 50 characters, and only add the ellipsis when the text was
    # actually cut; short reviews are printed in full without a trailing "...".
    preview = text if len(text) <= 50 else f"{text[:50]}..."
    print(f"Text: {preview}")
    print(f"Prediction: {label} (Confidence: {confidence:.2%})")
    print()

## Key Takeaways

1. **Full fine-tuning** updates all model parameters
2. **Memory requirements** scale with model size
3. **Training time** depends on dataset size and epochs
4. **Mixed precision** (fp16) speeds up training on supported GPUs and reduces activation memory
5. **Evaluation metrics** guide model selection

### Next Steps
- Try different learning rates (this notebook uses the `TrainingArguments` default of 5e-5)
- Experiment with larger models
- Use more training data
- Implement early stopping (e.g., with `EarlyStoppingCallback`)
- Try different optimizers (e.g., Adafactor instead of the default AdamW)