# Week 09 — PyTorch Fundamentals

This notebook gives you hands-on practice with PyTorch. You'll:
- Master tensors, broadcasting, and autograd
- Build models using `nn.Module`
- Implement training loops with DataLoader
- Save and load checkpoints

In [None]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Seed PyTorch's default random number generator so reruns draw the same numbers.
SEED = 42
torch.manual_seed(SEED)

# Report the installed PyTorch version and whether a CUDA device is usable.
print("PyTorch version: " + str(torch.__version__))
print("CUDA available: " + str(torch.cuda.is_available()))

## 1. Tensors and Autograd

Learn tensor operations, broadcasting, and automatic differentiation.

In [None]:
# --- Tensor creation: explicit values, standard-normal samples, all zeros ---
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.randn(3, 4)
z = torch.zeros(2, 3)

print("Tensor x:", x)
print("Tensor y shape:", y.shape)
print("Tensor z:", z)

# --- Broadcasting: a column vector plus a row vector expands to a full grid ---
a = torch.tensor([1.0, 2.0, 3.0]).reshape(3, 1)  # shape: (3, 1)
b = torch.tensor([10.0, 20.0, 30.0])             # shape: (3,)
c = a + b  # (3, 1) + (3,) -> (3, 3); c[i, j] == a[i, 0] + b[j]
print("\nBroadcasting result shape:", c.shape)
print(c)

# --- Autograd: differentiate y = x^2 + 3x + 1 at x = 2 ---
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(2) + 3 * x + 1
x_val = x.item()
print("\ny = x² + 3x + 1")
print(f"y({x_val}) = {y.item()}")

# backward() fills x.grad with dy/dx evaluated at the current x
y.backward()
print(f"dy/dx at x={x_val}: {x.grad.item()}")
print(f"Analytic dy/dx = 2x + 3 = {2 * x_val + 3}")

## 2. Build a Model with nn.Module

Re-implement the from-scratch neural network using PyTorch's `nn.Module`.

In [None]:
# Define a simple MLP
class SimpleMLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output units; returned as-is, with no final
            activation applied.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names become the state_dict key prefixes (fc1.*, fc2.*).
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate the network: 10 input features, 50 hidden units, 2 outputs
model = SimpleMLP(input_size=10, hidden_size=50, output_size=2)
print(model)

# Tally parameters in one pass; "trainable" counts only tensors with requires_grad=True
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements
print(f"\nTotal parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

# Run a random batch of 5 samples through the model to check the output shape
x_sample = torch.randn(5, 10)
output = model(x_sample)
print(f"\nInput shape: {x_sample.shape}")
print(f"Output shape: {output.shape}")

## 3. Training Loop with DataLoader

Implement a complete training loop with mini-batches.

In [None]:
# Generate synthetic dataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

BATCH_SIZE = 32  # mini-batch size shared by the train and test loaders

X_np, y_np = make_classification(n_samples=1000, n_features=10, n_informative=8,
                                 n_classes=2, random_state=42)
X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    X_np, y_np, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors with explicit dtypes, using torch.tensor(...) in
# place of the legacy torch.FloatTensor / torch.LongTensor constructors.
# Features are float32 (the default dtype of nn.Linear weights); labels are
# int64 class indices, as nn.CrossEntropyLoss expects for its targets.
X_train = torch.tensor(X_train_np, dtype=torch.float32)
y_train = torch.tensor(y_train_np, dtype=torch.long)
X_test = torch.tensor(X_test_np, dtype=torch.float32)
y_test = torch.tensor(y_test_np, dtype=torch.long)

# Create DataLoaders: shuffle the training set each epoch, keep the test order fixed
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity checks: the split lost no samples and the dtypes are as intended
assert len(train_dataset) + len(test_dataset) == len(X_np)
assert X_train.dtype == torch.float32 and y_train.dtype == torch.long

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Batches per epoch: {len(train_loader)}")

In [None]:
# Training function
def train_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to optimise; put into training mode with ``model.train()``.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
            Assumed to return the mean loss over the batch (the default
            reduction of PyTorch loss modules).
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        tuple: ``(mean_loss, accuracy)``, both averaged over every sample
        seen during the epoch.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for batch_x, batch_y in train_loader:
        batch_size = batch_y.size(0)

        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # Backward pass: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track metrics. The batch-mean loss is weighted by the batch size so
        # that a smaller final batch does not count as much as a full one.
        loss_sum += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        correct += (predicted == batch_y).sum().item()
        total += batch_size

    return loss_sum / total, correct / total

def evaluate(model, test_loader, criterion):
    """Compute loss and accuracy of ``model`` over ``test_loader`` without training.

    The model is switched to evaluation mode with ``model.eval()`` and
    gradient tracking is disabled for the whole pass.

    Args:
        model: Module to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
            Assumed to return the mean loss over the batch (the default
            reduction of PyTorch loss modules).

    Returns:
        tuple: ``(mean_loss, accuracy)``, both averaged over every sample
        in the loader.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_size = batch_y.size(0)
            outputs = model(batch_x)

            # Weight the batch-mean loss by the batch size so a short final
            # batch contributes in proportion to the samples it holds.
            loss_sum += criterion(outputs, batch_y).item() * batch_size
            predicted = outputs.argmax(dim=1)
            correct += (predicted == batch_y).sum().item()
            total += batch_size

    return loss_sum / total, correct / total

# Fresh model, loss function and optimizer for the training run
model = SimpleMLP(input_size=10, hidden_size=64, output_size=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 50

# Per-epoch history; each list gains one entry per epoch
train_losses = []
test_losses = []
train_accs = []
test_accs = []

print("Training...")
for epoch in range(n_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = evaluate(model, test_loader, criterion)

    for history, value in (
        (train_losses, train_loss),
        (test_losses, test_loss),
        (train_accs, train_acc),
        (test_accs, test_acc),
    ):
        history.append(value)

    # Only report every 10th epoch to keep the log short
    if (epoch + 1) % 10 != 0:
        continue
    print(f"Epoch {epoch+1}/{n_epochs}: "
          f"Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, "
          f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}")

# Plot results: loss curves on the left panel, accuracy curves on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panels = (
    (axes[0], train_losses, test_losses, 'Loss', 'Loss Curves'),
    (axes[1], train_accs, test_accs, 'Accuracy', 'Accuracy Curves'),
)
for ax, train_series, test_series, y_label, title in panels:
    ax.plot(train_series, label='Train', linewidth=2)
    ax.plot(test_series, label='Test', linewidth=2)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Checkpointing: Save and Load Models

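Save the model and optimizer state to a checkpoint file, then load it into a fresh model and verify that it matches. This is the same state you would restore to resume training.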

In [None]:
# Save checkpoint
CHECKPOINT_PATH = 'model_checkpoint.pth'

# Bundle the epoch counter, model weights, optimizer state and the latest
# losses into a single dictionary.
checkpoint = {
    'epoch': n_epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'train_loss': train_losses[-1],
    'test_loss': test_losses[-1],
}

torch.save(checkpoint, CHECKPOINT_PATH)
print(f"Checkpoint saved to '{CHECKPOINT_PATH}'")

# Load checkpoint into freshly constructed objects with the same architecture
loaded_model = SimpleMLP(input_size=10, hidden_size=64, output_size=2)
loaded_optimizer = optim.Adam(loaded_model.parameters(), lr=0.001)

# weights_only=True (PyTorch >= 1.13) restricts unpickling to tensors and plain
# Python containers, so a tampered checkpoint file cannot execute arbitrary code.
# map_location='cpu' lets the file load on a machine that lacks the device the
# tensors were saved from.
checkpoint = torch.load(CHECKPOINT_PATH, map_location='cpu', weights_only=True)
loaded_model.load_state_dict(checkpoint['model_state_dict'])
loaded_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']

print(f"\nCheckpoint loaded from epoch {epoch}")

# Verify loaded model
loaded_model.eval()
test_loss, test_acc = evaluate(loaded_model, test_loader, criterion)
print(f"Loaded model test accuracy: {test_acc:.4f}")

# Compare parameters tensor by tensor between the trained and the restored model
params_match = all(
    torch.equal(p1, p2)
    for p1, p2 in zip(model.parameters(), loaded_model.parameters())
)
print(f"Parameters match: {params_match}")

## 5. Gradient Comparison with Manual Implementation

Compare PyTorch autograd to manual gradients from Week 07.

In [None]:
# Simple function: y = w1*x1 + w2*x2 + b
x1 = torch.tensor(2.0, requires_grad=True)
x2 = torch.tensor(3.0, requires_grad=True)
w1 = torch.tensor(0.5, requires_grad=True)
w2 = torch.tensor(-0.3, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

# Forward
y = w1 * x1 + w2 * x2 + b
print(f"y = {y.item():.4f}")

# Backward (autograd): populates .grad on every leaf tensor above
y.backward()

# Collect both sets of gradients so they can be compared, not just printed.
autograd_grads = {"w1": w1.grad.item(), "w2": w2.grad.item(), "b": b.grad.item()}
# Analytic derivatives of y: dy/dw1 = x1, dy/dw2 = x2, dy/db = 1
manual_grads = {"w1": x1.item(), "w2": x2.item(), "b": 1.0}

print("\nAutograd gradients:")
print(f"  dy/dw1 = {autograd_grads['w1']:.4f}")
print(f"  dy/dw2 = {autograd_grads['w2']:.4f}")
print(f"  dy/db  = {autograd_grads['b']:.4f}")

# Manual gradients (should match)
print("\nManual gradients:")
print(f"  dy/dw1 = x1 = {manual_grads['w1']:.4f}")
print(f"  dy/dw2 = x2 = {manual_grads['w2']:.4f}")
print(f"  dy/db  = 1  = {manual_grads['b']:.4f}")

# Check the claim before announcing it: fail loudly if any gradient disagrees.
for grad_name, manual_value in manual_grads.items():
    assert abs(autograd_grads[grad_name] - manual_value) < 1e-6, (
        f"autograd and manual gradients differ for {grad_name}"
    )

print("\n→ Autograd matches manual computation!")

## Exercises for Further Practice

1. **Custom Dataset**: Implement a custom `Dataset` class for image data
2. **Learning Rate Scheduler**: Add `torch.optim.lr_scheduler` and compare schedules
3. **GPU Training**: Move model and data to GPU (if available) and measure speedup
4. **Data Augmentation**: Implement transforms for data augmentation
5. **TensorBoard**: Log metrics to TensorBoard for visualization

## Deliverables Checklist

- [ ] PyTorch model implementation using nn.Module
- [ ] Training loop with DataLoader
- [ ] Checkpoint save/load demonstration
- [ ] Gradient comparison between autograd and manual computation

## Recommended Resources

- Official PyTorch tutorials (60-minute blitz)
- PyTorch documentation on autograd and nn.Module
- Fast.ai course (practical deep learning with PyTorch)