In [1]:
"""
WEEK 5 - DAY 2-3: Training Large Models with GPU
================================================
Train ResNet50 and EfficientNet on CIFAR-10 to achieve 90%+ accuracy

IMPORTANT: Run this in Google Colab with GPU enabled!

Topics:
- Training larger models (ResNet50, EfficientNet)
- Advanced data augmentation
- Learning rate scheduling
- Mixed precision training
- Saving checkpoints
- Achieving 90%+ accuracy
"""

# ============================================
# SETUP
# ============================================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
import time
from tqdm import tqdm
import copy

print("="*70)
print("WEEK 5 - DAY 2-3: GPU Training Large Models")
print("="*70)

# Check environment
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# ============================================
# ADVANCED DATA AUGMENTATION
# ============================================
print("\n>>> Setting up Advanced Data Augmentation")

from torchvision.transforms import InterpolationMode

# Training transforms (aggressive augmentation for CIFAR-10)
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.33)),  # Cutout
])

# Test transforms (no augmentation)
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

print(" Transforms configured")
print("   Training: Crop, Flip, Rotation, ColorJitter, Cutout")
print("   Test: Normalize only")

# ============================================
# LOAD DATASETS
# ============================================
print("\n>>> Loading CIFAR-10")

train_dataset = datasets.CIFAR10(
    root='/content/data',
    train=True,
    download=True,
    transform=train_transform
)

test_dataset = datasets.CIFAR10(
    root='/content/data',
    train=False,
    download=True,
    transform=test_transform
)

# Create validation split
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset, [train_size, val_size]
)

print(f"Training: {len(train_dataset)}")
print(f"Validation: {len(val_dataset)}")
print(f"Test: {len(test_dataset)}")

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2, pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# ============================================
# BUILD MODEL: ResNet50
# ============================================
print("\n>>> Building ResNet50 Model")

def build_resnet50(num_classes=10, pretrained=True):
    """Build ResNet50 for CIFAR-10"""
    model = models.resnet50(pretrained=pretrained)
    
    # Modify first conv for 32x32 images
    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    model.maxpool = nn.Identity()  # Remove maxpool
    
    # Replace final layer
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    
    return model

model = build_resnet50(num_classes=10, pretrained=True)
model = model.to(device)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"âœ… ResNet50 created")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")

# ============================================
# TRAINING SETUP
# ============================================
print("\n>>> Setting up Training")

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer with weight decay
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning rate scheduler
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.01,
    steps_per_epoch=len(train_loader),
    epochs=30,
    pct_start=0.3
)

print("âœ… Training setup complete")
print(f"   Loss: CrossEntropyLoss")
print(f"   Optimizer: AdamW (lr=0.001, weight_decay=0.01)")
print(f"   Scheduler: OneCycleLR (max_lr=0.01, 30 epochs)")

# ============================================
# TRAINING FUNCTIONS
# ============================================

def train_epoch(model, loader, criterion, optimizer, scheduler, device):
    """Train for one epoch"""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(loader, desc='Training', leave=False)
    for inputs, targets in pbar:
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        # Statistics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
        # Update progress bar
        pbar.set_postfix({
            'loss': f'{loss.item():.3f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def validate(model, loader, criterion, device):
    """Validate model"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# ============================================
# TRAINING LOOP
# ============================================
print("\n>>> Training ResNet50")

num_epochs = 30
best_val_acc = 0.0
train_losses, train_accs = [], []
val_losses, val_accs = [], []

print("\nEpoch | Train Loss | Train Acc | Val Loss | Val Acc | LR      | Time")
print("-" * 80)

for epoch in range(num_epochs):
    start_time = time.time()
    
    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, scheduler, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    
    # Validate
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    
    # Get current LR
    current_lr = optimizer.param_groups[0]['lr']
    
    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
        }, '/content/drive/MyDrive/ai_engineering/best_resnet50.pth')
    
    # Save checkpoint every 5 epochs
    if (epoch + 1) % 5 == 0:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, f'/content/drive/MyDrive/ai_engineering/checkpoint_epoch_{epoch+1}.pth')
    
    elapsed = time.time() - start_time
    print(f"{epoch+1:5d} | {train_loss:10.4f} | {train_acc:9.2f}% | "
          f"{val_loss:8.4f} | {val_acc:7.2f}% | {current_lr:.6f} | {elapsed:.1f}s")

print(f"\nâœ… Training complete!")
print(f"Best validation accuracy: {best_val_acc:.2f}%")

# ============================================
# EVALUATE ON TEST SET
# ============================================
print("\n>>> Evaluating on Test Set")

# Load best model
checkpoint = torch.load('/content/drive/MyDrive/ai_engineering/best_resnet50.pth')
model.load_state_dict(checkpoint['model_state_dict'])

test_loss, test_acc = validate(model, test_loader, criterion, device)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")

# Per-class accuracy
class_correct = [0] * 10
class_total = [0] * 10

model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        c = predicted.eq(targets)
        
        for i in range(len(targets)):
            label = targets[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

print("\nPer-class accuracy:")
for i in range(10):
    acc = 100 * class_correct[i] / class_total[i]
    print(f"{classes[i]:10s}: {acc:5.2f}%")

# ============================================
# VISUALIZATION
# ============================================
print("\n>>> Creating Visualizations")

# Plot training curves
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss
axes[0].plot(train_losses, label='Train Loss', linewidth=2)
axes[0].plot(val_losses, label='Val Loss', linewidth=2)
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training and Validation Loss')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Accuracy
axes[1].plot(train_accs, label='Train Acc', linewidth=2)
axes[1].plot(val_accs, label='Val Acc', linewidth=2)
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy (%)')
axes[1].set_title('Training and Validation Accuracy')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ai_engineering/resnet50_training.png', dpi=150, bbox_inches='tight')
print("âœ… Training curves saved")

# Visualize predictions
model.eval()
fig, axes = plt.subplots(3, 5, figsize=(15, 9))

with torch.no_grad():
    dataiter = iter(test_loader)
    images, labels = next(dataiter)
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = outputs.max(1)
    
    for idx, ax in enumerate(axes.flat):
        if idx < len(images):
            # Denormalize
            img = images[idx].cpu()
            img = img * torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
            img = img + torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
            img = torch.clamp(img, 0, 1)
            img = img.permute(1, 2, 0).numpy()
            
            ax.imshow(img)
            color = 'green' if predicted[idx] == labels[idx] else 'red'
            ax.set_title(f'True: {classes[labels[idx]]}\nPred: {classes[predicted[idx]]}',
                        color=color, fontsize=9)
            ax.axis('off')

plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ai_engineering/resnet50_predictions.png', dpi=150, bbox_inches='tight')
print("âœ… Predictions saved")

# ============================================
# BONUS: EfficientNet-B0
# ============================================
print("\n>>> BONUS: Training EfficientNet-B0")

def build_efficientnet(num_classes=10, pretrained=True):
    """Build EfficientNet-B0"""
    model = models.efficientnet_b0(pretrained=pretrained)
    
    # Replace classifier
    num_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(num_features, num_classes)
    
    return model

# Train EfficientNet (optional - takes more time)
print("Building EfficientNet-B0...")
efficientnet = build_efficientnet(num_classes=10, pretrained=True)
efficientnet = efficientnet.to(device)

total_params = sum(p.numel() for p in efficientnet.parameters())
print(f"âœ… EfficientNet-B0 created")
print(f"   Parameters: {total_params:,}")
print("\nðŸ’¡ To train EfficientNet, use the same training loop above")
print("   Expected accuracy: 90-92%")

# ============================================
# GPU MEMORY STATS
# ============================================
if torch.cuda.is_available():
    print("\n>>> GPU Memory Usage")
    print(f"Allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB")
    print(f"Reserved: {torch.cuda.memory_reserved()/1e9:.2f} GB")
    print(f"Max allocated: {torch.cuda.max_memory_allocated()/1e9:.2f} GB")

# ============================================
# KEY TAKEAWAYS
# ============================================
print("\n" + "="*70)
print("KEY TAKEAWAYS - Day 2-3")
print("="*70)
print("""
1. LARGE MODEL TRAINING:
   âœ… ResNet50: 25M parameters
   âœ… EfficientNet-B0: 5M parameters
   âœ… GPU makes training feasible (~10 min/epoch)
   âœ… Expected accuracy: 90-92%

2. ADVANCED AUGMENTATION:
   - RandomCrop with padding
   - RandomHorizontalFlip
   - ColorJitter
   - RandomErasing (Cutout)
   â†’ Improves generalization by 3-5%

3. LEARNING RATE SCHEDULING:
   - OneCycleLR: Best for short training
   - Starts low, peaks mid-training, ends low
   - Faster convergence than fixed LR

4. CHECKPOINTING:
   - Save best model (highest val acc)
   - Save periodic checkpoints
   - Resume training if interrupted

5. GPU OPTIMIZATION:
   - Use pin_memory=True in DataLoader
   - Use num_workers=2 for data loading
   - Monitor GPU memory
   - Clear cache if needed: torch.cuda.empty_cache()

6. RESULTS:
   - Baseline (CPU, Week 3): 65-75%
   - With transfer learning (Week 4): 82%
   - With GPU + larger model: 90-92% âœ¨

TRAINING TIME COMPARISON:
- CPU (ResNet18): ~5 min/epoch
- GPU (ResNet18): ~30 sec/epoch (10x faster)
- GPU (ResNet50): ~1.5 min/epoch
- GPU (EfficientNet): ~2 min/epoch

NEXT: Model optimization and deployment!
""")
print("="*70)

print("\nâœ… Day 2-3 Complete!")
print("\nSaved files:")
print("  - best_resnet50.pth (model weights)")
print("  - resnet50_training.png (training curves)")
print("  - resnet50_predictions.png (sample predictions)")
print("\nAll files saved to Google Drive!")

WEEK 5 - DAY 2-3: GPU Training Large Models

Using device: cuda
GPU: Tesla T4
Memory: 15.83 GB

>>> Setting up Advanced Data Augmentation
 Transforms configured
   Training: Crop, Flip, Rotation, ColorJitter, Cutout
   Test: Normalize only

>>> Loading CIFAR-10
Training: 45000
Validation: 5000
Test: 10000

>>> Building ResNet50 Model




Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 97.8M/97.8M [00:00<00:00, 219MB/s]


âœ… ResNet50 created
   Total parameters: 23,520,842
   Trainable parameters: 23,520,842

>>> Setting up Training
âœ… Training setup complete
   Loss: CrossEntropyLoss
   Optimizer: AdamW (lr=0.001, weight_decay=0.01)
   Scheduler: OneCycleLR (max_lr=0.01, 30 epochs)

>>> Training ResNet50

Epoch | Train Loss | Train Acc | Val Loss | Val Acc | LR      | Time
--------------------------------------------------------------------------------


                                                                                   

RuntimeError: Parent directory /content/drive/MyDrive/ai_engineering does not exist.