# VGG16 Fine-tuning - Cats vs Dogs
## Google Colab Training Notebook

Questo notebook implementa il fine-tuning di VGG16 seguendo lo stile del Lab3 con integrazione Wandb.

**Assicurati di:**
- ✅ Abilitare GPU: Runtime → Change runtime type → GPU
- ✅ Avere un account Wandb: [wandb.ai](https://wandb.ai)

## 1️⃣ Setup - Clone Repository e Install Dependencies

In [None]:
# Clone the project repository and make it the working directory; later cells
# run train.py / eval.py and import dataset, models and utils by relative path.
!git clone https://github.com/ghMellow/polito-aml-project_skeleton.git
%cd polito-aml-project_skeleton

# Install the third-party packages used by the notebook (-q keeps pip quiet).
!pip install -q torch torchvision numpy pandas Pillow torchsummary wandb kagglehub matplotlib

print("\n✅ Setup completato!")

## 2️⃣ Wandb Authentication

Ottieni la tua API key da: https://wandb.ai/authorize

In [None]:
import wandb

# Authenticate with Weights & Biases without writing the secret into the
# notebook: wandb.login() picks up the WANDB_API_KEY environment variable when
# it is set and otherwise prompts for the key (get it from
# https://wandb.ai/authorize). A key pasted into a cell is easily leaked when
# the notebook is shared or committed.
wandb.login()

print("✅ Wandb autenticato!")


## 3️⃣ Download Dataset Cats vs Dogs

In [None]:
import kagglehub
import shutil
import os

# Download dataset
print("Downloading dataset... (potrebbe richiedere alcuni minuti)")
path = kagglehub.dataset_download("tongpython/cat-and-dog")
print(f"✓ Dataset scaricato in: {path}")

# Move to data folder
os.makedirs('./data', exist_ok=True)
for item in os.listdir(path):
    src = os.path.join(path, item)
    dst = os.path.join('./data', item)
    if not os.path.exists(dst):
        shutil.move(src, dst)
        print(f"  - Moved {item}")

print("\n✅ Dataset pronto!")
print("Struttura:")
!ls -la ./data/

## 4️⃣ Metodo 1: Training con CLI (Veloce)

Usa direttamente lo script `train.py` con Wandb integrato.

In [None]:
# Training con script CLI
!python train.py \
    --data_dir ./data \
    --epochs 10 \
    --batch_size 128 \
    --lr 0.0001 \
    --use_wandb \
    --wandb_project "vgg16-cats-vs-dogs-colab"

### Evaluation (Metodo CLI)

In [None]:
# Run the repository's evaluation script on ./checkpoints/best_model.pth using
# the data under ./data (presumably the checkpoint written by train.py above —
# confirm against train.py).
!python eval.py \
    --checkpoint ./checkpoints/best_model.pth \
    --data_dir ./data

---

## 5️⃣ Metodo 2: Training Programmatico (Stile Slide Professore)

Controllo completo del training loop, seguendo lo stile delle slide del professore.

In [None]:
# Imports
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import wandb

# Custom modules
from dataset import CustomImageDataset, create_annotations_csv
from models import create_vgg16_model, count_trainable_parameters
from utils import get_train_transforms, get_val_test_transforms, plot_training_history

print("✅ Imports completati!")

In [None]:
# Step 1 (from the professor's slides): open a W&B run for this experiment.
wandb.init(
    project='vgg16-cats-vs-dogs',
    name='feature-extraction-colab',
    tags=['vgg16', 'transfer-learning', 'cats-vs-dogs', 'colab'],
)

# Step 2 (from the professor's slides): store the model inputs and
# hyperparameters on the run's config, one attribute per entry.
config = wandb.config
hyperparameters = {
    'learning_rate': 0.0001,
    'batch_size': 128,
    'epochs': 10,
    'momentum': 0.9,
    'val_split': 0.2,
    'architecture': 'VGG16',
    'mode': 'feature_extraction',
    'num_classes': 2,
}
for hp_name, hp_value in hyperparameters.items():
    setattr(config, hp_name, hp_value)

# Summarise the run that was just opened.
print(
    "✅ Wandb inizializzato",
    f"  - Project: {wandb.run.project}",
    f"  - Run: {wandb.run.name}",
    f"  - URL: {wandb.run.url}",
    sep="\n",
)

In [None]:
# Prepare the device, the annotation files, the datasets and the train /
# validation dataloaders used by the training loop.

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✓ Device: {device}")

# Dataset paths
data_dir = './data'
train_path = os.path.join(data_dir, 'training_set/training_set')
test_path = os.path.join(data_dir, 'test_set/test_set')

# Create whichever annotation file is missing. The two files are checked
# independently so that an existing train CSV cannot hide a missing test CSV
# (the test-set evaluation cell needs 'test_annotations.csv').
missing_annotations = [
    (images_root, csv_name)
    for images_root, csv_name in (
        (train_path, 'train_annotations.csv'),
        (test_path, 'test_annotations.csv'),
    )
    if not os.path.exists(csv_name)
]
if missing_annotations:
    print("Creando annotations...")
for images_root, csv_name in missing_annotations:
    create_annotations_csv(images_root, csv_name)

# Transforms
train_transform = get_train_transforms()
val_transform = get_val_test_transforms()

# Two views over the same annotated training images: one with the training
# transforms, one with the validation/test transforms. The samplers below give
# each view a disjoint subset of the indices.
train_dataset = CustomImageDataset('train_annotations.csv', train_path, transform=train_transform)
valid_dataset = CustomImageDataset('train_annotations.csv', train_path, transform=val_transform)

# Shuffle the indices before splitting. Slicing them in file order would make
# the validation set the first rows of the CSV, which is not representative if
# the rows are grouped by class. A fixed seed keeps the split reproducible.
split_seed = 42
indices = np.random.default_rng(split_seed).permutation(len(train_dataset)).tolist()
split = int(np.floor(config.val_split * len(train_dataset)))
train_sample = SubsetRandomSampler(indices[split:])
valid_sample = SubsetRandomSampler(indices[:split])

# Dataloaders
trainloader = DataLoader(train_dataset, sampler=train_sample, batch_size=config.batch_size)
validloader = DataLoader(valid_dataset, sampler=valid_sample, batch_size=config.batch_size)

print("✅ Dataset pronto")
print(f"  - Training samples: {len(indices[split:])}")
print(f"  - Validation samples: {len(indices[:split])}")
print(f"  - Batch size: {config.batch_size}")

In [None]:
# Build the 2-class VGG16 (requested with pretrained=True and freeze_base=True)
# and place it on the selected device.
model = create_vgg16_model(num_classes=2, pretrained=True, freeze_base=True).to(device)

# Report how many of the model's parameters will actually be trained.
trainable, total = count_trainable_parameters(model)
print(
    "✅ Model creato",
    f"  - Trainable: {trainable:,} / {total:,}",
    "  - Mode: Feature Extraction",
    sep="\n",
)

# Register the model with W&B (log='all', every 100 steps).
wandb.watch(model, log_freq=100, log='all')
print("  - Wandb watching gradients")

In [None]:
# Cross-entropy loss for the classification output.
criterion = nn.CrossEntropyLoss()

# Give the optimizer only the parameters that still require gradients; the
# selection stays lazy and is consumed by the optimizer constructor.
parameters_to_optimize = (p for p in model.parameters() if p.requires_grad)
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=config.learning_rate,
    momentum=config.momentum,
)

print(
    "✅ Training setup",
    "  - Loss: CrossEntropyLoss",
    f"  - Optimizer: SGD (lr={config.learning_rate}, momentum={config.momentum})",
    sep="\n",
)

In [None]:
# Training loop: for every epoch run one optimisation pass over trainloader,
# evaluate on validloader, log the metrics to W&B and save a checkpoint
# whenever the validation loss improves on the best value seen so far.
best_val_loss = float('inf')
train_losses = []
valid_losses = []
valid_accuracies = []

# The checkpoint directory is needed at most once; create it before the loop
# instead of on every improvement.
os.makedirs('./checkpoints', exist_ok=True)

print("\n" + "="*60)
print("STARTING TRAINING")
print("="*60 + "\n")

for epoch in range(config.epochs):
    # === TRAINING ===
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    train_loss = running_loss / len(trainloader)
    train_losses.append(train_loss)

    # === VALIDATION ===
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in validloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            # Predicted class = index of the largest logit. Gradients are
            # already disabled here, so no `.data` detour is needed.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    valid_loss = running_loss / len(validloader)
    valid_acc = correct / total
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_acc)

    # Step 3 (from the professor's slides): log metrics over time to
    # visualise performance.
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "valid_loss": valid_loss,
        "valid_accuracy": valid_acc,
        "learning_rate": config.learning_rate
    })

    print(f"Epoch [{epoch+1}/{config.epochs}] "
          f"Train Loss: {train_loss:.4f} | "
          f"Valid Loss: {valid_loss:.4f} | "
          f"Valid Acc: {valid_acc:.4f}")

    # Keep only the checkpoint with the lowest validation loss.
    if valid_loss < best_val_loss:
        best_val_loss = valid_loss
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'valid_loss': valid_loss,
            'valid_accuracy': valid_acc,
        }, './checkpoints/best_model.pth')
        print("  ✓ New best model saved!")

print("\n" + "="*60)
print("TRAINING COMPLETED")
print(f"Best Validation Loss: {best_val_loss:.4f}")
print("="*60)

# Read the run URL *before* finishing: once wandb.finish() returns there is no
# active run any more (wandb.run is None), so wandb.run.url would raise.
run_url = wandb.run.url
wandb.finish()

print("\n✅ Training completato!")
print(f"Visualizza i risultati su: {run_url}")

In [None]:
# Draw the per-epoch training and validation losses collected by the training loop.
plot_training_history(
    train_losses,
    valid_losses,
    title='Training History - Feature Extraction',
)

In [None]:
# Evaluate the best checkpoint on the held-out test set and print the average
# loss, accuracy and error rate.
import torch.nn.functional as F

# Make sure the test annotations exist; they may be missing if the train
# annotations were created in an earlier session without them.
if not os.path.exists('test_annotations.csv'):
    create_annotations_csv(test_path, 'test_annotations.csv')

# Load test dataset (same deterministic transforms as validation).
test_dataset = CustomImageDataset('test_annotations.csv', test_path, transform=val_transform)
testloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

# Load best model. map_location remaps the stored tensors onto the current
# device, so a checkpoint written on a GPU runtime also loads on a CPU one.
checkpoint = torch.load('./checkpoints/best_model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Accumulate the summed loss and the number of correct predictions.
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for data, target in testloader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # reduction='sum' so that dividing by the sample count below gives a
        # true per-sample average even when the last batch is smaller.
        test_loss += F.cross_entropy(output, target, reduction='sum').item()
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
        total += target.size(0)

# Fail with a clear message instead of a ZeroDivisionError on an empty set.
if total == 0:
    raise ValueError(f"No test samples found under {test_path!r}")

test_loss /= total
test_accuracy = correct / total

print(f"\n{'='*60}")
print("TEST SET RESULTS")
print(f"{'='*60}")
print(f"Average loss: {test_loss:.4f}")
print(f"Accuracy: {correct}/{total} ({100. * test_accuracy:.2f}%)")
print(f"Error rate: {100. * (1 - test_accuracy):.2f}%")
print(f"{'='*60}\n")

## 6️⃣ (Opzionale) Salva Checkpoint su Google Drive

In [None]:
# Mount Google Drive at /content/drive (google.colab is only importable
# inside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# Copy everything under ./checkpoints into the vgg16_checkpoints folder on
# Drive; mkdir -p creates that folder first if it does not exist yet.
!mkdir -p /content/drive/MyDrive/vgg16_checkpoints
!cp -r ./checkpoints/* /content/drive/MyDrive/vgg16_checkpoints/

print("✅ Checkpoints salvati su Google Drive!")