Chelsea Jaculina

DATA 255 Assignment #6

October 27, 2025

In [1]:
# ================================================
# 1. SETUP AND IMPORTS
# ================================================

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import zipfile
import os
import time
import random
import numpy as np
import shutil

# For reproducibility
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
random.seed(seed)
np.random.seed(seed)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

Using device: cuda
GPU: NVIDIA A100-SXM4-80GB
GPU Memory: 79.32 GB


In [2]:
# ================================================
# 2. MOUNT GOOGLE DRIVE & EXTRACT DATA
# ================================================

from google.colab import drive
drive.mount('/content/drive')

train_zip_path = "/content/drive/MyDrive/MSDA 2024-2026/04 Fall 2025/DATA 255 - Deep Learning/HW6/imagenet_train20a.zip"
val_zip_path = "/content/drive/MyDrive/MSDA 2024-2026/04 Fall 2025/DATA 255 - Deep Learning/HW6/imagenet_val20.zip"

def extract_and_find_data(zip_path, expected_name):
    """
    Extract zip file and find the actual data directory.
    Handles nested directory structures.
    """
    print(f"\nExtracting {expected_name}...")

    # Extract to temporary directory
    temp_extract_dir = f"temp_{expected_name}"

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(temp_extract_dir)

    # Find the directory with class folders
    def find_dataset_dir(root_dir):
        """Recursively find directory containing class folders."""
        for dirpath, dirnames, filenames in os.walk(root_dir):
            # Check if this directory has subdirectories (potential classes)
            if dirnames and not any(d.startswith('.') for d in dirnames):
                # Check if subdirectories contain image files
                first_subdir = os.path.join(dirpath, dirnames[0])
                if os.path.isdir(first_subdir):
                    files = os.listdir(first_subdir)
                    if any(f.lower().endswith(('.jpg', '.jpeg', '.png', '.JPEG')) for f in files):
                        return dirpath
        return None

    dataset_dir = find_dataset_dir(temp_extract_dir)

    if dataset_dir is None:
        raise FileNotFoundError(f"Could not find dataset directory in {temp_extract_dir}")

    # Move to expected location
    if os.path.exists(expected_name):
        shutil.rmtree(expected_name)

    shutil.move(dataset_dir, expected_name)

    # Clean up temp directory
    if os.path.exists(temp_extract_dir):
        shutil.rmtree(temp_extract_dir)

    # Verify
    class_folders = [d for d in os.listdir(expected_name)
                     if os.path.isdir(os.path.join(expected_name, d)) and not d.startswith('.')]
    print(f"✓ Found {len(class_folders)} class folders in {expected_name}")

    return expected_name

# Extract training data
if not os.path.exists("imagenet_train20a"):
    train_dir = extract_and_find_data(train_zip_path, "imagenet_train20a")
else:
    print("\nTraining data already extracted.")
    train_dir = "imagenet_train20a"

# Extract validation data
if not os.path.exists("imagenet_val20"):
    val_dir = extract_and_find_data(val_zip_path, "imagenet_val20")
else:
    print("\nValidation data already extracted.")
    val_dir = "imagenet_val20"

print(f"\n{'='*70}")
print("Data extraction complete!")
print(f"{'='*70}")

Mounted at /content/drive

Extracting imagenet_train20a...
✓ Found 20 class folders in imagenet_train20a

Extracting imagenet_val20...
✓ Found 1 class folders in imagenet_val20

Data extraction complete!


In [3]:
# ================================================
# 3. VERIFY DATA STRUCTURE
# ================================================

print(f"\n{'='*70}")
print("VERIFYING DATA STRUCTURE")
print(f"{'='*70}")

def verify_dataset(data_dir, name):
    """Verify dataset structure and count images."""
    print(f"\n{name}:")
    print("-" * 70)

    if not os.path.exists(data_dir):
        print(f"ERROR: Directory {data_dir} does not exist!")
        return False

    class_folders = sorted([d for d in os.listdir(data_dir)
                           if os.path.isdir(os.path.join(data_dir, d)) and not d.startswith('.')])

    if len(class_folders) == 0:
        print(f"ERROR: No class folders found in {data_dir}!")
        return False

    print(f"Number of classes: {len(class_folders)}")
    print(f"Class folders: {class_folders[:5]}..." if len(class_folders) > 5 else f"Class folders: {class_folders}")

    total_images = 0
    for i, class_folder in enumerate(class_folders[:3]):  # Check first 3
        class_path = os.path.join(data_dir, class_folder)
        images = [f for f in os.listdir(class_path)
                 if f.lower().endswith(('.jpg', '.jpeg', '.png', '.JPEG'))]
        print(f"  Class {i} ({class_folder}): {len(images)} images")
        total_images += len(images)

    # Estimate total
    avg_per_class = total_images / min(3, len(class_folders))
    estimated_total = int(avg_per_class * len(class_folders))
    print(f"  Estimated total images: ~{estimated_total}")

    return True

train_ok = verify_dataset("imagenet_train20a", "Training Dataset")
val_ok = verify_dataset("imagenet_val20", "Validation Dataset")

if not (train_ok and val_ok):
    raise RuntimeError("Data verification failed! Check the zip files and extraction.")

print(f"\n{'='*70}")
print("✓ Data verification passed!")
print(f"{'='*70}")


VERIFYING DATA STRUCTURE

Training Dataset:
----------------------------------------------------------------------
Number of classes: 20
Class folders: ['n01737021', 'n02006656', 'n02011460', 'n02013706', 'n02033041']...
  Class 0 (n01737021): 300 images
  Class 1 (n02006656): 300 images
  Class 2 (n02011460): 300 images
  Estimated total images: ~6000

Validation Dataset:
----------------------------------------------------------------------
Number of classes: 1
Class folders: ['imagenet_val20']
  Class 0 (imagenet_val20): 1000 images
  Estimated total images: ~1000

✓ Data verification passed!


In [4]:
# ================================================
# 4. DATA LOADING - CLEAN AUGMENTATION
# ================================================

# ImageNet normalization
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# MODERATE augmentation - NOT TOO AGGRESSIVE
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),  # Simple random crop, not aggressive
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Validation transforms (no augmentation)
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Load datasets
train_dataset = datasets.ImageFolder(root="imagenet_train20a", transform=train_transforms)
val_dataset = datasets.ImageFolder(root="imagenet_val20", transform=val_transforms)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                         num_workers=4, pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                       num_workers=4, pin_memory=True, persistent_workers=True)

print(f"\n{'='*70}")
print(f"Dataset Loaded - CLEAN DATA")
print(f"{'='*70}")
print(f"Training samples:   {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Number of classes:  {len(train_dataset.classes)}")
print(f"Batch size:         {batch_size}")
print(f"Augmentation:       MINIMAL (flip + random crop only)")
print(f"{'='*70}")



Dataset Loaded - CLEAN DATA
Training samples:   6000
Validation samples: 1000
Number of classes:  20
Batch size:         32
Augmentation:       MINIMAL (flip + random crop only)


In [5]:
# ================================================
# 5. VGG11 WITH PRETRAINED WEIGHTS - OPTIMIZED
# ================================================

import torchvision.models as models

# Load pretrained VGG11
model = models.vgg11(weights=models.VGG11_Weights.IMAGENET1K_V1)

# FREEZE first 4 blocks of features (keep only last block trainable)
freeze_count = 0
for i, param in enumerate(model.features.parameters()):
    if i < len(list(model.features.parameters())) - 6:  # Freeze all but last block
        param.requires_grad = False
        freeze_count += 1

# MUCH SMALLER classifier head to prevent overfitting
num_classes = 20
model.classifier = nn.Sequential(
    nn.Dropout(p=0.4),
    nn.Linear(512 * 7 * 7, 512),  # Reduced: 25088 -> 512
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.3),
    nn.Linear(512, num_classes)  # Direct to output
)

model = model.to(device)

print(f"\n{'='*70}")
print("VGG11 Pretrained - Optimized for Validation Accuracy")
print(f"{'='*70}")
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen conv layers: {freeze_count} ({freeze_count*100//len(list(model.features.parameters())):.0f}%)")
print(f"Classifier: 25088 -> 512 -> 20 (very small!)")
print(f"Dropout: 0.4 -> 0.3 (prevents overfitting)")
print(f"{'='*70}")


Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /root/.cache/torch/hub/checkpoints/vgg11-8a719046.pth


100%|██████████| 507M/507M [00:02<00:00, 242MB/s]



VGG11 Pretrained - Optimized for Validation Accuracy
Total parameters: 22,076,308
Trainable parameters: 19,935,252
Frozen conv layers: 10 (62%)
Classifier: 25088 -> 512 -> 20 (very small!)
Dropout: 0.4 -> 0.3 (prevents overfitting)


In [11]:
# ================================================
# 6. FINE-TUNING SETUP - PREVENT OVERFITTING
# ================================================

criterion = nn.CrossEntropyLoss()

# Conservative training for better validation generalization
initial_lr = 0.0005  # Very low - only tune last layer
optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=0.9, weight_decay=0.001)

num_epochs = 8
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 7], gamma=0.5)

print(f"\n{'='*70}")
print("Training Setup - Minimize Overfitting")
print(f"{'='*70}")
print(f"Learning rate: {initial_lr} (very conservative)")
print(f"Weight decay: 0.001 (L2 regularization)")
print(f"Optimizer: SGD with momentum 0.9")
print(f"Epochs: {num_epochs}")
print(f"Scheduler: MultiStepLR (soft drops)")
print(f"\nStrategy:")
print(f"  - Freeze most conv layers (learn features from ImageNet)")
print(f"  - Small classifier (512 units only)")
print(f"  - Low learning rate (minimal updates)")
print(f"  - Moderate dropout (0.4 -> 0.3)")
print(f"  - Clean data (minimal augmentation)")
print(f"  - Goal: Better validation generalization")
print(f"{'='*70}")



Training Setup - Minimize Overfitting
Learning rate: 0.0005 (very conservative)
Weight decay: 0.001 (L2 regularization)
Optimizer: SGD with momentum 0.9
Epochs: 8
Scheduler: MultiStepLR (soft drops)

Strategy:
  - Freeze most conv layers (learn features from ImageNet)
  - Small classifier (512 units only)
  - Low learning rate (minimal updates)
  - Moderate dropout (0.4 -> 0.3)
  - Clean data (minimal augmentation)
  - Goal: Better validation generalization


In [7]:
# ================================================
# 7. TRAINING FUNCTIONS
# ================================================

def calculate_accuracy(model, loader, device):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

def train_one_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if (batch_idx + 1) % 25 == 0 or (batch_idx + 1) == len(train_loader):
            batch_acc = 100 * correct / total
            avg_loss = running_loss / (batch_idx + 1)
            print(f"  [{batch_idx + 1:3d}/{len(train_loader)}] "
                  f"Loss: {loss.item():.4f} | Avg: {avg_loss:.4f} | Acc: {batch_acc:5.2f}%", end='\r')

    print()
    return running_loss / len(train_loader)

print("Training functions ready.")

Training functions ready.


In [8]:
# ================================================
# 8. MAIN TRAINING LOOP
# ================================================

print(f"\n{'='*70}")
print("STARTING TRAINING - FAST CONVERGENCE MODE")
print(f"{'='*70}\n")

best_val_acc = 0.0
best_train_acc = 0.0
start_time = time.time()
target_reached_epoch = None

for epoch in range(num_epochs):
    epoch_start = time.time()
    current_lr = optimizer.param_groups[0]['lr']

    print(f"Epoch {epoch + 1}/{num_epochs} | LR: {current_lr:.6f}")
    print("-" * 70)

    avg_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
    train_acc = calculate_accuracy(model, train_loader, device)
    val_acc = calculate_accuracy(model, val_loader, device)
    scheduler.step()

    improved = ""
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        improved = "NEW BEST!"
    if train_acc > best_train_acc:
        best_train_acc = train_acc

    epoch_time = time.time() - epoch_start

    print(f"\n{'='*70}")
    print(f"EPOCH {epoch + 1} SUMMARY {improved}")
    print(f"{'='*70}")
    print(f"  Loss:           {avg_loss:.4f}")
    print(f"  Train Accuracy: {train_acc:6.2f}% (best: {best_train_acc:6.2f}%)")
    print(f"  Val Accuracy:   {val_acc:6.2f}% (best: {best_val_acc:6.2f}%)")
    print(f"  Time:           {epoch_time:.1f}s")
    print(f"{'='*70}\n")

    if val_acc >= 50 and target_reached_epoch is None:
        target_reached_epoch = epoch + 1
        print(f"TARGET REACHED at epoch {target_reached_epoch}!\n")

    if train_acc > 50 and val_acc > 50:
        print(f"REQUIREMENT MET! Both accuracies > 50%\n")

total_time = time.time() - start_time
minutes, seconds = divmod(total_time, 60)

print(f"\n{'='*70}")
print("TRAINING COMPLETE")
print(f"{'='*70}")
print(f"Total time: {int(minutes)}m {int(seconds)}s")
print(f"Best train accuracy: {best_train_acc:.2f}%")
print(f"Best val accuracy:   {best_val_acc:.2f}%")
if target_reached_epoch:
    print(f"Val 50% reached at:  Epoch {target_reached_epoch}")
print(f"{'='*70}")


STARTING TRAINING - FAST CONVERGENCE MODE

Epoch 1/8 | LR: 0.000500
----------------------------------------------------------------------
  [188/188] Loss: 2.5832 | Avg: 2.8133 | Acc: 17.43%

EPOCH 1 SUMMARY NEW BEST!
  Loss:           2.8133
  Train Accuracy:  49.22% (best:  49.22%)
  Val Accuracy:     3.00% (best:   3.00%)
  Time:           23.4s

Epoch 2/8 | LR: 0.000500
----------------------------------------------------------------------
  [188/188] Loss: 1.6685 | Avg: 2.2650 | Acc: 44.57%

EPOCH 2 SUMMARY 
  Loss:           2.2650
  Train Accuracy:  70.87% (best:  70.87%)
  Val Accuracy:     2.40% (best:   3.00%)
  Time:           21.4s

Epoch 3/8 | LR: 0.000500
----------------------------------------------------------------------
  [188/188] Loss: 1.1464 | Avg: 1.7155 | Acc: 63.20%

EPOCH 3 SUMMARY NEW BEST!
  Loss:           1.7155
  Train Accuracy:  81.18% (best:  81.18%)
  Val Accuracy:     5.20% (best:   5.20%)
  Time:           22.0s

Epoch 4/8 | LR: 0.000500
----------

In [13]:
# ================================================
# 9. FINAL EVALUATION
# ================================================

print(f"\n{'='*70}")
print("FINAL EVALUATION")
print(f"{'='*70}")

final_train_acc = calculate_accuracy(model, train_loader, device)
final_val_acc = calculate_accuracy(model, val_loader, device)

print(f"\nFinal Training Accuracy:   {final_train_acc:6.2f}%")
print(f"Final Validation Accuracy: {final_val_acc:6.2f}%")
print(f"\nBest Training Accuracy:    {best_train_acc:6.2f}%")
print(f"Best Validation Accuracy:  {best_val_acc:6.2f}%")



FINAL EVALUATION

Final Training Accuracy:    90.75%
Final Validation Accuracy:   5.80%

Best Training Accuracy:     90.45%
Best Validation Accuracy:    6.30%
