<a href="https://colab.research.google.com/github/dtminnick/pytorch-practice/blob/main/assignment1part2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Deep Learning - Manually Graded Assignment 1 - Part 2 (CIFAR Dataset)**

Donnie Minnick

October 2025

# Import Libraries

In [5]:
import itertools

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image
from torchvision.utils import make_grid
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

# Load CIFAR Data

In [9]:
# Normalization shared by every split: maps each RGB channel from [0, 1] to [-1, 1].
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Training pipeline: random flip and padded crop augment the images before
# they are converted to tensors and normalized.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    normalize])

# Evaluation pipeline: no random augmentation, so test metrics are
# deterministic and measured on the unmodified images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize])

# Get training data.
# NOTE: any subset later carved out of this dataset (e.g. a validation split)
# inherits the augmenting `transform`.

training_data = torchvision.datasets.CIFAR10(root = './data',
                                             train = True,
                                             download = True,
                                             transform = transform)

# Get testing data.

testing_data = torchvision.datasets.CIFAR10(root = './data',
                                            train = False,
                                            download = True,
                                            transform = test_transform)


Confirm download of training data.

In [7]:
# Sanity check: report how many images the training dataset holds.
training_count = len(training_data)
print("Training set size:", training_count)

Training set size: 50000


Confirm download of testing data.

In [10]:
# Sanity check: report how many images the testing dataset holds.
testing_count = len(testing_data)
print("Testing set size:", testing_count)

Testing set size: 10000


# Split Data

Split the data into training, validation and testing sets.

A testing set already exists; randomly take 10,000 images from the training set and reserve them as a validation set.

In [12]:
# Partition sizes; random_split raises if they do not sum to len(training_data),
# which also guards against accidentally re-running this cell on the already
# reduced training subset.
train_size = 40000
val_size = 10000

# Seed the split so every run (and every sweep configuration) sees the same
# training/validation partition, keeping validation accuracies comparable.
split_generator = torch.Generator().manual_seed(42)

training_data, validation_data = random_split(training_data,
                                              [train_size, val_size],
                                              generator = split_generator)

Confirm splits.

In [13]:
# Report the number of samples in each partition after the split.
for size_label, partition in (("Training set size:", training_data),
                              ("Validation set size:", validation_data),
                              ("Testing set size:", testing_data)):
  print(size_label, len(partition))

Training set size: 40000
Validation set size: 10000
Testing set size: 10000


# Build a CNN Network

Build a CNN network with convolution layers to classify the images.

## Setup Data Loaders

Batch the training, validation, and testing data with batch_size = 64. Shuffle the training data for generalization; the validation and testing data are not shuffled, ensuring consistent evaluation.

In [None]:
batch_size = 64

# Training batches are reshuffled on every pass over the data.
training_loader = DataLoader(dataset = training_data, batch_size = batch_size, shuffle = True)

# Validation and testing batches keep a fixed order.
validation_loader, testing_loader = (
    DataLoader(dataset = held_out, batch_size = batch_size, shuffle = False)
    for held_out in (validation_data, testing_data)
)

# Implement Sweep Strategy

Implement a sweep strategy to find the optimal hyperparameters to maximize accuracy.

Systematically vary key hyperparameters - kernel size, stride, batch size, and learning rate - to assess their impact on training loss, validation accuracy, and feature map evolution.  This modular sweep enables principled experimentation, helping identify optimal architecture and training settings for improved model performance and stakeholder clarity.

## Create Sweep Grid

Create a sweep grid to explore four key hyperparameters that influence CNN performance.

In [None]:
# Candidate values for each swept hyperparameter, keyed by hyperparameter name.
sweep_grid = dict(
    kernel_size = [3, 5],
    stride = [1, 2],
    batch_size = [32, 64],
    learning_rate = [0.001, 0.01],
)

**Kernel size** [3, 5] tests the impact of local versus broader spatial feature extraction.

**Stride** [1, 2] assesses how spatial resolution and downsampling affect learning.

**Batch size** [32, 64] evaluates gradient stability, generalization, and training efficiency.

**Learning rate** [0.001, 0.01] measures convergence speed and sensitivity to weight updates.

This grid yields 16 unique configurations (2 * 2 * 2 * 2), each to be trained and evaluated to compare loss, accuracy, and feature map evolution.

The goal is to identify the optimal combinations for model performance.

## Compute Flattened Size Function

Function to compute the flattened output size of a feature extractor, i.e. the number of input features required by the linear layer that follows it.

In [None]:
def compute_flattened_size(model, input_shape = (1, 28, 28)):
  """Return the number of features `model` emits for one sample of `input_shape`.

  A single all-zero sample is pushed through `model` and the elements of the
  output are counted, so the result can be used as `in_features` of a linear
  layer placed after `model`.

  Args:
    model: an `nn.Module` to probe.
    input_shape: shape of one sample without the batch dimension. The default
      is a 1x28x28 input; pass `(3, 32, 32)` for CIFAR-10 images.

  Returns:
    int: number of elements in the output for a batch of one sample.
  """
  # Build the probe on the same device/dtype as the model's parameters so the
  # function also works after the model has been moved off the CPU. A model
  # without parameters falls back to torch's defaults.
  reference = next(model.parameters(), None)
  device = reference.device if reference is not None else None
  dtype = reference.dtype if reference is not None else None

  # Probe in eval mode so the dummy pass cannot touch running statistics or
  # apply dropout; every module's original mode is restored afterwards.
  previous_modes = [(module, module.training) for module in model.modules()]
  model.eval()
  try:
    with torch.no_grad():
      dummy = torch.zeros(1, *input_shape, device = device, dtype = dtype)
      output = model(dummy)
  finally:
    for module, was_training in previous_modes:
      module.training = was_training

  # numel() counts elements regardless of memory layout, whereas view() would
  # fail on a non-contiguous output.
  return output.numel()

## Create Model Builder

Implement modular construction of a CNN with the specified kernel size and stride.

In [None]:
def build_model(kernel_size, stride, in_channels = 3, image_size = 32, num_classes = 10):
  """Build a two-convolution CNN classifier for square images.

  Args:
    kernel_size: kernel size used by both convolution layers.
    stride: stride used by both convolution layers.
    in_channels: channels of the input images (3 for CIFAR-10 RGB images).
    image_size: height/width of the square input images (32 for CIFAR-10).
    num_classes: number of output classes.

  Returns:
    nn.Sequential: feature extractor, linear classifier and a log-softmax
    output layer. The model emits log-probabilities, which is the input
    `nn.NLLLoss` expects; `argmax` over the output still yields the
    predicted class.
  """
  feature_extractor = nn.Sequential(
      nn.Conv2d(in_channels, 6, kernel_size = kernel_size, stride = stride),
      nn.ReLU(),
      nn.Conv2d(6, 16, kernel_size = kernel_size, stride = stride),
      nn.ReLU(),
      nn.Flatten()
  )

  # Probe with the real input geometry so the linear layer matches the
  # feature map produced for the actual images.
  flattened_size = compute_flattened_size(
      feature_extractor,
      input_shape = (in_channels, image_size, image_size)
  )

  return nn.Sequential(
      feature_extractor,
      nn.Linear(flattened_size, num_classes),
      nn.LogSoftmax(dim = 1)
  )

## Create DataLoader Factory

Create data loaders with consistent batching to keep data handling clean and configurable.

In [None]:
def create_dataloaders(training_data, validation_data, batch_size):
  """Return `(training_loader, validation_loader, testing_loader)`.

  All three loaders use `batch_size`; only the training loader shuffles.
  The testing loader is built from the module-level `testing_data`, which is
  not a parameter of this function.
  """
  def _make_loader(dataset, shuffle):
    # Single place where the batching options are applied.
    return DataLoader(dataset, batch_size = batch_size, shuffle = shuffle)

  return (
      _make_loader(training_data, True),
      _make_loader(validation_data, False),
      _make_loader(testing_data, False),
  )

## Create Train Model Function

Function to train the model and return final loss and validation accuracy; modular design for reuse across sweeps.

In [None]:
def train_model(model, training_loader, validation_loader, lr, device = "cpu", epochs = 5):
  """Train `model` with Adam and report its validation accuracy.

  Args:
    model: network to train; it is moved to `device` in place. Its output is
      passed to `nn.NLLLoss`, so it should emit log-probabilities.
    training_loader: iterable of `(images, labels)` training batches.
    validation_loader: iterable of `(images, labels)` validation batches.
    lr: learning rate for the Adam optimizer.
    device: device on which the model and batches are placed.
    epochs: number of passes over `training_loader`.

  Returns:
    tuple: `(final_loss, val_accuracy)` where `final_loss` is the loss of the
    last training batch as a float and `val_accuracy` is the percentage of
    correctly classified validation samples. Either value is NaN when no
    training batch / no validation sample was processed.
  """
  model.to(device)
  optimizer = optim.Adam(model.parameters(), lr = lr)
  criterion = nn.NLLLoss()

  # Stays None when no batch is processed (epochs == 0 or an empty loader),
  # so the function never reads an unbound `loss`.
  last_loss = None

  for _ in range(epochs):
    model.train()
    for images, labels in training_loader:
      images, labels = images.to(device), labels.to(device)
      optimizer.zero_grad()
      output = model(images)
      loss = criterion(output, labels)
      loss.backward()
      optimizer.step()
      last_loss = loss.detach()

  model.eval()
  correct, total = 0, 0

  with torch.no_grad():
    for images, labels in validation_loader:
      images, labels = images.to(device), labels.to(device)
      output = model(images)
      pred = output.argmax(dim = 1)
      correct += (pred == labels).sum().item()
      total += labels.size(0)

  final_loss = last_loss.item() if last_loss is not None else float("nan")
  # Avoid ZeroDivisionError when the validation loader yields nothing.
  val_accuracy = 100 * correct / total if total else float("nan")
  return final_loss, val_accuracy

## Create Sweep Runner Function

Function to execute the full sweep across all parameter combinations.  Log results for analysis.

In [None]:
def run_sweep(training_data, validation_data, sweep_grid, device = "cpu", epochs = 5):
  """Train and evaluate one model per hyperparameter combination.

  Args:
    training_data: dataset used to fit each model.
    validation_data: dataset used to score each model.
    sweep_grid: dict with the keys "kernel_size", "stride", "batch_size" and
      "learning_rate", each mapping to a list of candidate values.
    device: device forwarded to `train_model`.
    epochs: number of training epochs forwarded to `train_model`.

  Returns:
    list[dict]: one record per configuration holding the four hyperparameter
    values, "final_loss" (rounded to 4 decimals) and "val_acc" (rounded to 2).
  """
  results = []

  # product() iterates in the same order as four nested loops over the lists.
  configurations = itertools.product(sweep_grid["kernel_size"],
                                     sweep_grid["stride"],
                                     sweep_grid["batch_size"],
                                     sweep_grid["learning_rate"])

  for k, s, b, lr in configurations:
    print(f"Running: kernel_size = {k}, stride = {s}, batch_size = {b}, learning_rate = {lr}")
    model = build_model(k, s)

    # create_dataloaders also returns a testing loader; the sweep only needs
    # the first two, so index instead of unpacking a fixed number of values.
    loaders = create_dataloaders(training_data, validation_data, b)
    training_loader, validation_loader = loaders[0], loaders[1]

    final_loss, val_acc = train_model(model, training_loader, validation_loader, lr,
                                      device = device, epochs = epochs)
    results.append({
        "kernel_size": k,
        "stride": s,
        "batch_size": b,
        "learning_rate": lr,
        "final_loss": round(final_loss, 4),
        "val_acc": round(val_acc, 2)
    })

  return results

## Create Preview Results Function

Display top-performing configurations for quick insight and stakeholder reporting.

In [None]:
def preview_results(results, top_n = 10):
  """Print the `top_n` sweep records with the highest validation accuracy.

  Args:
    results: list of per-configuration dicts containing a "val_acc" entry.
    top_n: maximum number of rows to print.

  Prints a short notice instead of raising when there is nothing to rank
  (no records, or no "val_acc" column).
  """
  df = pd.DataFrame(results)

  # sort_values would raise KeyError on a frame without the "val_acc" column.
  if df.empty or "val_acc" not in df.columns:
    print("No sweep results to display.")
    return

  print(df.sort_values(by = "val_acc", ascending = False).head(top_n))