# CS:4420 Spring 2025 Project — FashionMNIST CNN

This notebook implements the solution for all 5 tasks as outlined in the project description.

## Task 1: Implementing CNN from Scratch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FashionCNN(nn.Module):
    """Three-stage convolutional classifier that outputs 10 class logits.

    Each stage is a 3x3 convolution, a ReLU and a 2x2 max pool. For a
    1x28x28 input the spatial size evolves 28 -> 14 -> 16 -> 8 -> 8 -> 4
    (``conv2`` uses ``padding=2``, which grows 14 to 16 before pooling), so
    the flattened feature vector fed to ``fc1`` has 64 * 4 * 4 = 1024 values.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for ``x``.

        Raises:
            RuntimeError: if the feature map left after the last pooling
                stage does not hold exactly ``fc1.in_features`` values per
                sample, i.e. the input did not have the expected spatial size.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # `view(-1, 1024)` alone would happily regroup a wrongly sized feature
        # map into a different number of rows (mixing samples), so verify the
        # per-sample size before flattening.
        channels, height, width = x.shape[-3:]
        if channels * height * width != self.fc1.in_features:
            raise RuntimeError(
                f"expected {self.fc1.in_features} features per sample after "
                f"pooling, got feature map of shape {tuple(x.shape)}"
            )

        x = x.view(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Build one instance and print its layer summary as a quick check of the architecture.
model = FashionCNN()
print(model)


FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)


## Task 2: Training and Testing CNN (Train/Val/Test Split)

In [2]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import numpy as np

# Seed torch's RNG once for the whole notebook; later cells do not re-seed.
torch.manual_seed(42)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ToTensor is the only preprocessing step; no normalisation or augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Official FashionMNIST train / test partitions, downloaded into ./data when missing.
train_data = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_data = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

def train_val_test(val_split_ratio, *, epochs=10, lr=0.1, batch_size=64):
    """Train a fresh FashionCNN on part of ``train_data`` and return test accuracy.

    ``val_split_ratio`` of the training set is held out and the model is
    trained with plain SGD on the remainder. The held-out part is only
    removed from training; it is not evaluated here.

    Args:
        val_split_ratio: Fraction of ``train_data`` to hold out, in ``[0, 1)``.
        epochs: Number of passes over the training subset.
        lr: SGD learning rate.
        batch_size: Mini-batch size for both the training and test loaders.

    Returns:
        Accuracy on ``test_data`` as a percentage (0-100).

    Raises:
        ValueError: if ``val_split_ratio`` is outside ``[0, 1)``.
    """
    if not 0.0 <= val_split_ratio < 1.0:
        raise ValueError(
            f"val_split_ratio must be in [0, 1), got {val_split_ratio!r}"
        )

    total_train = len(train_data)
    val_size = int(val_split_ratio * total_train)
    train_size = total_train - val_size
    # The validation subset is never iterated, so no loader is built for it.
    train_set, _ = random_split(train_data, [train_size, val_size])

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    model = FashionCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    for _ in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            # Gradients are already disabled, so the legacy `.data` detour is unnecessary.
            predicted = torch.max(model(images), 1).indices
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Sweep the held-out fraction and record the test accuracy obtained for each value.
results = {}
for ratio in (0.0, 0.1, 0.2, 0.3, 0.4):
    results[ratio] = train_val_test(ratio)

print("Validation Ratio vs Accuracy:")
for split, accuracy in results.items():
    print(f"{int(split*100)}% Val: {accuracy:.2f}%")


100%|██████████| 26.4M/26.4M [00:01<00:00, 13.5MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 197kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.69MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 13.1MB/s]


Validation Ratio vs Accuracy:
0% Val: 90.51%
10% Val: 91.09%
20% Val: 90.43%
30% Val: 89.78%
40% Val: 89.02%


Evaluating the model on the test set while varying the validation split (0%, 10%, 20%, 30%, 40%) shows that the highest accuracy was achieved with a **10% validation split** (91.09%). With no validation split the model still performed reasonably well (90.51%), but slightly worse than with the 10% split.

Takeaway:

*   A small validation set (e.g., 10%) helps improve generalization without taking away too much training data.
*   Larger validation sets reduce training size, which may lead to underfitting and lower test performance.



## Task 3: Learning Rate Exploration

In [6]:
def test_learning_rates(rates, best_val_split, *, epochs=10, batch_size=64):
    """Train one fresh FashionCNN per learning rate and report test accuracy.

    The train/validation split and both data loaders are built once and
    shared by every learning rate, so all runs see the same training subset
    and differ only in step size and random initialisation.

    Args:
        rates: Iterable of SGD learning rates to try.
        best_val_split: Fraction of ``train_data`` held out from training.
        epochs: Number of passes over the training subset per learning rate.
        batch_size: Mini-batch size for both the training and test loaders.

    Returns:
        Dict mapping each learning rate to its accuracy on ``test_data``
        as a percentage (0-100).
    """
    total_train = len(train_data)
    val_size = int(best_val_split * total_train)
    train_size = total_train - val_size
    # The held-out part is not evaluated here; it is only removed from training.
    train_set, _ = random_split(train_data, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    criterion = nn.CrossEntropyLoss()

    acc_results = {}
    for lr in rates:
        print(f"Testing LR: {lr}")

        model = FashionCNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr)

        for _ in range(epochs):
            model.train()
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)  # forward pass: class logits
                loss = criterion(outputs, labels)  # cross-entropy against the true labels
                loss.backward()
                optimizer.step()  # apply the SGD weight update

        model.eval()
        correct = total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                predicted = torch.max(model(images), 1).indices
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        acc_results[lr] = 100 * correct / total
    return acc_results

# Sweep step sizes from 0.001 to 10 in factors of ten, holding out 10% of the training data.
learning_rates = [0.001, 0.01, 0.1, 1, 10]
lr_results = test_learning_rates(learning_rates, best_val_split=0.1)

# Report the test accuracy reached with each learning rate.
print("Learning Rate vs Accuracy:")
for k, v in lr_results.items():
    print(f"LR {k}: {v:.2f}%")


Testing LR: 0.001
Testing LR: 0.01
Testing LR: 0.1
Testing LR: 1
Testing LR: 10
Learning Rate vs Accuracy:
LR 0.001: 41.21%
LR 0.01: 82.49%
LR 0.1: 89.37%
LR 1: 10.00%
LR 10: 10.00%


Findings:
*  0.001 was too small — training was very slow and underfitting occurred.

*  0.01 provided decent learning but didn’t fully optimize performance.

*  0.1 was optimal — model trained efficiently and achieved the highest accuracy.

*  1 and 10 led to immediate performance breakdown — likely due to unstable gradients or exploding loss values, preventing the model from learning.

## Task 4: Training Algorithm Comparison (SGD vs Adam)

In [7]:
def compare_optimizers(lr, val_split=0.1):
    """Train one FashionCNN with SGD and one with Adam and compare test accuracy.

    Both models use the same learning rate ``lr``, the same training subset
    (``train_data`` minus a ``val_split`` fraction) and 10 epochs each.

    Returns:
        ``(sgd_accuracy, adam_accuracy)`` on ``test_data``, as percentages.
    """
    n_total = len(train_data)
    n_val = int(val_split * n_total)
    train_subset, _ = random_split(train_data, [n_total - n_val, n_val])
    train_batches = DataLoader(train_subset, batch_size=64, shuffle=True)
    test_batches = DataLoader(test_data, batch_size=64, shuffle=False)

    def fit(make_optimizer):
        """Train a fresh network with the optimizer built by ``make_optimizer``."""
        net = FashionCNN().to(device)
        opt = make_optimizer(net.parameters(), lr=lr)
        loss_fn = nn.CrossEntropyLoss()
        for _epoch in range(10):
            net.train()
            for batch, targets in train_batches:
                batch = batch.to(device)
                targets = targets.to(device)
                opt.zero_grad()
                loss_fn(net(batch), targets).backward()
                opt.step()
        return net

    def accuracy(net):
        """Return the percentage of test samples ``net`` classifies correctly."""
        net.eval()
        hits = 0
        seen = 0
        with torch.no_grad():
            for batch, targets in test_batches:
                batch = batch.to(device)
                targets = targets.to(device)
                predicted = torch.max(net(batch), 1).indices
                seen += targets.size(0)
                hits += (predicted == targets).sum().item()
        return 100 * hits / seen

    # Train both models first, then score them, in SGD-then-Adam order.
    sgd_net = fit(optim.SGD)
    adam_net = fit(optim.Adam)
    return accuracy(sgd_net), accuracy(adam_net)

# Both optimizers are run at the same learning rate (0.1).
sgd_acc, adam_acc = compare_optimizers(lr=0.1)
print(f"SGD Accuracy: {sgd_acc:.2f}%, Adam Accuracy: {adam_acc:.2f}%")


SGD Accuracy: 90.48%, Adam Accuracy: 10.00%


Findings:
*  SGD significantly outperformed Adam in this experiment, achieving 90.48% test accuracy.

*  The Adam optimizer, despite being adaptive and typically more efficient, resulted in only 10% accuracy, indicating it failed to learn effectively under the same learning rate.

*  This performance gap suggests that Adam is more sensitive to learning rate tuning — the value of 0.1, which worked well for SGD, is likely too high for Adam, causing divergence or ineffective weight updates.

## Task 5: AUC for One-Class Evaluation

In [8]:
from sklearn.metrics import roc_auc_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch

# Define the transform before it is used to rebuild the test set, so this
# cell does not depend on a `transform` left over from an earlier cell.
transform = transforms.Compose([transforms.ToTensor()])

# Hold out 10% of the training data; the remaining 90% is used for training.
total_train = len(train_data)
val_size = int(0.1 * total_train)
train_size = total_train - val_size
train_set, val_set = random_split(train_data, [train_size, val_size])
test_data = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Shuffle only the training batches; the test order stays fixed.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

def train_with_optimizer(optimizer_fn, *, lr=0.1, epochs=10):
    """Train a fresh FashionCNN on the module-level ``train_loader``.

    Args:
        optimizer_fn: Optimizer factory called as
            ``optimizer_fn(model.parameters(), lr=lr)``, e.g. ``optim.SGD``.
        lr: Learning rate passed to the optimizer.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The trained model, left on ``device``.
    """
    model = FashionCNN().to(device)
    optimizer = optimizer_fn(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for _ in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()
    return model

# Train the SGD model and put it in inference mode for scoring.
sgd_model = train_with_optimizer(optim.SGD)
model = sgd_model
model.eval()

# One-vs-rest targets and scores for class index 2, accumulated over the test batches.
y_true = []
y_scores = []

with torch.no_grad():
    for images, labels in test_loader:
        probs = torch.softmax(model(images.to(device)), dim=1)

        # Score = predicted probability of class 2.
        y_scores.extend(probs[:, 2].cpu().numpy())

        # Labels are not moved to `device`, so they are converted to NumPy directly.
        y_true.extend((labels == 2).int().numpy())

auc_score = roc_auc_score(y_true, y_scores)
print(f"AUC Score for class '2' (positive vs rest): {auc_score:.4f}")


AUC Score for class '2' (positive vs rest): 0.9900


Findings:
*  An AUC score of 0.9900 indicates excellent discriminative ability — the model is highly effective at ranking true class-2 images above non-class-2 images.

*  This result suggests the model learned distinctive features for class "2", even though it was trained in a multi-class setting.