# **UIowa CS:4420 Project 1 Deep Learning on FashionMNIST**<br>
Alex Jacobs<br>
Code created with guidance from tutorials provided by Dr. Muchao Ye for the course.<br>
Important Note: This code is tailored for being run on terminals provided by Google Colab.

# **Task 1**
Implementation of CNN<br>
Input: 28x28 Greyscale<br>


In [None]:
!pip install torcheval
from torcheval.metrics import BinaryAUROC
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader

# Define data transformations applied to every image as it is loaded.
transform = transforms.Compose([
    # The notebook header states the input is 28x28 greyscale, so this resize
    # presumably leaves FashionMNIST images unchanged; it pins the 28x28 size
    # that the network's layer dimensions are built around.
    transforms.Resize(28),
    # Convert the image to a tensor.
    transforms.ToTensor(),
    # Single-channel normalisation with mean 0.5 and std 0.5: (x - 0.5) / 0.5.
    transforms.Normalize((0.5,), (0.5,))
])

# Load the FashionMNIST training split (downloaded into ./data if missing).
# Later cells either train on all of it or divide it into train/validation.
full_train_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform
)

# Held-out test split, preprocessed exactly like the training data.
test_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform
)

# Define the neural network for FashionMNIST
class FashionMNISTNet(nn.Module):
    """Small CNN classifier for 28x28 single-channel FashionMNIST images.

    Three conv + ReLU + 2x2 max-pool stages feed three fully connected
    layers that emit one score per class (10 classes).

    Spatial size for a 28x28 input:
        28 -> conv1 (pad 1) -> 28 -> pool -> 14
           -> conv2 (pad 2) -> 16 -> pool -> 8
           -> conv3 (pad 1) -> 8  -> pool -> 4
    which yields 64 * 4 * 4 = 1024 features for the classifier head.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        # Max pooling has no learnable parameters, so a single instance is
        # shared by all three stages instead of being re-assigned per stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # padding=2 with a 3x3 kernel grows the feature map by 2 (14 -> 16).
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=10)
        # Not applied in forward(); kept so callers can turn logits into
        # probabilities explicitly via net.softmax(net(x)).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class logits with one row per input image.

        Args:
            x: image batch of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding unnormalised scores. Softmax
            is deliberately not applied because nn.CrossEntropyLoss, which
            the training cells use, expects logits.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to the
                4x4 feature map expected by ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 1024), this never silently changes the batch size when the
        # input resolution is wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# IMPORTANT CODE BELOW
# Check if GPU is available; otherwise everything runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Create an instance of the neural network (freshly initialised weights).
# The training cells below operate on this module-level `net`.
net = FashionMNISTNet()
print(net)
# Move the model to the GPU if available. Every batch is moved to the same
# `device` in the training and evaluation loops.
net.to(device)


Using device: cuda


FashionMNISTNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)

# **Task 2**
**Training without Validation Set**
* Epochs = 11
* Batch Size = 32
* Learning Rate in SGD = 0.1
* Training Set Size = Full Training Dataset

In [None]:
# Task 2 baseline: fit on the complete training split (no validation hold-out).
NUM_EPOCHS = 11
train_dataset = full_train_dataset

# Mini-batch iterators; only the training stream is reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)} | Test: {len(test_dataset)}")

# Cross-entropy on the network's raw outputs, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)

# ---- Training ----
for epoch in range(NUM_EPOCHS):
    print("Training Epoch: ", epoch + 1)
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # `i` is the zero-based index of the last batch, so i + 1 batches ran.
    avg_loss = running_loss / (i + 1)
    print("Average Loss: ", avg_loss)

print("Training Finished.")

# ---- Test-set evaluation ----
net.eval()
correct, total = 0, 0

# No gradients are needed while scoring.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)

        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy as a percentage of the test set.
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")


Train: 60000 | Test: 10000
Training Epoch:  1
Average Loss:  0.6189665213425954
Training Epoch:  2
Average Loss:  0.3278081944982211
Training Epoch:  3
Average Loss:  0.26878975222706797
Training Epoch:  4
Average Loss:  0.23615353941619396
Training Epoch:  5
Average Loss:  0.21119665132860344
Training Epoch:  6
Average Loss:  0.19270971008886895
Training Epoch:  7
Average Loss:  0.1759480779826641
Training Epoch:  8
Average Loss:  0.15984519085114202
Training Epoch:  9
Average Loss:  0.1474567214558522
Training Epoch:  10
Average Loss:  0.13491636175786456
Training Epoch:  11
Average Loss:  0.12463298247394462
Training Finished.
Accuracy: 91.23%


**Training with Validation Set**

In [None]:
# Task 2 (validation runs): train on a fraction of the training split,
# validate on the remainder after every epoch, keep the checkpoint with the
# best validation accuracy, and report that checkpoint's test accuracy.
NUM_EPOCHS = 11
# Fraction of full_train_dataset used for training. Rerun this cell with
# 0.9, 0.8, 0.7 and 0.6 to reproduce the split comparison.
TRAIN_SET_SZIE = 0.9

# Start every run from freshly initialised weights. Reusing the `net` left
# over from the previous cell would continue training a model that has
# already been fitted on all 60000 training images, including the ones held
# out for validation here, which would make the split comparison meaningless.
net = FashionMNISTNet().to(device)

# Split full_train_dataset into training and validation subsets according to
# TRAIN_SET_SZIE (the validation set gets the remainder).
train_size = int(TRAIN_SET_SZIE * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Create DataLoaders; only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

# Bookkeeping for saving the model with the best validation accuracy.
best_val_acc = 0.0
best_model_path = "best_fashionmnist_model.pth"

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)

# Training loop
for epoch in range(NUM_EPOCHS):
    print("Training Epoch: ", epoch + 1)
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_loss = running_loss / (i + 1)
    print("Average Loss: ", avg_loss)

    # --- Validation ---
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total

    # Save best model (strict '>' keeps the earliest epoch on ties).
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(net.state_dict(), best_model_path)
        print(f"✅ New best model saved with val acc: {val_acc:.2f}%")

    print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

print("Training finished.")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

# Restore the best-validation checkpoint and switch to evaluation mode.
# map_location keeps the load working if the device differs from the one the
# checkpoint was saved on.
net.load_state_dict(torch.load(best_model_path, map_location=device))
net.eval()

# Test the neural network
correct = 0
total = 0

# Disable gradient calculation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = net(inputs)

        # Predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)

        # Update the total number of samples and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

Results from training the model without a validation set:<br>
Train: 60000 | Test: 10000<br>

Training Epoch:  1<br>
Average Loss:  0.654186999575297<br>
Training Epoch:  2<br>
Average Loss:  0.31929630262851716<br>
Training Epoch:  3<br>
Average Loss:  0.2617528966650367<br>
Training Epoch:  4<br>
Average Loss:  0.2281300640384356<br>
Training Epoch:  5<br>
Average Loss:  0.2042231744448344<br>
Training Epoch:  6<br>
Average Loss:  0.18343615941877167<br>
Training Epoch:  7<br>
Average Loss:  0.16709979757765928<br>
Training Epoch:  8<br>
Average Loss:  0.15363689922106763<br>
Training Epoch:  9<br>
Average Loss:  0.14050291257823508<br>
Training Epoch:  10<br>
Average Loss:  0.12784666278598208<br>
Training Epoch:  11<br>
Average Loss:  0.11665267256665975<br>

Test Accuracy Result:<br>
Accuracy: 91.75%<br>

Training and Validation of Model using 90%, 10% split between training and validation data:<br>

Train: 54000 | Val: 6000 | Test: 10000 <br>

✅ New best model saved with val acc: 87.17%<br>
Epoch [1/11], Loss: 0.6349, Val Acc: 87.17%<br>
✅ New best model saved with val acc: 87.58%<br>
Epoch [2/11], Loss: 0.3328, Val Acc: 87.58%<br>
✅ New best model saved with val acc: 89.93%<br>
Epoch [3/11], Loss: 0.2759, Val Acc: 89.93%<br>
✅ New best model saved with val acc: 90.45%<br>
Epoch [4/11], Loss: 0.2425, Val Acc: 90.45%<br>
Epoch [5/11], Loss: 0.2170, Val Acc: 90.10%<br>
✅ New best model saved with val acc: 91.83%<br>
Epoch [6/11], Loss: 0.1965, Val Acc: 91.83%<br>
Epoch [7/11], Loss: 0.1766, Val Acc: 91.00%<br>
Epoch [8/11], Loss: 0.1618, Val Acc: 91.10%<br>
Epoch [9/11], Loss: 0.1483, Val Acc: 91.50%<br>
Epoch [10/11], Loss: 0.1365, Val Acc: 91.42%<br>
✅ New best model saved with val acc: 92.08%<br>
Epoch [11/11], Loss: 0.1228, Val Acc: 92.08%<br>
Training finished.<br>
Best Validation Accuracy: 92.08%<br>
Accuracy: 91.83%

Training and Validation of Model using 80%, 20% split between training and validation data:<br>

Train: 48000 | Val: 12000 | Test: 10000<br>

✅ New best model saved with val acc: 86.15%<br>
Epoch [1/11], Loss: 0.6720, Val Acc: 86.15%<br>
✅ New best model saved with val acc: 88.38%<br>
Epoch [2/11], Loss: 0.3396, Val Acc: 88.38%<br>
Epoch [3/11], Loss: 0.2813, Val Acc: 87.63%<br>
✅ New best model saved with val acc: 89.60%<br>
Epoch [4/11], Loss: 0.2476, Val Acc: 89.60%<br>
✅ New best model saved with val acc: 90.53%<br>
Epoch [5/11], Loss: 0.2202, Val Acc: 90.53%<br>
✅ New best model saved with val acc: 91.31%<br>
Epoch [6/11], Loss: 0.2002, Val Acc: 91.31%<br>
Epoch [7/11], Loss: 0.1795, Val Acc: 91.14%<br>
Epoch [8/11], Loss: 0.1652, Val Acc: 91.13%<br>
✅ New best model saved with val acc: 91.47%<br>
Epoch [9/11], Loss: 0.1499, Val Acc: 91.47%<br>
✅ New best model saved with val acc: 91.52%<br>
Epoch [10/11], Loss: 0.1373, Val Acc: 91.52%<br>
Epoch [11/11], Loss: 0.1260, Val Acc: 91.15%<br>
Training finished.<br>
Best Validation Accuracy: 91.52%<br>
Accuracy: 91.24%


Training and Validation of Model using 70%, 30% split between training and validation data:<br>
Train: 42000 | Val: 18000 | Test: 10000<br>

✅ New best model saved with val acc: 83.73%<br>
Epoch [1/11], Loss: 0.6996, Val Acc: 83.73%<br>
Epoch [2/11], Loss: 0.3495, Val Acc: 83.53%<br>
✅ New best model saved with val acc: 88.91%<br>
Epoch [3/11], Loss: 0.2893, Val Acc: 88.91%<br>
✅ New best model saved with val acc: 90.19%<br>
Epoch [4/11], Loss: 0.2530, Val Acc: 90.19%<br>
Epoch [5/11], Loss: 0.2265, Val Acc: 88.84%<br>
Epoch [6/11], Loss: 0.2081, Val Acc: 90.19%<br>
Epoch [7/11], Loss: 0.1893, Val Acc: 90.09%<br>
✅ New best model saved with val acc: 91.13%<br>
Epoch [8/11], Loss: 0.1743, Val Acc: 91.13%<br>
Epoch [9/11], Loss: 0.1583, Val Acc: 90.46%<br>
✅ New best model saved with val acc: 91.18%<br>
Epoch [10/11], Loss: 0.1482, Val Acc: 91.18%<br>
Epoch [11/11], Loss: 0.1325, Val Acc: 90.88%<br>
Training finished.<br>
Best Validation Accuracy: 91.18%<br>
Accuracy: 91.08%

Training and Validation of Model using 60%, 40% split between training and validation data:<br>

Train: 36000 | Val: 24000 | Test: 10000<br>

✅ New best model saved with val acc: 84.08%<br>
Epoch [1/11], Loss: 0.8012, Val Acc: 84.08%<br>
✅ New best model saved with val acc: 86.35%<br>
Epoch [2/11], Loss: 0.3829, Val Acc: 86.35%<br>
✅ New best model saved with val acc: 89.65%<br>
Epoch [3/11], Loss: 0.3174, Val Acc: 89.65%<br>
Epoch [4/11], Loss: 0.2754, Val Acc: 89.34%<br>
✅ New best model saved with val acc: 89.89%<br>
Epoch [5/11], Loss: 0.2441, Val Acc: 89.89%<br>
✅ New best model saved with val acc: 90.22%<br>
Epoch [6/11], Loss: 0.2236, Val Acc: 90.22%<br>
✅ New best model saved with val acc: 91.36%<br>
Epoch [7/11], Loss: 0.2039, Val Acc: 91.36%<br>
Epoch [8/11], Loss: 0.1863, Val Acc: 91.34%<br>
Epoch [9/11], Loss: 0.1686, Val Acc: 91.28%<br>
✅ New best model saved with val acc: 91.93%<br>
Epoch [10/11], Loss: 0.1550, Val Acc: 91.93%<br>
Epoch [11/11], Loss: 0.1396, Val Acc: 91.13%<br>
Training finished.<br>
Best Validation Accuracy: 91.93%<br>
Accuracy: 91.14%<br>

**Findings and analysis:**<br>
Based on the results of running the training with different split percentages, the best accuracy came from the 90% training / 10% validation split. It is worth noting that all of the splits produced relatively close validation accuracies, and it is possible that repeated trials, or different batch sizes and numbers of epochs, would give different results. It would be worth trying different values over multiple runs to get a clearer and statistically provable result.

---



# **Task 3**<br>
Batch Size                  = 32<br>
Number of Epochs            = 11<br>
Training - Validation Split = 90% - 10%<br>
Train: 54000 | Val: 6000 | Test: 10000<br>
Learning Rate stepped between 0.001, 0.01, 0.1, 1, 10

---







In [None]:
# Task 3: SGD learning-rate comparison on a 90% / 10% train/validation split.
TRAIN_SET_SZIE = 0.9
NUM_EPOCHS = 11
# Rerun this cell with LEARNING_RATE = 0.001, 0.01, 0.1, 1 and 10.
LEARNING_RATE = 0.001

# Start every learning-rate run from freshly initialised weights. Without
# this, each run would continue from the previous run's weights, so a run
# that diverged (the LR = 1 and LR = 10 results sit near 10% accuracy) would
# carry over into whatever is trained next.
net = FashionMNISTNet().to(device)

# Split full_train_dataset into training and validation subsets according to
# TRAIN_SET_SZIE (the validation set gets the remainder).
train_size = int(TRAIN_SET_SZIE * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Create DataLoaders; only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

# Bookkeeping for saving the model with the best validation accuracy.
best_val_acc = 0.0
best_model_path = "best_fashionmnist_model.pth"

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(NUM_EPOCHS):
    print("Training Epoch: ", epoch + 1)
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_loss = running_loss / (i + 1)
    print("Average Loss: ", avg_loss)

    # --- Validation ---
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total

    # Save best model (strict '>' keeps the earliest epoch on ties).
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(net.state_dict(), best_model_path)
        print(f"✅ New best model saved with val acc: {val_acc:.2f}%")

    print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

print("Training finished.")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

# Restore the best-validation checkpoint and switch to evaluation mode.
net.load_state_dict(torch.load(best_model_path, map_location=device))
net.eval()

# Test the neural network
correct = 0
total = 0

# Disable gradient calculation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = net(inputs)

        # Predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)

        # Update the total number of samples and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

Training the model using a Learning rate of 0.001:<br>

✅ New best model saved with val acc: 19.65%<br>
Epoch [1/11], Loss: 2.3002, Val Acc: 19.65%<br>
✅ New best model saved with val acc: 21.00%<br>
Epoch [2/11], Loss: 2.2907, Val Acc: 21.00%<br>
✅ New best model saved with val acc: 42.87%<br>
Epoch [3/11], Loss: 2.2651, Val Acc: 42.87%<br>
✅ New best model saved with val acc: 49.98%<br>
Epoch [4/11], Loss: 2.0634, Val Acc: 49.98%<br>
✅ New best model saved with val acc: 68.20%<br>
Epoch [5/11], Loss: 1.1677, Val Acc: 68.20%<br>
✅ New best model saved with val acc: 72.37%<br>
Epoch [6/11], Loss: 0.7959, Val Acc: 72.37%<br>
✅ New best model saved with val acc: 72.60%<br>
Epoch [7/11], Loss: 0.7046, Val Acc: 72.60%<br>
✅ New best model saved with val acc: 76.15%<br>
Epoch [8/11], Loss: 0.6602, Val Acc: 76.15%<br>
✅ New best model saved with val acc: 76.98%<br>
Epoch [9/11], Loss: 0.6304, Val Acc: 76.98%<br>
✅ New best model saved with val acc: 77.68%<br>
Epoch [10/11], Loss: 0.6078, Val Acc: 77.68%<br>
Epoch [11/11], Loss: 0.5908, Val Acc: 77.33%<br>
Training finished.<br>
Best Validation Accuracy: 77.68%<br>
Accuracy: 77.28%<br>

Training the model using a Learning rate of 0.01:<br>

✅ New best model saved with val acc: 73.82%<br>
Epoch [1/11], Loss: 1.5512, Val Acc: 73.82%<br>
✅ New best model saved with val acc: 81.67%<br>
Epoch [2/11], Loss: 0.5830, Val Acc: 81.67%<br>
Epoch [3/11], Loss: 0.4789, Val Acc: 80.05%<br>
✅ New best model saved with val acc: 84.32%<br>
Epoch [4/11], Loss: 0.4188, Val Acc: 84.32%<br>
✅ New best model saved with val acc: 84.70%<br>
Epoch [5/11], Loss: 0.3814, Val Acc: 84.70%<br>
✅ New best model saved with val acc: 87.35%<br>
Epoch [6/11], Loss: 0.3529, Val Acc: 87.35%<br>
Epoch [7/11], Loss: 0.3318, Val Acc: 86.98%<br>
✅ New best model saved with val acc: 88.92%<br>
Epoch [8/11], Loss: 0.3132, Val Acc: 88.92%<br>
Epoch [9/11], Loss: 0.2999, Val Acc: 88.87%<br>
Epoch [10/11], Loss: 0.2867, Val Acc: 88.40%<br>
✅ New best model saved with val acc: 89.05%<br>
Epoch [11/11], Loss: 0.2750, Val Acc: 89.05%<br>
Training finished.<br>
Best Validation Accuracy: 89.05%<br>
Accuracy: 88.41%<br>

Training the model using a Learning rate of 0.1:<br>

✅ New best model saved with val acc: 84.95%<br>
Epoch [1/11], Loss: 0.6647, Val Acc: 84.95%<br>
✅ New best model saved with val acc: 89.23%<br>
Epoch [2/11], Loss: 0.3356, Val Acc: 89.23%<br>
✅ New best model saved with val acc: 89.65%<br>
Epoch [3/11], Loss: 0.2753, Val Acc: 89.65%<br>
✅ New best model saved with val acc: 90.50%<br>
Epoch [4/11], Loss: 0.2407, Val Acc: 90.50%<br>
✅ New best model saved with val acc: 91.17%<br>
Epoch [5/11], Loss: 0.2161, Val Acc: 91.17%<br>
✅ New best model saved with val acc: 91.40%<br>
Epoch [6/11], Loss: 0.1938, Val Acc: 91.40%<br>
Epoch [7/11], Loss: 0.1775, Val Acc: 88.27%<br>
✅ New best model saved with val acc: 92.28%<br>
Epoch [8/11], Loss: 0.1624, Val Acc: 92.28%<br>
Epoch [9/11], Loss: 0.1458, Val Acc: 91.32%<br>
Epoch [10/11], Loss: 0.1362, Val Acc: 91.77%<br>
✅ New best model saved with val acc: 92.38%<br>
Epoch [11/11], Loss: 0.1227, Val Acc: 92.38%<br>
Training finished.<br>
Best Validation Accuracy: 92.38%<br>
Accuracy: 91.39%<br>

Training the model using a Learning rate of 1:<br>

✅ New best model saved with val acc: 9.57%<br>
Epoch [1/11], Loss: 2.3073, Val Acc: 9.57%<br>
✅ New best model saved with val acc: 10.53%<br>
Epoch [2/11], Loss: 2.3105, Val Acc: 10.53%<br>
Epoch [3/11], Loss: 2.3101, Val Acc: 9.60%<br>
Epoch [4/11], Loss: 2.3102, Val Acc: 9.98%<br>
Epoch [5/11], Loss: 2.3100, Val Acc: 9.95%<br>
Epoch [6/11], Loss: 2.3107, Val Acc: 10.00%<br>
Epoch [7/11], Loss: 2.3101, Val Acc: 10.10%<br>
Epoch [8/11], Loss: 2.3100, Val Acc: 9.95%<br>
Epoch [9/11], Loss: 2.3104, Val Acc: 10.13%<br>
Epoch [10/11], Loss: 2.3094, Val Acc: 10.00%<br>
Epoch [11/11], Loss: 2.3096, Val Acc: 9.57%<br>
Training finished.<br>
Best Validation Accuracy: 10.53%<br>
Accuracy: 10.00%

Training the model using a Learning rate of 10:<br>

✅ New best model saved with val acc: 10.25%<br>
Epoch [1/11], Loss: 2.4786, Val Acc: 10.25%<br>
Epoch [2/11], Loss: 2.4661, Val Acc: 9.80%<br>
Epoch [3/11], Loss: 2.4739, Val Acc: 9.43%<br>
Epoch [4/11], Loss: 2.4743, Val Acc: 9.97%<br>
Epoch [5/11], Loss: 2.4753, Val Acc: 9.80%<br>
Epoch [6/11], Loss: 2.4688, Val Acc: 10.03%<br>
Epoch [7/11], Loss: 2.4680, Val Acc: 9.80%<br>
Epoch [8/11], Loss: 2.4697, Val Acc: 9.97%<br>
Epoch [9/11], Loss: 2.4786, Val Acc: 10.22%<br>
Epoch [10/11], Loss: 2.4742, Val Acc: 10.25%<br>
Epoch [11/11], Loss: 2.4696, Val Acc: 9.90%<br>
Training finished.<br>
Best Validation Accuracy: 10.25%<br>
Accuracy: 10.00%

**Findings and Analysis:**<br>
While it started off quite weak, the results with LR = 0.001 improved surprisingly quickly. Although the gains began to taper off and the run finished in the high-70% range, accuracy did continue to increase through the epochs. If it converged towards the optimum faster, it would be a useful rate. Unfortunately, LR = 0.01 and LR = 0.1 approached an optimum so much faster that LR = 0.001 is not feasible within the same number of epochs.<br>
The results from LR = 0.01 were impressive, and it seems like it would benefit from a larger number of epochs, as it was still making fairly consistent improvements towards the 90% range. I wonder if there is a point where this rate competes with LR = 0.1 in terms of accuracy vs. time.<br>
LR = 0.1 performed the strongest out of the 5 tests. It started at quite a high accuracy and showed consistent improvements through the 11 epochs. I'm curious to see how this value would perform with different batch sizes and/or numbers of epochs.<br>
LR = 1 and LR = 10 both had similar, and poor, results. They repeatedly bounced around 9-10% accuracy, almost alternating in a pendulum fashion. This clearly demonstrated the information discussed in Lecture 23 slide 31, where having too large a step size will jump over the optimum value.

---

# **Task 4**<br>

Changing to the ADAM training algorithm.<br>
Batch Size                  = 32<br>
Number of Epochs            = 11<br>
Training - Validation Split = 90% - 10%<br>
Train: 54000 | Val: 6000 | Test: 10000<br>
Learning Rate               = 0.1<br>

Prediction: I expect using the Adam training algorithm to return improved results. The adaptive nature should lead to improvements in comparison to SGD.


In [None]:
# Task 4: same setup as the SGD runs, but optimised with Adam.
# Reuses train_loader / val_loader / test_loader from the previous cell.
NUM_EPOCHS = 11
# 0.1 matches the SGD setting named in the Task 4 header. The write-up below
# also reports a rerun at 0.001; set that value here to reproduce it (and
# before running the AUROC cell, which loads the checkpoint saved here).
LEARNING_RATE = 0.1

# Start from freshly initialised weights so Adam is compared with SGD from
# the same starting point rather than fine-tuning whatever the previous cell
# left in `net`.
net = FashionMNISTNet().to(device)

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# Bookkeeping for saving the model with the best validation accuracy.
best_val_acc = 0.0
best_model_path = "best_fashionmnist_model.pth"

# Training loop
for epoch in range(NUM_EPOCHS):
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_loss = running_loss / (i + 1)

    # --- Validation ---
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total

    # Save best model (strict '>' keeps the earliest epoch on ties).
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(net.state_dict(), best_model_path)
        print(f"✅ New best model saved with val acc: {val_acc:.2f}%")

    print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

print("Training finished.")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

# Test the neural network
correct = 0
total = 0

# Load the saved best-validation model and set it to evaluation mode, as the
# SGD cells do. Previously this load was commented out, so the reported test
# accuracy belonged to the last epoch rather than to the saved checkpoint.
net.load_state_dict(torch.load(best_model_path, map_location=device))
net.eval()

# Disable gradient calculation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = net(inputs)

        # Predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)

        # Update the total number of samples and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")


✅ New best model saved with val acc: 9.88%
Epoch [1/11], Loss: 16.5248, Val Acc: 9.88%


KeyboardInterrupt: 

Results of using Adam training algorithm with the same parameters as SGD:<br>

Train: 54000 | Val: 6000 | Test: 10000<br>

✅ New best model saved with val acc: 9.57%<br>
Epoch [1/11], Loss: 73.4058, Val Acc: 9.57%<br>
✅ New best model saved with val acc: 10.47%<br>
Epoch [2/11], Loss: 2.3154, Val Acc: 10.47%<br>
Epoch [3/11], Loss: 2.3150, Val Acc: 9.57%<br>
Epoch [4/11], Loss: 2.3150, Val Acc: 9.65%<br>
Epoch [5/11], Loss: 2.3158, Val Acc: 9.57%<br>
✅ New best model saved with val acc: 10.58%<br>
Epoch [6/11], Loss: 2.3153, Val Acc: 10.58%<br>
Epoch [7/11], Loss: 2.3155, Val Acc: 9.57%<br>
✅ New best model saved with val acc: 11.00%<br>
Epoch [8/11], Loss: 2.3149, Val Acc: 11.00%<br>
Epoch [9/11], Loss: 2.3148, Val Acc: 9.57%<br>
Epoch [10/11], Loss: 2.3158, Val Acc: 10.47%<br>
Epoch [11/11], Loss: 2.3154, Val Acc: 9.35%<br>
Training finished.<br>
Best Validation Accuracy: 11.00%<br>
Accuracy: 10.00% <br>

I am surprised by these results! I have even run this a few times to verify I had it set up correctly. These results are very similar to the results I found when running SGD with a higher learning rate. <br>

**I decided to run the model with ADAM at LR = 0.001:**<br>

Train: 54000 | Val: 6000 | Test: 10000<br>

✅ New best model saved with val acc: 87.97%<br>
Epoch [1/11], Loss: 0.4663, Val Acc: 87.97%<br>
✅ New best model saved with val acc: 88.85%<br>
Epoch [2/11], Loss: 0.2876, Val Acc: 88.85%<br>
✅ New best model saved with val acc: 91.57%<br>
Epoch [3/11], Loss: 0.2425, Val Acc: 91.57%<br>
Epoch [4/11], Loss: 0.2119, Val Acc: 91.50%<br>
✅ New best model saved with val acc: 92.00%<br>
Epoch [5/11], Loss: 0.1879, Val Acc: 92.00%<br>
Epoch [6/11], Loss: 0.1670, Val Acc: 91.70%<br>
Epoch [7/11], Loss: 0.1503, Val Acc: 91.67%<br>
✅ New best model saved with val acc: 92.25%<br>
Epoch [8/11], Loss: 0.1307, Val Acc: 92.25%<br>
Epoch [9/11], Loss: 0.1182, Val Acc: 92.18%<br>
Epoch [10/11], Loss: 0.1046, Val Acc: 91.98%<br>
Epoch [11/11], Loss: 0.0951, Val Acc: 91.65%<br>
Training finished.<br>
Best Validation Accuracy: 92.25%<br>
Accuracy: 91.66%<br>

These results are closer to what I expected when I began. It seems that ADAM begins running into the "looping" behavior at a much lower rate than SGD does. I suppose that is why it is preferred. It would allow for the ability to make smaller adjustments while maintaining the same speed, so it should generally have a higher overall accuracy than SGD can reliably attain.

---

# **Task 4**

Using AUROC to evaluate model performance of CNN trained with ADAM<br>

Batch Size                  = 32<br>
Number of Epochs            = 11<br>
Training - Validation Split = 90% - 10%<br>
Train: 54000 | Val: 6000 | Test: 10000<br>
Learning Rate               = 0.001<br>
I decided to change the learning rate to 0.001 and to use the ADAM training algorithm, as it seemed to consistently get an equal or higher accuracy compared to SGD.<br>
I think the AUROC results will be decently high, maybe high 80%-mid 90%'s, because the model has a fairly high accuracy rate overall.


In [None]:
# One-vs-rest AUROC on the test set for a single FashionMNIST class, plus
# the usual multi-class accuracy, using the saved best-validation checkpoint.

# Define the positive class; every other class is treated as negative.
TRUTH_CLASS = 2

# Test the neural network
correct = 0
total = 0
auc_metric = BinaryAUROC()

# Load the saved model and set the model to evaluation mode.
net.load_state_dict(torch.load("best_fashionmnist_model.pth", map_location=device))
net.eval()

# Disable gradient calculation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass (raw logits, one row per image)
        outputs = net(inputs)

        # Predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)

        # Update the total number of samples and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # --- AUROC ---
        # 1.0 where the sample belongs to the positive class, else 0.0.
        binary_ground_truth = (labels == TRUTH_CLASS).float()
        # Score each sample by the softmax probability of the positive class.
        # AUROC ranks samples against each other, and a raw logit is only
        # meaningful relative to the other logits of the same sample, so the
        # normalised probability is the comparable quantity across samples.
        binary_predictions = torch.softmax(outputs, dim=1)[:, TRUTH_CLASS]

        auc_metric.update(binary_predictions, binary_ground_truth)

# Compute the Area Under the Receiver Operating Characteristic curve.
auc_final = auc_metric.compute()
print(f"Area Under Curve Results for class {TRUTH_CLASS}: {auc_final:.2f}")

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

Area Under Curve Results for: 0.96
Accuracy: 91.29%


**Results and Analysis**<br>

Area Under Curve Results: 0.99<br>
Accuracy: 91.29%<br>

I am not sure that I have implemented the AUROC correctly. A value of 0.99 with an accuracy of 91.29% just seems too high. Since the AUROC measures how many true positives are flagged in comparison to false positives, I suppose it makes sense that the model would be able to fairly reliably avoid mislabeling a particular class. <br>
I'd be interested in seeing the results for all classes to see if this holds across them.<br>
I tried a few other classes, and most were similarly high. Here is a result from setting 6 as the positive class:<br>
Area Under Curve Results for: 0.96<br>
Accuracy: 91.29%<br>

# **Final Build of CNN using ADAM and AUC**
*   Batch Size = 32
*   Epochs = 11
*   Learning Rate in ADAM = 0.001
*   Positive/Truth Class = 2
*   Training Data - Validation Data split: 90% - 10% (based on trials from 90% - 10% to 60% - 40%)<br>
The split between training and validation data can be altered by setting the `TRAIN_SET_SZIE` value in the code below.




In [None]:
!pip install torcheval
from torcheval.metrics import BinaryAUROC
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader

# Fraction of the original training data used for training.
# The validation set receives the remaining 1 - TRAIN_SET_SIZE.
TRAIN_SET_SIZE = 0.9
# Backward-compatible alias for the original (misspelled) constant name.
TRAIN_SET_SZIE = TRAIN_SET_SIZE
# Mini-batch size shared by the train, validation and test DataLoaders
BATCH_VAL = 128
# Positive class for the binary (one-vs-rest) AUROC;
# all other classes are treated as negative.
TRUTH_CLASS = 2
# Number of full passes over the training set
NUM_EPOCHS = 11

# Define data transformations: resize to 28 pixels, convert to a tensor and
# normalize the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(28),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the FashionMNIST dataset (downloaded to ./data when not present)
full_train_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform
)

test_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform
)

# Split full_train_dataset into training and validation sets according to
# TRAIN_SET_SIZE (90%/10% with the value above). Computing val_size as the
# remainder guarantees the two sizes always add up to the full dataset.
train_size = int(TRAIN_SET_SIZE * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Create DataLoaders; only the training data is shuffled so that validation
# and test batches are visited in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_VAL, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_VAL, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_VAL, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

# Define the neural network for FashionMNIST
class FashionMNISTNet(nn.Module):
    """Three-stage CNN followed by a three-layer classifier head.

    Expects single-channel 28x28 images and produces one score per class
    (10 classes). ``forward`` returns raw logits, which is what
    ``nn.CrossEntropyLoss`` expects; use ``self.softmax`` on the output
    when class probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        # Input: 1 x 28 x 28
        # conv1 keeps the spatial size (padding=1): 28x28 -> 28x28
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        # A single parameter-free pooling layer is reused after every conv
        # stage; each application halves the spatial size.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # After pool: 14x14. conv2 grows the map (padding=2): 14x14 -> 16x16
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=2)
        # After pool: 8x8. conv3 keeps the spatial size: 8x8 -> 8x8
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        # After pool: 4x4, so the flattened feature vector has 64 * 4 * 4 values
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=10)
        # Not applied in forward(); kept so callers can turn logits into
        # probabilities explicitly.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: image batch of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding unnormalized class scores.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, ...),
        # this can never silently change the batch size on a shape mismatch.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Return logits; the softmax is intentionally left to the caller/loss.
        return self.fc3(x)

# Training setup and loop: trains the network with Adam and keeps the
# checkpoint that achieves the best validation accuracy.

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Create an instance of the neural network and move it to the chosen device
net = FashionMNISTNet()
net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Best validation accuracy seen so far and where its weights are stored
best_val_acc = 0.0
best_model_path = "best_fashionmnist_model.pth"

# Training loop
for epoch in range(NUM_EPOCHS):
    # --- Training ---
    net.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses; len(train_loader) is the batch count,
    # so this does not rely on a loop index leaking out of the loop.
    avg_loss = running_loss / len(train_loader)

    # --- Validation ---
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            # Index of the largest logit is the predicted class
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total

    # Save best model: only overwrite the checkpoint on a strict improvement
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(net.state_dict(), best_model_path)
        print(f"✅ New best model saved with val acc: {val_acc:.2f}%")

    print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

print("Training finished.")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

# Test the neural network: report overall accuracy and the binary
# (TRUTH_CLASS vs. all other classes) AUROC on the test set.
correct = 0
total = 0
auc_metric = BinaryAUROC()

# Load the best checkpoint saved during training and switch to evaluation
# mode. map_location keeps loading working when the checkpoint was written
# on a different device than the one in use now.
net.load_state_dict(torch.load(best_model_path, map_location=device))
net.eval()

# Disable gradient calculation
with torch.no_grad():
    for inputs, labels in test_loader:

        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass (the network returns raw logits)
        outputs = net(inputs)

        # Get the predicted class: index of the largest logit
        predicted = outputs.argmax(dim=1)

        # Update the total number of samples and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # --- AUROC ---
        # Labels become 1.0 for the positive class and 0.0 otherwise.
        binary_ground_truth = (labels == TRUTH_CLASS).float()
        # Score each sample with the softmax probability of the positive
        # class. A raw logit is only meaningful relative to the other logits
        # of the same sample, so it is not a comparable ranking score across
        # samples, which is what AUROC needs.
        class_probabilities = torch.softmax(outputs, dim=1)
        binary_predictions = class_probabilities[:, TRUTH_CLASS]
        auc_metric.update(binary_predictions, binary_ground_truth)

# Calculate AUROC results
auc_results = auc_metric.compute()
print(f"Area Under Curve Results: {auc_results:.2f}")
# Calculate the accuracy as a percentage of correctly classified samples
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

Train: 54000 | Val: 6000 | Test: 10000
Using device: cuda
✅ New best model saved with val acc: 88.53%
Epoch [1/11], Loss: 0.4795, Val Acc: 88.53%
✅ New best model saved with val acc: 90.10%
Epoch [2/11], Loss: 0.2892, Val Acc: 90.10%
✅ New best model saved with val acc: 91.43%
Epoch [3/11], Loss: 0.2397, Val Acc: 91.43%
Epoch [4/11], Loss: 0.2078, Val Acc: 91.32%
Epoch [5/11], Loss: 0.1849, Val Acc: 91.00%
✅ New best model saved with val acc: 91.62%
Epoch [6/11], Loss: 0.1619, Val Acc: 91.62%
✅ New best model saved with val acc: 92.03%
Epoch [7/11], Loss: 0.1465, Val Acc: 92.03%
✅ New best model saved with val acc: 92.27%
Epoch [8/11], Loss: 0.1314, Val Acc: 92.27%
✅ New best model saved with val acc: 92.57%
Epoch [9/11], Loss: 0.1139, Val Acc: 92.57%
Epoch [10/11], Loss: 0.1029, Val Acc: 92.50%
Epoch [11/11], Loss: 0.0929, Val Acc: 92.53%
Training finished.
Best Validation Accuracy: 92.57%
Area Under Curve Results: 0.99
Accuracy: 91.29%
