In [1]:
import numpy as np
import torch
from torch import nn, optim
import tqdm
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split

In [2]:
# Use the following code to load and normalize the dataset for training and testing
# It will downlad the dataset into data subfolder (change to your data folder name)
train_dataset = torchvision.datasets.FashionMNIST('data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

test_dataset = torchvision.datasets.FashionMNIST('data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

# Use the following code to create a validation set of 10%
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1,
)

# Generate training and validation subsets based on indices
train_split = Subset(train_dataset, train_indices)
val_split = Subset(train_dataset, val_indices)

# set batches sizes
train_batch_size = 512
test_batch_size = 256

# Define dataloader objects that help to iterate over batches and samples for
# training, validation and testing
train_batches = DataLoader(train_split, batch_size=train_batch_size, shuffle=True)
val_batches = DataLoader(val_split, batch_size=train_batch_size, shuffle=True)
test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

In [3]:
# Adjustable parameters
num_train_batches = len(train_batches)
num_val_batches = len(val_batches)
num_test_batches = len(test_batches)
input_dim = 784  # Input dimension
output_dim = 10  # Output dimension (number of classes)
num_hidden_layers = 2  # Adjustable number of hidden layers
hidden_dim = [400, 400]  # Adjustable number of neurons in each hidden layer
learning_rate = 5e-2  # Adjustable learning rate
num_epochs = 15  # Adjustable number of epochs

In [None]:
# Define your (As Cool As It Gets) Fully Connected Neural Network 
class ACAIGFCN(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim):
        super(ACAIGFCN, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dim:
            layers.append(nn.Linear(prev_dim, dim))
            layers.append(nn.ReLU())
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [50]:
# Initialize neural network model with input, output and hidden layer dimensions
model = ACAIGFCN(input_dim, output_dim, num_hidden_layers, hidden_dim)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Iterate over epochs, batches with progress bar and train+ validate the ACAIGFCN
# Track the loss and validation accuracy
# ACAIGFCN Training
train_losses = []
val_accuracies = []
for epoch in range(num_epochs):
    # Set model into training mode
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
        optimizer.zero_grad()
        outputs = model(inputs.view(inputs.size(0), -1))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_split)
    train_losses.append(epoch_loss)

    # ACAIGFCN Validation
    model.eval()
    correct = 0
    total = 0
    # Telling PyTorch we aren't passing inputs to network for training purpose
    with torch.no_grad():
        for inputs, labels in val_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_accuracy = correct / total
    val_accuracies.append(val_accuracy)
    # Record accuracy for the epoch; print training loss, validation accuracy
    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss}, Validation Accuracy: {val_accuracy * 100:.2f}%')

# Testing
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for inputs, labels in test_batches:
        outputs = model(inputs.view(inputs.size(0), -1))
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = test_correct / test_total
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')



Epoch 1/15:   0%|          | 0/106 [00:00<?, ?it/s]

Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 40.66it/s]


Epoch 1/15, Training Loss: 1.0389211749500697, Validation Accuracy: 77.22%


Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 46.56it/s]


Epoch 2/15, Training Loss: 0.6003602162643715, Validation Accuracy: 80.83%


Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 47.03it/s]


Epoch 3/15, Training Loss: 0.5114392004189667, Validation Accuracy: 81.75%


Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 46.82it/s]


Epoch 4/15, Training Loss: 0.4723648733386287, Validation Accuracy: 82.93%


Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 44.49it/s]


Epoch 5/15, Training Loss: 0.4391096002172541, Validation Accuracy: 82.25%


Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 37.21it/s]


Epoch 6/15, Training Loss: 0.4230526761478848, Validation Accuracy: 83.80%


Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 44.30it/s]


Epoch 7/15, Training Loss: 0.40402703648143345, Validation Accuracy: 84.57%


Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 45.34it/s]


Epoch 8/15, Training Loss: 0.38834971942725005, Validation Accuracy: 86.43%


Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 44.56it/s]


Epoch 9/15, Training Loss: 0.3770780355577116, Validation Accuracy: 85.92%


Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 44.88it/s]


Epoch 10/15, Training Loss: 0.3691605603430006, Validation Accuracy: 85.12%


Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 41.53it/s]


Epoch 11/15, Training Loss: 0.3621551866001553, Validation Accuracy: 86.55%


Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 47.29it/s]


Epoch 12/15, Training Loss: 0.35045969994862874, Validation Accuracy: 86.07%


Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 47.06it/s]


Epoch 13/15, Training Loss: 0.3452149110811728, Validation Accuracy: 87.43%


Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 47.20it/s]


Epoch 14/15, Training Loss: 0.33604657580234387, Validation Accuracy: 86.48%


Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 46.56it/s]


Epoch 15/15, Training Loss: 0.32954068407305964, Validation Accuracy: 87.27%
Test Accuracy: 86.01%


In [None]:
# Plotting training loss curve
plt.plot(train_losses)
plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss Curve')
plt.show()

In [None]:
# Plot training loss curve and validation accuracy curve
plt.plot(train_losses, label='Training Loss')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Value')
plt.title('Training Loss and Validation Accuracy')
plt.legend()
plt.savefig('582hw4f1.pdf')
plt.show()

In [57]:
# Now, define a function based on the above model that allows us to quickly try out different optimizer
def train_model(optimizer, learning_rate, num_epochs=15):
    # Initialize model
    model = ACAIGFCN(input_dim, output_dim, num_hidden_layers, hidden_dim)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    # Training loop
    train_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            outputs = model(inputs.view(inputs.size(0), -1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_split)
        train_losses.append(epoch_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_batches:
                outputs = model(inputs.view(inputs.size(0), -1))
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    # Testing
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = test_correct / test_total

    return train_losses, val_accuracies, test_accuracy

In [6]:
optimizers = {
    'SGD': optim.SGD,
    'RMSProp': optim.RMSprop,
    'Adam': optim.Adam
}

learning_rates = [0.001, 0.01, 0.1]

In [59]:
results = {}

for optimizer_name, optimizer_func in optimizers.items():
    for lr in learning_rates:
        print(f'Training with {optimizer_name} optimizer and learning rate {lr}...')
        train_losses, val_accuracies, test_accuracy = train_model(optimizer_func, lr)
        results[(optimizer_name, lr)] = {
            'train_losses': train_losses,
            'val_accuracies': val_accuracies,
            'test_accuracy': test_accuracy
        }

Training with SGD optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 42.38it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 46.44it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 45.88it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 46.94it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 47.08it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 43.43it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 41.89it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 39.31it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 45.10it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 45.73it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 44.32it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 46.82it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 41.96it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.87it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 47.10it/s]


Training with SGD optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 47.45it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 44.79it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 46.85it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 46.35it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 46.95it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 47.20it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 47.21it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 46.72it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 47.45it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 47.62it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 42.94it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 47.48it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 46.23it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 47.44it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 47.16it/s]


Training with SGD optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 47.10it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 47.08it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 47.20it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 47.13it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 46.80it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 45.67it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 46.40it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 47.33it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 47.85it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 46.13it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 46.92it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 45.18it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 46.50it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.50it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 42.74it/s]


Training with RMSProp optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 46.25it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 44.29it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 46.42it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 46.27it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 46.26it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 41.92it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 44.30it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 45.95it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 46.17it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.62it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 44.55it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 45.63it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 46.40it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 45.64it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.42it/s]


Training with RMSProp optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 45.82it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 46.20it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 46.07it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.52it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 40.80it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 37.60it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 41.54it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 45.40it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 46.45it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 45.78it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 45.53it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 42.69it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 43.63it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 42.20it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.81it/s]


Training with RMSProp optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 45.37it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 45.08it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.81it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 44.92it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 43.98it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 45.21it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 42.28it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 44.74it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 41.29it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.64it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 44.21it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 45.12it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 45.04it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 45.45it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.44it/s]


Training with Adam optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 44.75it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 44.81it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.54it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.21it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 42.85it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 44.39it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 44.60it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 43.97it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 44.31it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 44.55it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 44.86it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 44.67it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 43.37it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 40.09it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 41.14it/s]


Training with Adam optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 40.78it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 41.08it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.74it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 44.03it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 41.53it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 40.50it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 42.10it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 40.73it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 39.13it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 44.13it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 43.75it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 44.30it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 43.90it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 43.94it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 43.76it/s]


Training with Adam optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 42.70it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 42.98it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 41.06it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.55it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 42.13it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 41.78it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 37.81it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 39.69it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 41.01it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 40.25it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 40.49it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 38.27it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 40.40it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 39.93it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 39.85it/s]


In [None]:
# Plot and compare the performance of different optimizers
plt.figure(figsize=(12, 8))
for (optimizer_name, lr), result in results.items():
    plt.plot(result['train_losses'], label=f'{optimizer_name} (lr={lr:.3f})')

plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss Curves for Different Optimizers and Learning Rates')
plt.legend()
plt.show()

plt.figure(figsize=(12, 8))
for (optimizer_name, lr), result in results.items():
    plt.plot(result['val_accuracies'], label=f'{optimizer_name} (lr={lr:.3f})')

plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Curves for Different Optimizers and Learning Rates')
plt.legend()
plt.savefig('582hw4f2.pdf')
plt.show()

# Print test accuracies
print('\nTest Accuracies:')
for (optimizer_name, lr), result in results.items():
    print(f'{optimizer_name} (lr={lr:.3f}): {result["test_accuracy"] * 100:.2f}%')


In [3]:
# Define a new FCN that takes dropout probability as one of the parameters
class ACAIGFCNWithDropout(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim, dropout_prob=0.5):
        super(ACAIGFCNWithDropout, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dim:
            layers.append(nn.Linear(prev_dim, dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


In [4]:
def train_model_with_dropout(optimizer, learning_rate, num_epochs=15, dropout_prob=0.5):
    # Initialize model with dropout regularization
    model = ACAIGFCNWithDropout(input_dim, output_dim, num_hidden_layers, hidden_dim, dropout_prob)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    # Training loop
    train_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            outputs = model(inputs.view(inputs.size(0), -1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_split)
        train_losses.append(epoch_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_batches:
                outputs = model(inputs.view(inputs.size(0), -1))
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    # Testing
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = test_correct / test_total

    return train_losses, val_accuracies, test_accuracy

In [7]:
dropout_results = {}

for optimizer_name, optimizer_func in optimizers.items():
    for lr in learning_rates:
        print(f'Training with {optimizer_name} optimizer and learning rate {lr}...')
        dropout_train_losses, dropout_val_accuracies, dropout_test_accuracy = train_model_with_dropout(optimizer_func, lr)
        dropout_results[(optimizer_name, lr)] = {
            'train_losses': dropout_train_losses,
            'val_accuracies': dropout_val_accuracies,
            'test_accuracy': dropout_test_accuracy
        }

Training with SGD optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 39.99it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 43.56it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.65it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 45.09it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 44.26it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 43.18it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 44.18it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 43.10it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 42.31it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 41.30it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 42.30it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 44.30it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 40.91it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 41.40it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 42.90it/s]


Training with SGD optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 42.62it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 43.88it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 43.18it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 43.87it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 41.18it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 40.95it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 39.57it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 40.47it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 39.68it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 40.53it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 41.50it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 38.88it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 37.41it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 38.43it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 41.35it/s]


Training with SGD optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 44.01it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 43.93it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.89it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.47it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 41.23it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 42.17it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 41.83it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 43.64it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 40.93it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.37it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 42.38it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 43.91it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 41.42it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.39it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 44.66it/s]


Training with RMSProp optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 43.87it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 41.53it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 42.40it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 43.80it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 43.54it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 44.09it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 43.61it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 43.89it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 42.65it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.86it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 43.57it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 43.92it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 43.42it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.08it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 43.54it/s]


Training with RMSProp optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 42.70it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 43.78it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 39.04it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 43.29it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 42.41it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 39.20it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 39.03it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 41.90it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 43.54it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.95it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 41.98it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 40.99it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 39.12it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 42.41it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 38.75it/s]


Training with RMSProp optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 41.74it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 42.88it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 43.11it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 43.42it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 43.70it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 44.34it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 40.83it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 44.02it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 43.62it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 42.87it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 42.97it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 41.11it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 42.42it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 40.31it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 41.98it/s]


Training with Adam optimizer and learning rate 0.001...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 41.60it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 40.84it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 40.80it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.17it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 41.78it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:03<00:00, 33.98it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 42.15it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 41.00it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 40.60it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 42.16it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 39.42it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 39.76it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 38.90it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 42.19it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 40.34it/s]


Training with Adam optimizer and learning rate 0.01...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 39.70it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 41.15it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 37.64it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:03<00:00, 34.87it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 38.32it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 40.05it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 39.03it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 38.31it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 38.47it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 39.42it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 37.17it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 40.18it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 39.87it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 38.64it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 40.70it/s]


Training with Adam optimizer and learning rate 0.1...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 38.48it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 39.30it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 35.85it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 37.62it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 40.48it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 40.54it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 39.99it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 39.04it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 36.57it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 40.09it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 39.26it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 42.10it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 38.76it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 39.07it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 39.66it/s]


In [None]:
# Plotting
plt.figure(figsize=(12, 8))
for (optimizer_name, lr), result in dropout_results.items():
    plt.plot(result['train_losses'], label=f'{optimizer_name} (lr={lr:.3f})')

plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss Curves with Dropout Regularization')
plt.legend()
plt.savefig('582hw4f4.pdf')
plt.show()

plt.figure(figsize=(12, 8))
for (optimizer_name, lr), result in dropout_results.items():
    plt.plot(result['val_accuracies'], label=f'{optimizer_name} (lr={lr:.3f})')

plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Curves with Dropout Regularization')
plt.legend()
plt.savefig('582hw4f3.pdf')
plt.show()

# Print test accuracies
print('\nTest Accuracies with Dropout Regularization:')
for (optimizer_name, lr), result in dropout_results.items():
    print(f'{optimizer_name} (lr={lr:.3f}): {result["test_accuracy"] * 100:.2f}%')

In [9]:
# Define a new FCN to test different initializations
class ACAIGFCNWithInitialization(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim, initialization):
        super(ACAIGFCNWithInitialization, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dim:
            layer = nn.Linear(prev_dim, dim)
            initialization(layer.weight.data)
            layers.append(layer)
            layers.append(nn.ReLU())
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


In [10]:
def train_model_with_initialization(optimizer, learning_rate, initialization, num_epochs=15):
    # Initialize model with specified initialization
    model = ACAIGFCNWithInitialization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    # Training loop
    train_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            outputs = model(inputs.view(inputs.size(0), -1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_split)
        train_losses.append(epoch_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_batches:
                outputs = model(inputs.view(inputs.size(0), -1))
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    # Testing
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = test_correct / test_total

    return train_losses, val_accuracies, test_accuracy

In [13]:
initializations = {
    'Random Normal': nn.init.normal_,
    'Xavier Normal': nn.init.xavier_normal_,
    'Kaiming Uniform': nn.init.kaiming_uniform_
}

results_initialization = {}
# Given my laptop capacity, I modified the baseline configuration and picked Adam optimizer with learning rate 0.001 to test the initializations
# as they are the best performer
for initialization_name, initialization_func in initializations.items():
    print(f'Training with Adam optimizer, learning rate 0.001, and initialization {initialization_name}...')
    train_losses, val_accuracies, test_accuracy = train_model_with_initialization(optim.Adam, 0.001, initialization_func)
    results_initialization[(optim.Adam, 0.001, initialization_name)] = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'test_accuracy': test_accuracy
    }


Training with Adam optimizer, learning rate 0.001, and initialization Random Normal...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 42.40it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 43.89it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.64it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 45.24it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 42.22it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 44.03it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 41.81it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 44.57it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 44.17it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 44.12it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 43.77it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 43.77it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 44.91it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 46.04it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.94it/s]


Training with Adam optimizer, learning rate 0.001, and initialization Xavier Normal...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 40.39it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 42.22it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 44.73it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 44.58it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 46.05it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 45.75it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 43.82it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 44.32it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 43.02it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 42.20it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 45.22it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 45.01it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 45.69it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 45.74it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.69it/s]


Training with Adam optimizer, learning rate 0.001, and initialization Kaiming Uniform...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 45.22it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 44.00it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 39.45it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 37.05it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 40.47it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 42.16it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 40.50it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 41.65it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 42.73it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 42.16it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 40.84it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 44.40it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 42.59it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 43.07it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 44.74it/s]


In [18]:
for initialization_name, initialization_func in initializations.items():
    print(f'Test Accuracy with {initialization_name} Initialization: {test_accuracy * 100:.2f}%')

Test Accuracy with Random Normal Initialization: 88.96%
Test Accuracy with Xavier Normal Initialization: 88.96%
Test Accuracy with Kaiming Uniform Initialization: 88.96%


In [4]:
# # Define a new FCN to test batch normalization, not considering initializations
# class ACAIGFCNWithNormalization(nn.Module):
#     def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim):
#         super(ACAIGFCNWithNormalization, self).__init__()
#         layers = []
#         prev_dim = input_dim
#         for dim in hidden_dim:
#             layers.append(nn.Linear(prev_dim, dim))
#             layers.append(nn.BatchNorm1d(dim))  # Batch Normalization
#             layers.append(nn.ReLU())
#             prev_dim = dim
#         layers.append(nn.Linear(prev_dim, output_dim))
#         self.model = nn.Sequential(*layers)

#     def forward(self, x):
#         return self.model(x)

In [20]:
# # Given my laptop capacity, I modified the baseline configuration and picked Adam optimizer with learning rate 0.001 as they are the best performer
# def train_model_with_normalization(num_epochs=15):
#     # Initialize model with Batch Normalization
#     model = ACAIGFCNWithNormalization(input_dim, output_dim, num_hidden_layers, hidden_dim)

#     # Loss and optimizer
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=0.001)

#     # Training loop
#     train_losses = []
#     val_accuracies = []
#     for epoch in range(num_epochs):
#         # Training
#         model.train()
#         running_loss = 0.0
#         for inputs, labels in train_batches:
#             optimizer.zero_grad()
#             outputs = model(inputs.view(inputs.size(0), -1))
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()
#             running_loss += loss.item() * inputs.size(0)
#         epoch_loss = running_loss / len(train_split)
#         train_losses.append(epoch_loss)

#         # Validation
#         model.eval()
#         correct = 0
#         total = 0
#         with torch.no_grad():
#             for inputs, labels in val_batches:
#                 outputs = model(inputs.view(inputs.size(0), -1))
#                 _, predicted = torch.max(outputs, 1)
#                 total += labels.size(0)
#                 correct += (predicted == labels).sum().item()
#         val_accuracy = correct / total
#         val_accuracies.append(val_accuracy)

#     # Testing
#     model.eval()
#     test_correct = 0
#     test_total = 0
#     with torch.no_grad():
#         for inputs, labels in test_batches:
#             outputs = model(inputs.view(inputs.size(0), -1))
#             _, predicted = torch.max(outputs, 1)
#             test_total += labels.size(0)
#             test_correct += (predicted == labels).sum().item()
#     test_accuracy = test_correct / test_total

#     return train_losses, val_accuracies, test_accuracy

In [21]:
# # Train model with Batch Normalization
# train_losses_with_normalization, val_accuracies_with_normalization, test_accuracy_with_normalization = train_model_with_normalization()

# print(f'Test Accuracy with Batch Normalization: {test_accuracy_with_normalization * 100:.2f}%')


Test Accuracy with Batch Normalization: 88.84%


In [7]:
class ACAIGFCNWithNormalization(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim, initialization):
        super(ACAIGFCNWithNormalization, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dim:
            layers.append(nn.Linear(prev_dim, dim))
            layers.append(nn.BatchNorm1d(dim))  # Batch Normalization
            layers.append(nn.ReLU())
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.model = nn.Sequential(*layers)

        # Initialization
        for m in self.model.modules():
            if isinstance(m, nn.Linear):
                initialization(m.weight)  

    def forward(self, x):
        return self.model(x)

In [9]:
def train_model_with_normalization(optimizer, learning_rate, initialization, num_epochs=15):
    # Initialize model with specified initialization
    model = ACAIGFCNWithNormalization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    # Training loop
    train_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            outputs = model(inputs.view(inputs.size(0), -1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_split)
        train_losses.append(epoch_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_batches:
                outputs = model(inputs.view(inputs.size(0), -1))
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    # Testing
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = test_correct / test_total

    return train_losses, val_accuracies, test_accuracy


In [10]:
initializations = {
    'Random Normal': nn.init.normal_,
    'Xavier Normal': nn.init.xavier_normal_,
    'Kaiming Uniform': nn.init.kaiming_uniform_
}
results_initialization = {}

for initialization_name, initialization_func in initializations.items():
    print(f'Training with Adam optimizer, learning rate 0.001, initialization {initialization_name}, and Batch Normalization...')
    train_losses, val_accuracies, test_accuracy = train_model_with_normalization(optim.Adam, 0.001, initialization_func)
    results_initialization[(initialization_name)] = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'test_accuracy': test_accuracy
    }
    print(f'Test Accuracy with {initialization_name} Initialization: {test_accuracy * 100:.2f}%')


Training with Adam optimizer, learning rate 0.001, initialization Random Normal, and Batch Normalization...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 40.38it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 45.99it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 46.28it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 43.45it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 41.10it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 42.16it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 43.94it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 43.77it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 41.84it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 43.47it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 41.37it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 42.50it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 39.90it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.64it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 43.74it/s]


Test Accuracy with Random Normal Initialization: 84.78%
Training with Adam optimizer, learning rate 0.001, initialization Xavier Normal, and Batch Normalization...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 44.86it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 45.39it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 41.99it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 42.17it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 40.27it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 38.34it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 44.50it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 41.55it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 42.54it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 45.09it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 45.39it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 43.38it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 45.11it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.09it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 45.24it/s]


Test Accuracy with Xavier Normal Initialization: 88.75%
Training with Adam optimizer, learning rate 0.001, initialization Kaiming Uniform, and Batch Normalization...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 36.78it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 37.59it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 42.80it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 41.03it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 42.34it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 43.61it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 42.05it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 44.71it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:03<00:00, 32.74it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:03<00:00, 32.11it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 41.10it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 44.27it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 43.51it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 42.86it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 43.02it/s]


Test Accuracy with Kaiming Uniform Initialization: 88.50%
