In [1]:
import numpy as np
import torch
from torch import nn
import tqdm

In [2]:
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split

from torch import optim
import torch.nn.functional as F

In [3]:
# Use the following code to load and normalize the dataset for training and testing
# It will downlad the dataset into data subfolder (change to your data folder name)
train_dataset = torchvision.datasets.FashionMNIST('data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

test_dataset = torchvision.datasets.FashionMNIST('data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))


# Use the following code to create a validation set of 10%
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1,
)

# Generate training and validation subsets based on indices
train_split = Subset(train_dataset, train_indices)
val_split = Subset(train_dataset, val_indices)


# set batches sizes
train_batch_size = 512 #Define train batch size
test_batch_size  = 256 #Define test batch size (can be larger than train batch size)


# Define dataloader objects that help to iterate over batches and samples for
# training, validation and testing
train_batches = DataLoader(train_split, batch_size=train_batch_size, shuffle=True)
val_batches = DataLoader(val_split, batch_size=train_batch_size, shuffle=True)
test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)
                                           
num_train_batches=len(train_batches)
num_val_batches=len(val_batches)
num_test_batches=len(test_batches)


print(num_train_batches)
print(num_val_batches)
print(num_test_batches)


#Sample code to visulaize the first sample in first 16 batches 

# batch_num = 0
# for train_features, train_labels in train_batches:
    
#     if batch_num == 16:
#         break    # break here
    
#     batch_num = batch_num +1
#     print(f"Feature batch shape: {train_features.size()}")
#     print(f"Labels batch shape: {train_labels.size()}")
    
#     img = train_features[0].squeeze()
#     label = train_labels[0]
#     plt.imshow(img, cmap="gray")
#     plt.show()
#     print(f"Label: {label}")



# Sample code to plot N^2 images from the dataset
# def plot_images(XX, N, title):
#     fig, ax = plt.subplots(N, N, figsize=(8, 8))
    
#     for i in range(N):
#       for j in range(N):
#         ax[i,j].imshow(XX[(N)*i+j], cmap="Greys")
#         ax[i,j].axis("off")
#     fig.suptitle(title, fontsize=24)

# plot_images(train_dataset.data[:64], 8, "First 64 Training Images" )

    

106
12
40


In [4]:
# design CNN model with convolutional, pooling, and FC layers
class CNN(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        
        # Calculate the input dimension for the fully connected layers based on the output size of the convolutional layers
        fc_input_dim = 16 * (input_dim // 4) * (input_dim // 4)
        
        # Create hidden fully connected layers
        self.fc_layers = nn.ModuleList()
        for _ in range(num_hidden_layers):
            self.fc_layers.append(nn.Linear(fc_input_dim, hidden_dim))
            fc_input_dim = hidden_dim
        
        # Final fully connected layer
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        for layer in self.fc_layers:
            x = F.relu(layer(x))
        x = self.fc_out(x)
        return F.log_softmax(x, dim=1)

In [5]:
# Counting the number of weights
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [6]:
# Previous FCN model
class ACAIGFCNWithInitialization(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_dim, initialization):
        super(ACAIGFCNWithInitialization, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dim:
            layer = nn.Linear(prev_dim, dim)
            initialization(layer.weight.data)
            layers.append(layer)
            layers.append(nn.ReLU())
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


**TASK 1 and 2**

In [7]:
input_dim = 784  # Input dimension
output_dim = 10  # Output dimension (number of classes)
num_hidden_layers = 2  # Adjustable number of hidden layers
hidden_dim = [100, 128]  # Adjustable number of neurons in each hidden layer
learning_rate = 0.1  # Adjustable learning rate
num_epochs = 15  # Adjustable number of epochs
initialization = nn.init.xavier_normal_

# Initialize neural network model with input, output and hidden layer dimensions
model = ACAIGFCNWithInitialization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 92718


In [8]:
def train_model_with_initialization(optimizer, learning_rate, initialization, num_epochs=15):
    # Initialize model with specified initialization
    model = ACAIGFCNWithInitialization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    # Training loop
    train_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm.tqdm(train_batches, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            outputs = model(inputs.view(inputs.size(0), -1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_split)
        train_losses.append(epoch_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_batches:
                outputs = model(inputs.view(inputs.size(0), -1))
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    # Testing
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_batches:
            outputs = model(inputs.view(inputs.size(0), -1))
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = test_correct / test_total

    return train_losses, val_accuracies, test_accuracy

In [9]:
import time
start = time.time()

In [10]:
initializations = {
    'Xavier Normal': nn.init.xavier_normal_,
}
results_initialization = {}
# Given my laptop capacity, I modified the baseline configuration and picked Adam optimizer with learning rate 0.001 to test the initializations
# as they are the best performer
for initialization_name, initialization_func in initializations.items():
    print(f'Training with Adam optimizer, learning rate 0.001, and initialization {initialization_name}...')
    train_losses, val_accuracies, test_accuracy = train_model_with_initialization(optim.Adam, 0.001, initialization_func)
    results_initialization[(optim.Adam, 0.001, initialization_name)] = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'test_accuracy': test_accuracy
    }

Training with Adam optimizer, learning rate 0.001, and initialization Xavier Normal...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 47.40it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 48.42it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 50.39it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 50.33it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 51.01it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 50.80it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 51.38it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 50.25it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 50.75it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 50.35it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 52.49it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 52.55it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 52.35it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 51.02it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 51.98it/s]


In [11]:
end = time.time()
runtime = end-start

In [12]:
runtime

41.34636878967285

In [13]:
for initialization_name, initialization_func in initializations.items():
    print(f'Test Accuracy with {initialization_name} Initialization: {test_accuracy * 100:.2f}%')

Test Accuracy with Xavier Normal Initialization: 88.14%


In [14]:
input_dim = 784  # Input dimension
output_dim = 10  # Output dimension (number of classes)
num_hidden_layers = 2  # Adjustable number of hidden layers
hidden_dim = [64, 32]  # Adjustable number of neurons in each hidden layer
learning_rate = 0.1  # Adjustable learning rate
num_epochs = 15  # Adjustable number of epochs
initialization = nn.init.xavier_normal_

# Initialize neural network model with input, output and hidden layer dimensions
model = ACAIGFCNWithInitialization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 52650


In [15]:
start = time.time()

In [16]:
initializations = {
    'Xavier Normal': nn.init.xavier_normal_,
}
results_initialization = {}
# Given my laptop capacity, I modified the baseline configuration and picked Adam optimizer with learning rate 0.001 to test the initializations
# as they are the best performer
for initialization_name, initialization_func in initializations.items():
    print(f'Training with Adam optimizer, learning rate 0.001, and initialization {initialization_name}...')
    train_losses, val_accuracies, test_accuracy = train_model_with_initialization(optim.Adam, 0.001, initialization_func)
    results_initialization[(optim.Adam, 0.001, initialization_name)] = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'test_accuracy': test_accuracy
    }

Training with Adam optimizer, learning rate 0.001, and initialization Xavier Normal...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 49.03it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 50.46it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 51.88it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 52.92it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 52.95it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 50.66it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 52.86it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:01<00:00, 53.15it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:01<00:00, 53.07it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:01<00:00, 53.02it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:01<00:00, 53.08it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 51.85it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:01<00:00, 53.23it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 52.75it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 52.94it/s]


In [17]:
end = time.time()
runtime = end-start
runtime

44.07715606689453

In [75]:
for initialization_name, initialization_func in initializations.items():
    print(f'Test Accuracy with {initialization_name} Initialization: {test_accuracy * 100:.2f}%')

Test Accuracy with Xavier Normal Initialization: 86.99%


In [83]:
input_dim = 784  # Input dimension
output_dim = 10  # Output dimension (number of classes)
num_hidden_layers = 2  # Adjustable number of hidden layers
hidden_dim = [256, 32]  # Adjustable number of neurons in each hidden layer
learning_rate = 0.1  # Adjustable learning rate
num_epochs = 15  # Adjustable number of epochs
initialization = nn.init.xavier_normal_

# Initialize neural network model with input, output and hidden layer dimensions
model = ACAIGFCNWithInitialization(input_dim, output_dim, num_hidden_layers, hidden_dim, initialization)

num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 209514


In [84]:
initializations = {
    'Xavier Normal': nn.init.xavier_normal_,
}
results_initialization = {}
# Given my laptop capacity, I modified the baseline configuration and picked Adam optimizer with learning rate 0.001 to test the initializations
# as they are the best performer
for initialization_name, initialization_func in initializations.items():
    print(f'Training with Adam optimizer, learning rate 0.001, and initialization {initialization_name}...')
    train_losses, val_accuracies, test_accuracy = train_model_with_initialization(optim.Adam, 0.001, initialization_func)
    results_initialization[(optim.Adam, 0.001, initialization_name)] = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'test_accuracy': test_accuracy
    }
for initialization_name, initialization_func in initializations.items():
    print(f'Test Accuracy with {initialization_name} Initialization: {test_accuracy * 100:.2f}%')

Training with Adam optimizer, learning rate 0.001, and initialization Xavier Normal...


Epoch 1/15: 100%|██████████| 106/106 [00:02<00:00, 47.94it/s]
Epoch 2/15: 100%|██████████| 106/106 [00:02<00:00, 50.69it/s]
Epoch 3/15: 100%|██████████| 106/106 [00:02<00:00, 50.86it/s]
Epoch 4/15: 100%|██████████| 106/106 [00:02<00:00, 48.96it/s]
Epoch 5/15: 100%|██████████| 106/106 [00:02<00:00, 48.07it/s]
Epoch 6/15: 100%|██████████| 106/106 [00:02<00:00, 46.89it/s]
Epoch 7/15: 100%|██████████| 106/106 [00:02<00:00, 50.68it/s]
Epoch 8/15: 100%|██████████| 106/106 [00:02<00:00, 50.58it/s]
Epoch 9/15: 100%|██████████| 106/106 [00:02<00:00, 49.50it/s]
Epoch 10/15: 100%|██████████| 106/106 [00:02<00:00, 46.90it/s]
Epoch 11/15: 100%|██████████| 106/106 [00:02<00:00, 48.96it/s]
Epoch 12/15: 100%|██████████| 106/106 [00:02<00:00, 45.35it/s]
Epoch 13/15: 100%|██████████| 106/106 [00:02<00:00, 45.14it/s]
Epoch 14/15: 100%|██████████| 106/106 [00:02<00:00, 44.93it/s]
Epoch 15/15: 100%|██████████| 106/106 [00:02<00:00, 46.88it/s]


Test Accuracy with Xavier Normal Initialization: 88.26%


**TASK 3 and 4**

In [101]:
input_dim = 28  # FashionMNIST image size
output_dim = 10  # Number of classes
num_hidden_layers = 1  # Number of hidden layers
hidden_dim = 128  # Dimension of each hidden layer

model = CNN(input_dim, output_dim, num_hidden_layers, hidden_dim)

In [102]:
num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 103018


In [104]:
# Hyperparameters
learning_rate = 0.001
epochs = 10
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_batches, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/num_train_batches}")

# Test loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_batches:
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Testing Accuracy: {100 * correct / total}%")

Epoch 1, Loss: 0.27943156012948955
Epoch 2, Loss: 0.25557203028561937
Epoch 3, Loss: 0.2503346283480806
Epoch 4, Loss: 0.24259258216282106
Epoch 5, Loss: 0.24043646243945607
Epoch 6, Loss: 0.22824564056014116
Epoch 7, Loss: 0.22172040942142596
Epoch 8, Loss: 0.2200470024122382
Epoch 9, Loss: 0.21824732577463365
Epoch 10, Loss: 0.20888327034014575
Testing Accuracy: 90.05%


In [105]:
input_dim = 28  # FashionMNIST image size
output_dim = 10  # Number of classes
num_hidden_layers = 1  # Number of hidden layers
hidden_dim = 64  # Dimension of each hidden layer

model = CNN(input_dim, output_dim, num_hidden_layers, hidden_dim)

In [106]:
num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 52138


In [107]:
# Hyperparameters
learning_rate = 0.001
epochs = 10
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_batches, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/num_train_batches}")

# Test loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_batches:
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Testing Accuracy: {100 * correct / total}%")

Epoch 1, Loss: 0.8292431176271079
Epoch 2, Loss: 0.46860668513010134
Epoch 3, Loss: 0.4047158661316026
Epoch 4, Loss: 0.3734063061903108
Epoch 5, Loss: 0.3511682745983016
Epoch 6, Loss: 0.3396252618645722
Epoch 7, Loss: 0.3279499985137076
Epoch 8, Loss: 0.31355029681943497
Epoch 9, Loss: 0.3035225290453659
Epoch 10, Loss: 0.2964437506952376
Testing Accuracy: 88.14%


In [33]:
input_dim = 28  # FashionMNIST image size
output_dim = 10  # Number of classes
num_hidden_layers = 1  # Number of hidden layers
hidden_dim = 32  # Dimension of each hidden layer

model = CNN(input_dim, output_dim, num_hidden_layers, hidden_dim)

In [34]:
num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 20954


In [108]:
# Hyperparameters
learning_rate = 0.001
epochs = 10
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_batches, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/num_train_batches}")

# Test loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_batches:
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Testing Accuracy: {100 * correct / total}%")

Epoch 1, Loss: 0.2911332367726092
Epoch 2, Loss: 0.2821530344070129
Epoch 3, Loss: 0.27392544231887134
Epoch 4, Loss: 0.2648727003976984
Epoch 5, Loss: 0.25513743820055473
Epoch 6, Loss: 0.2526727726437011
Epoch 7, Loss: 0.24650486432156474
Epoch 8, Loss: 0.23747228690475788
Epoch 9, Loss: 0.23414623414008123
Epoch 10, Loss: 0.22878824314981136
Testing Accuracy: 89.41%


In [109]:
# Hyperparameters
input_dim = 28  # FashionMNIST image size
output_dim = 10  # Number of classes
num_hidden_layers = 1  # Number of hidden layers
hidden_dim = 16  # Dimension of each hidden layer

model = CNN(input_dim, output_dim, num_hidden_layers, hidden_dim)

In [110]:
num_weights = count_parameters(model)
print("Number of weights before adjustment:", num_weights)

Number of weights before adjustment: 13978


In [111]:
# Hyperparameters
learning_rate = 0.001
epochs = 10
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_batches, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/num_train_batches}")

# Test loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_batches:
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Testing Accuracy: {100 * correct / total}%")

Epoch 1, Loss: 1.2835093584825408
Epoch 2, Loss: 0.585116852285727
Epoch 3, Loss: 0.4869310296369049
Epoch 4, Loss: 0.43950424441751446
Epoch 5, Loss: 0.41234751495550265
Epoch 6, Loss: 0.3900205847227348
Epoch 7, Loss: 0.3748229227538379
Epoch 8, Loss: 0.36453474692578586
Epoch 9, Loss: 0.35269213959855855
Epoch 10, Loss: 0.3493419312081247
Testing Accuracy: 86.93%
