In [1]:
import numpy as np
import torch
from torch import nn
import tqdm

In [2]:
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split


In [4]:
# Use the following code to load and normalize the dataset for training and testing
# It will downlad the dataset into data subfolder (change to your data folder name)
train_dataset = torchvision.datasets.FashionMNIST('data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

test_dataset = torchvision.datasets.FashionMNIST('data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))


# Use the following code to create a validation set of 10%
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1,
)

# Generate training and validation subsets based on indices
train_split = Subset(train_dataset, train_indices)
val_split = Subset(train_dataset, val_indices)


# set batches sizes
train_batch_size = 512 #Define train batch size
test_batch_size  = 256 #Define test batch size (can be larger than train batch size)


# Define dataloader objects that help to iterate over batches and samples for
# training, validation and testing
train_batches = DataLoader(train_split, batch_size=train_batch_size, shuffle=True)
val_batches = DataLoader(val_split, batch_size=train_batch_size, shuffle=True)
test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

num_train_batches=len(train_batches)
num_val_batches=len(val_batches)
num_test_batches=len(test_batches)


print(num_train_batches)
print(num_val_batches)
print(num_test_batches)

106
12
40


In [5]:
class ACAIGFCN(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dims, dropout_prob):
        super(ACAIGFCN, self).__init__()

        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()  # BatchNorm layers
        self.dropouts = nn.ModuleList()

        prev_dim = input_dim

        # Hidden Layers with BatchNorm and Dropout
        for h_dim in hidden_dims:
            layer = nn.Linear(prev_dim, h_dim)
            self.layers.append(layer)
            self.batch_norms.append(nn.BatchNorm1d(h_dim))  # Apply BatchNorm
            self.dropouts.append(nn.Dropout(p=dropout_prob))  # Apply Dropout
            prev_dim = h_dim

        # Output Layer (no BatchNorm or Dropout)
        self.output_layer = nn.Linear(prev_dim, output_dim)

        # Apply Kaiming Uniform Initialization
        self._initialize_weights()

    def _initialize_weights(self):
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
                nn.init.zeros_(layer.bias)  # Initialize bias to zero

        nn.init.kaiming_uniform_(self.output_layer.weight, nonlinearity='linear')
        nn.init.zeros_(self.output_layer.bias)

    def forward(self, input):
        for layer, batch_norm, dropout in zip(self.layers, self.batch_norms, self.dropouts):
            input = layer(input)  # Linear layer
            input = batch_norm(input)  # Normalize activations
            input = torch.nn.functional.relu(input)  # Apply ReLU
            input = dropout(input)  # Apply Dropout

        input = self.output_layer(input)  # Final output
        return input


def FCN_accuracy(batchSize, test_batch_size, epochs,
                  hidden_dim, learning_rate, optimizer,dropout):

  train_batches = DataLoader(train_split, batch_size=batchSize, shuffle=True)
  val_batches = DataLoader(val_split, batch_size=batchSize, shuffle=True)
  test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

  # Initialize neural network model #[50, 64, 80]
  model = ACAIGFCN(input_dim = 784, output_dim = 10, hidden_dims = hidden_dim, dropout_prob=dropout)

  train_loss_list = np.zeros((epochs,))
  validation_accuracy_list = np.zeros((epochs,))

  # Define loss function  and optimizer
  loss_func = torch.nn.CrossEntropyLoss() # Cross Entropy loss
  optm = optimizer(model.parameters(), lr=learning_rate) # torch.optim.SGD

  # Iterate over epochs, batches with progress bar and train+ validate the ACAIGFCN
  # Track the loss and validation accuracy
  for epoch in tqdm.trange(epochs):
    total_loss = 0

    # ACAIGFCN Training
    for train_features, train_labels in train_batches:
        # Set model into training mode
        model.train()

        # Reshape images into a vector
        train_features = train_features.reshape(-1, 28*28)

        # Reset gradients, Calculate training loss on model
        # Perfrom optimization, back propagation
        optm.zero_grad()
        outputs = model(train_features)
        loss = loss_func(outputs, train_labels)
        loss.backward()
        optm.step()

        total_loss += loss.item()

    # Record loss for the epoch
    train_loss_list[epoch] = total_loss / len(train_batches)


    # ACAIGFCN Validation
    correct = 0
    total = 0
    for val_features, val_labels in val_batches:
        # Telling PyTorch we aren't passing inputs to network for training purpose
        with torch.no_grad():
            model.eval()

            # Reshape validation images into a vector
            val_features = val_features.reshape(-1, 28*28)

            # Compute validation outputs (targets)
            # and compute accuracy
            validation_outputs = model(val_features)

            correct += (torch.argmax(validation_outputs, dim=1) == val_labels).sum().item()#.type(torch.FloatTensor)
            total += val_labels.size(0)
    validation_accuracy_list[epoch] = (correct / total) * 100
            #validation_accuracy_list[epoch] += correct.mean().item()

    #validation_accuracy_list[epoch] = validation_accuracy_list[epoch]/len(val_batches)

    # Record accuracy for the epoch; print training loss, validation accuracy val_acc * 100:.2f
    print(f"Epoch {epoch+1}: Training Loss = {train_loss_list[epoch]:.4f}, Validation Accuracy: {validation_accuracy_list[epoch] :.2f}%") #)


  #Calculate accuracy on test set
  t_correct = 0
  t_total = 0
  # Telling PyTorch we aren't passing inputs to network for training purpose
  with torch.no_grad():
      for test_features, test_labels in test_batches:
        model.eval()
        # Reshape test images into a vector
        test_features = test_features.reshape(-1, 28*28)

        # Get model predictions
        t_outputs = model(test_features)
        predicted = torch.argmax(t_outputs, dim=1)

        # Count correct predictions
        t_correct += (predicted == test_labels).sum().item()
        t_total += test_labels.size(0)

  # Compute total test accuracy
  test_accuracy = (t_correct / t_total) * 100

  return train_loss_list, validation_accuracy_list, test_accuracy


In [6]:
train_loss_list, validation_accuracy_list, test_accuracy = FCN_accuracy(
    batchSize=400,
    test_batch_size=156,
    epochs=10,
    hidden_dim=[100, 150, 100],
    learning_rate=0.005,
    optimizer=torch.optim.Adam,
    dropout=0.1  # Adjust dropout rate
  )
print(f'100K_FCN test_accuracy{test_accuracy}')
#t = 2m2s, 100K_FCN test_accuracy 88.14999999999999

 10%|█         | 1/10 [00:12<01:52, 12.55s/it]

Epoch 1: Training Loss = 0.5325, Validation Accuracy: 86.38%


 20%|██        | 2/10 [00:27<01:49, 13.70s/it]

Epoch 2: Training Loss = 0.3797, Validation Accuracy: 86.22%


 30%|███       | 3/10 [00:38<01:28, 12.71s/it]

Epoch 3: Training Loss = 0.3441, Validation Accuracy: 87.47%


 40%|████      | 4/10 [00:50<01:15, 12.51s/it]

Epoch 4: Training Loss = 0.3171, Validation Accuracy: 86.97%


 50%|█████     | 5/10 [01:02<01:00, 12.19s/it]

Epoch 5: Training Loss = 0.3010, Validation Accuracy: 88.10%


 60%|██████    | 6/10 [01:14<00:47, 11.99s/it]

Epoch 6: Training Loss = 0.2879, Validation Accuracy: 89.12%


 70%|███████   | 7/10 [01:25<00:35, 11.85s/it]

Epoch 7: Training Loss = 0.2711, Validation Accuracy: 88.27%


 80%|████████  | 8/10 [01:37<00:23, 11.83s/it]

Epoch 8: Training Loss = 0.2601, Validation Accuracy: 88.73%


 90%|█████████ | 9/10 [01:49<00:11, 11.82s/it]

Epoch 9: Training Loss = 0.2511, Validation Accuracy: 89.30%


100%|██████████| 10/10 [02:00<00:00, 12.03s/it]

Epoch 10: Training Loss = 0.2447, Validation Accuracy: 89.32%





100K_FCN test_accuracy88.14999999999999


In [8]:
# Task 2: FCN 50K and 200K
train_loss_list_50K, validation_accuracy_list_50K, test_accuracy_50K = FCN_accuracy(
    batchSize=400,
    test_batch_size=156,
    epochs=10,
    hidden_dim=[70, 34],
    learning_rate=0.005,
    optimizer=torch.optim.Adam,
    dropout=0.1  # Adjust dropout rate
  )
print(f'50K_FCN test_accuracy{test_accuracy_50K}')
#t = 1m54s, 50K_FCN test_accuracy87.69

 10%|█         | 1/10 [00:10<01:38, 10.93s/it]

Epoch 1: Training Loss = 0.6297, Validation Accuracy: 86.03%


 20%|██        | 2/10 [00:21<01:27, 10.96s/it]

Epoch 2: Training Loss = 0.4088, Validation Accuracy: 87.08%


 30%|███       | 3/10 [00:33<01:17, 11.05s/it]

Epoch 3: Training Loss = 0.3668, Validation Accuracy: 87.32%


 40%|████      | 4/10 [00:45<01:08, 11.44s/it]

Epoch 4: Training Loss = 0.3454, Validation Accuracy: 88.02%


 50%|█████     | 5/10 [00:57<00:58, 11.77s/it]

Epoch 5: Training Loss = 0.3218, Validation Accuracy: 88.32%


 60%|██████    | 6/10 [01:08<00:46, 11.67s/it]

Epoch 6: Training Loss = 0.3081, Validation Accuracy: 88.25%


 70%|███████   | 7/10 [01:19<00:34, 11.38s/it]

Epoch 7: Training Loss = 0.2996, Validation Accuracy: 89.00%


 80%|████████  | 8/10 [01:30<00:22, 11.26s/it]

Epoch 8: Training Loss = 0.2910, Validation Accuracy: 88.58%


 90%|█████████ | 9/10 [01:41<00:11, 11.22s/it]

Epoch 9: Training Loss = 0.2814, Validation Accuracy: 88.33%


100%|██████████| 10/10 [01:52<00:00, 11.29s/it]

Epoch 10: Training Loss = 0.2707, Validation Accuracy: 88.27%





50K_FCN test_accuracy87.69


In [39]:
train_loss_list_200K, validation_accuracy_list_200K, test_accuracy_200K = FCN_accuracy(
    batchSize=400,
    test_batch_size=156,
    epochs=10,
    hidden_dim=[200, 150, 100, 64],
    learning_rate=0.005,
    optimizer=torch.optim.Adam,
    dropout=0.1  # Adjust dropout rate
  )
print(f'200K_FCN test_accuracy: {test_accuracy_200K}')
#t = 2m1s, 200K_FCN test_accuracy: 88.0

 10%|█         | 1/10 [00:12<01:48, 12.02s/it]

Epoch 1: Training Loss = 0.5502, Validation Accuracy: 85.07%


 20%|██        | 2/10 [00:23<01:35, 11.90s/it]

Epoch 2: Training Loss = 0.3845, Validation Accuracy: 86.90%


 30%|███       | 3/10 [00:36<01:24, 12.06s/it]

Epoch 3: Training Loss = 0.3453, Validation Accuracy: 87.65%


 40%|████      | 4/10 [00:47<01:11, 12.00s/it]

Epoch 4: Training Loss = 0.3240, Validation Accuracy: 88.68%


 50%|█████     | 5/10 [00:59<00:59, 11.93s/it]

Epoch 5: Training Loss = 0.3011, Validation Accuracy: 87.98%


 60%|██████    | 6/10 [01:11<00:47, 11.94s/it]

Epoch 6: Training Loss = 0.2805, Validation Accuracy: 88.78%


 70%|███████   | 7/10 [01:23<00:35, 11.91s/it]

Epoch 7: Training Loss = 0.2727, Validation Accuracy: 89.02%


 80%|████████  | 8/10 [01:35<00:23, 11.86s/it]

Epoch 8: Training Loss = 0.2606, Validation Accuracy: 88.85%


 90%|█████████ | 9/10 [01:47<00:11, 11.83s/it]

Epoch 9: Training Loss = 0.2474, Validation Accuracy: 89.95%


100%|██████████| 10/10 [01:58<00:00, 11.89s/it]

Epoch 10: Training Loss = 0.2395, Validation Accuracy: 89.23%





200K_FCN test_accuracy: 88.0


In [17]:
import torch.optim as optim

# CNN Model
class CNN(nn.Module):
    def __init__(self, num_filters=32, hidden_dim=100, output_size=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, num_filters, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(num_filters, num_filters * 2, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(num_filters * 2 * 7 * 7, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Train and evaluate function
def train_evaluate(model, train_loader, test_loader, epochs=10, lr=0.005, optimizer_type=optim.Adam):
    optimizer = optimizer_type(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

# Define dataset loaders
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.5,), (0.5,))])
train_loader = DataLoader(torchvision.datasets.FashionMNIST(root="./data", train=True, download=True, transform=transform), batch_size=400, shuffle=True)
test_loader = DataLoader(torchvision.datasets.FashionMNIST(root="./data", train=False, download=True, transform=transform), batch_size=156, shuffle=False)


In [40]:
# Task 3: CNN 100K Model
import itertools
import torch.nn.functional as F

CNN_100K_model = CNN(num_filters=10, hidden_dim=90)

lr_values = [0.001, 0.005, 0.01]
optimizer_types = [optim.Adam, optim.SGD]

def hyperparameter_tuning(train_loader, test_loader, epochs=10):
    best_accuracy = 0
    best_params = {}

    param_combinations = list(itertools.product(lr_values, optimizer_types))

    for lr, optimizer_type in param_combinations:
        print(f"Training with lr={lr}, optimizer={optimizer_type}")

        # Train and evaluate the model
        accuracy = train_evaluate(CNN_100K_model, train_loader, test_loader, epochs=epochs, lr=lr, optimizer_type=optimizer_type)

        print(f"Accuracy: {accuracy}")

        # Update the best parameters if we get a higher accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {'lr': lr, 'optimizer_type': optimizer_type}

    print("Best hyperparameters:", best_params)
    print("Best accuracy:", best_accuracy)

# Perform hyperparameter tuning
hyperparameter_tuning(train_loader, test_loader, epochs=10)

Training with lr=0.001, optimizer=<class 'torch.optim.adam.Adam'>
Accuracy: 0.8932
Training with lr=0.001, optimizer=<class 'torch.optim.sgd.SGD'>
Accuracy: 0.8966
Training with lr=0.005, optimizer=<class 'torch.optim.adam.Adam'>
Accuracy: 0.9093
Training with lr=0.005, optimizer=<class 'torch.optim.sgd.SGD'>
Accuracy: 0.9171
Training with lr=0.01, optimizer=<class 'torch.optim.adam.Adam'>
Accuracy: 0.9058
Training with lr=0.01, optimizer=<class 'torch.optim.sgd.SGD'>
Accuracy: 0.9155
Best hyperparameters: {'lr': 0.005, 'optimizer_type': <class 'torch.optim.sgd.SGD'>}
Best accuracy: 0.9171


In [29]:
# Task 4: CNN Variants (50K, 20K, 10K)
cnn_50k = CNN(num_filters=8, hidden_dim=64)
cnn_20k = CNN(num_filters=6, hidden_dim=32)
cnn_10k = CNN(num_filters=4, hidden_dim=10)

In [36]:
cnn_50k_acc = train_evaluate(cnn_50k, train_loader, test_loader,optimizer_type=torch.optim.SGD)
print(f"CNN 50K Accuracy: {cnn_50k_acc:.4f}")
#time = 2m15s, CNN 50K Accuracy: 0.8988

CNN 50K Accuracy: 0.8988


In [42]:
cnn_20k_acc = train_evaluate(cnn_20k, train_loader, test_loader, optimizer_type=torch.optim.SGD)

print(f"CNN 20K Accuracy: {cnn_20k_acc:.4f}")
#t = 2m32s
#CNN 20K Accuracy:0.7938

CNN 20K Accuracy: 0.7938


In [41]:
cnn_10k_acc = train_evaluate(cnn_10k, train_loader, test_loader, optimizer_type=torch.optim.SGD)
print(f"CNN 10K Accuracy: {cnn_10k_acc:.4f}")
#t = 2m29s,0.7875

CNN 10K Accuracy: 0.7875
