In [21]:
## External Libararies
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

In [22]:
## Additional External Libraries (Deep Learning)
import torch
import torch.nn as nn
import random
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms as tfs
from PIL import Image
from torchvision.datasets import FashionMNIST

In [23]:
# Hyperparameter (Feel free to make modifications)
TRAIN_BATCH_SIZE = 50
VAL_BATCH_SIZE = 50
TEST_BATCH_SIZE = 1

# Transform data to PIL images
transforms = tfs.Compose([tfs.ToTensor()])

# Train/Val Subsets
train_mask = range(50000)
val_mask = range(50000, 60000)

# Download/Load Dataset
train_dataset = FashionMNIST('./data', train=True, transform=transforms, download=True)
test_dataset = FashionMNIST('./data', train=False, transform=transforms, download=True)

# Data Loaders
train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, sampler=SubsetRandomSampler(train_mask))
val_dataloader = DataLoader(train_dataset, batch_size=VAL_BATCH_SIZE, sampler=SubsetRandomSampler(val_mask))
test_dataloader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)

In [24]:
class CNNet_2a(nn.Module):

    def __init__(self, seed_value):
        """

        """
        ## Inherent Torch Module
        super(CNNet_2a, self).__init__()

        # First conv layer with ReLU and MaxPool: 28*28*1 -> 28*28*32 -> 14*14*32
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Second conv layer with ReLU and MaxPool: 14*14*32 -> 12*12*64 -> 6*6*64
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )


        # Third fc layer with sigmoid activation
        self.fc1 = nn.Linear(6*6*64, 600)
        self.sig1 = nn.Sigmoid()

        # Fourth fc layer with sigmoid activation
        self.fc2 = nn.Linear(600, 120)
        self.sig2 = nn.Sigmoid()

        # Output layer
        self.output = nn.Linear(120, 10)

        self._initialize_weights(seed_value)


    def _initialize_weights(self, seed_value):
        """
        Initialize the weights and biases of the model.
        """
        torch.manual_seed(seed_value)
        torch.nn.init.xavier_uniform_(self.conv1[0].weight)
        torch.nn.init.xavier_uniform_(self.conv2[0].weight)

        torch.manual_seed(seed_value)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.normal_(self.fc1.bias)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        torch.nn.init.normal_(self.fc2.bias)
        torch.nn.init.xavier_uniform_(self.output.weight)
        torch.nn.init.normal_(self.output.bias)


    def forward(self, x):
        """
        """
        ##TODO: Setup Forward Pass
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.sig1(self.fc1(x))
        x = self.sig2(self.fc2(x))
        x = self.output(x)

        return x

In [25]:
def train(model,
          loss_f,
          optimizer,
          n_epoch=50,
          train_dataloader=train_dataloader,
          val_dataloader=val_dataloader,
          test_dataloader=test_dataloader,
          seed_value=None):
  # Define lists to store training and validation losses
  train_losses = []
  val_losses = []
  train_accs = []
  val_accs = []

  best_model = model
  best_train_acc = -1
  best_val_acc = -1
  best_ind = -1

  # Training loop
  for epoch in range(n_epoch):
      model.train()
      train_loss = 0.0
      train_correct = 0

      # Iterate through the training data
      for inputs, labels in train_dataloader:
          # Forward pass
          output = model(inputs)

          # Compute loss
          loss = loss_f(output, labels)

          # Backpropagation
          optimizer.zero_grad()  # Zero the gradients
          loss.backward()
          optimizer.step()

          # Update running training loss
          train_loss += loss.item()
          predict = output.argmax(axis=1)
          train_correct += (predict == labels).float().sum()

      # Compute average training loss for the epoch
      avg_train_loss = train_loss / 50000
      train_losses.append(avg_train_loss)
      train_acc = train_correct / 50000
      train_accs.append(train_acc)

      # Validation
      model.eval()
      val_loss = 0.0
      val_correct = 0
      with torch.no_grad():
          for inputs, labels in val_dataloader:
              output = model(inputs)
              loss = loss_f(output, labels)
              val_loss += loss.item()
              predict = output.argmax(axis=1)
              val_correct += (predict == labels).float().sum()

      avg_val_loss = val_loss / 10000
      val_losses.append(avg_val_loss)
      val_acc = val_correct / 10000
      val_accs.append(val_acc)

      # Record the best model
      if val_acc > best_val_acc:
        best_model = model
        best_ind = epoch
        best_train_acc = train_acc
        best_val_acc = val_acc

      print(f'Epoch [{epoch+1}/{n_epoch}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
      print(f'Train accuracy: {train_acc:.4f}, Val accuracy: {val_acc:.4f}')

  # Test
  model.eval()
  test_loss = 0.0
  test_correct = 0
  with torch.no_grad():
      for inputs, labels in test_dataloader:
          output = best_model(inputs)
          loss = loss_f(output, labels)
          test_loss += loss.item()
          predict = output.argmax(axis=1)
          test_correct += (predict == labels).float().sum()

  best_test_loss = test_loss / len(test_dataloader)
  best_test_acc = test_correct / len(test_dataloader)

  print(f'Best Model achieved at epoch: {best_ind:.4f}')
  print(f'train accuracy: {best_train_acc:.4f}, validation accuracy: {best_val_acc:.4f}, test accuracy: {best_test_acc:.4f}')


  return train_losses, val_losses, train_accs, val_accs, best_model, best_ind, best_train_acc, best_val_acc, best_test_acc

In [26]:
n_epoch = 50
lr = 0.5
seed_Conv = random.randint(0, 10000)
model = CNNet_2a(seed_Conv)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
loss_f = nn.CrossEntropyLoss()

In [None]:
## Run Training Loop
# Result
train_loss_epochs = []
valid_accs_epochs = []

train_losses, val_losses, train_accs, val_accs, best_model, best_ind, best_train_acc, best_val_acc, best_test_acc = train(
    model, loss_f, optimizer, n_epoch = n_epoch, seed_value = seed_Conv
)

Epoch [1/50], Train Loss: 0.0154, Val Loss: 0.0108
Train accuracy: 0.7022, Val accuracy: 0.7881
Epoch [2/50], Train Loss: 0.0083, Val Loss: 0.0073
Train accuracy: 0.8441, Val accuracy: 0.8649
Epoch [3/50], Train Loss: 0.0069, Val Loss: 0.0070
Train accuracy: 0.8700, Val accuracy: 0.8685
Epoch [4/50], Train Loss: 0.0062, Val Loss: 0.0062
Train accuracy: 0.8822, Val accuracy: 0.8836
Epoch [5/50], Train Loss: 0.0057, Val Loss: 0.0057
Train accuracy: 0.8927, Val accuracy: 0.8943
Epoch [6/50], Train Loss: 0.0053, Val Loss: 0.0057
Train accuracy: 0.8995, Val accuracy: 0.8916
Epoch [7/50], Train Loss: 0.0049, Val Loss: 0.0057
Train accuracy: 0.9072, Val accuracy: 0.8922
Epoch [8/50], Train Loss: 0.0046, Val Loss: 0.0052
Train accuracy: 0.9128, Val accuracy: 0.9058
Epoch [9/50], Train Loss: 0.0042, Val Loss: 0.0053
Train accuracy: 0.9192, Val accuracy: 0.9063
Epoch [10/50], Train Loss: 0.0039, Val Loss: 0.0052
Train accuracy: 0.9249, Val accuracy: 0.9074


In [None]:
epochs = [*range(0, 50, 1)]
loss_list = [train_losses, val_losses]
acc_list = [train_accs, val_accs]

fig, axs = plt.subplots(1, 2,figsize=(20, 7))
for losses in loss_list:
  axs[0].plot(epochs, losses)
axs[0].set_title('Training Loss')
axs[0].legend(['train', 'validation'])
for accs in acc_list:
  axs[1].plot(epochs, accs)
axs[1].set_title('Testing Accuracy')
axs[1].legend(['train', 'validation'])


plt.show()