In [34]:
## External Libararies
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

In [35]:
## Additional External Libraries (Deep Learning)
import torch
import torch.nn as nn
import random
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms as tfs
from PIL import Image
from torchvision.datasets import FashionMNIST

In [36]:
# Hyperparameter (Feel Free to Change These, but Make Sure your Training Loop Still Works as Expected)
TRAIN_BATCH_SIZE = 50
VAL_BATCH_SIZE = 50
TEST_BATCH_SIZE = 1

# Transform data to PIL images
transforms = tfs.Compose([tfs.ToTensor()]) ##TODO: Use the same from above or consider alternatives

# Train/Val Subsets
train_mask = range(50000)
val_mask = range(50000, 60000)

# Download/Load Dataset
train_dataset = FashionMNIST('./data', train=True, transform=transforms, download=True)
test_dataset = FashionMNIST('./data', train=False, transform=transforms, download=True)

# Data Loaders
train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, sampler=SubsetRandomSampler(train_mask))
val_dataloader = DataLoader(train_dataset, batch_size=VAL_BATCH_SIZE, sampler=SubsetRandomSampler(val_mask))
test_dataloader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)

In [37]:
class CNNet_2b(nn.Module):

    def __init__(self, seed_value):
        """

        """
        ## Inherent Torch Module
        super(CNNet_2b, self).__init__()

        # First conv layer with ReLU and MaxPool: 28*28*1 -> 28*28*32 -> 14*14*32
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Second conv layer with ReLU and MaxPool: 14*14*32 -> 12*12*64 -> 6*6*64
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )


        # Third fc layer with sigmoid activation
        self.fc1 = nn.Linear(6*6*64, 648)
        self.sig1 = nn.Sigmoid()
        self.drop1 = nn.Dropout2d(0.25)

        # Fourth fc layer with sigmoid activation
        self.fc2 = nn.Linear(648, 128)
        self.sig2 = nn.Sigmoid()
        self.drop2 = nn.Dropout2d(0.25)

        # Output layer
        self.output = nn.Linear(128, 10)

        self._initialize_weights(seed_value)


    def _initialize_weights(self, seed_value):
        """
        Initialize the weights and biases of the model.
        """
        torch.manual_seed(seed_value)
        torch.nn.init.xavier_uniform_(self.conv1[0].weight)
        torch.nn.init.xavier_uniform_(self.conv2[0].weight)

        torch.manual_seed(seed_value)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.normal_(self.fc1.bias)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        torch.nn.init.normal_(self.fc2.bias)
        torch.nn.init.xavier_uniform_(self.output.weight)
        torch.nn.init.normal_(self.output.bias)


    def forward(self, x):
        """
        """
        ##TODO: Setup Forward Pass
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.drop1(self.sig1(self.fc1(x)))
        x = self.drop2(self.sig2(self.fc2(x)))
        #x = self.sig1(self.fc1(x))
        #x = self.sig2(self.fc2(x))
        x = self.output(x)

        return x

In [38]:
def train(model,
          loss_f,
          optimizer,
          n_epoch=50,
          train_dataloader=train_dataloader,
          val_dataloader=val_dataloader,
          test_dataloader=test_dataloader,
          seed_value=None):
  # Define lists to store training and validation losses
  train_losses = []
  val_losses = []
  train_accs = []
  val_accs = []

  best_model = model
  best_train_acc = -1
  best_val_acc = -1
  best_ind = -1

  # Training loop
  for epoch in range(n_epoch):
      model.train()
      train_loss = 0.0
      train_correct = 0

      # Iterate through the training data
      for inputs, labels in train_dataloader:
          # Forward pass
          output = model(inputs)

          # Compute loss
          loss = loss_f(output, labels)

          # Backpropagation
          optimizer.zero_grad()  # Zero the gradients
          loss.backward()
          optimizer.step()

          # Update running training loss
          train_loss += loss.item()
          predict = output.argmax(axis=1)
          train_correct += (predict == labels).float().sum()

      # Compute average training loss for the epoch
      avg_train_loss = train_loss / 50000
      train_losses.append(avg_train_loss)
      train_acc = train_correct / 50000
      train_accs.append(train_acc)

      # Validation
      model.eval()
      val_loss = 0.0
      val_correct = 0
      with torch.no_grad():
          for inputs, labels in val_dataloader:
              output = model(inputs)
              loss = loss_f(output, labels)
              val_loss += loss.item()
              predict = output.argmax(axis=1)
              val_correct += (predict == labels).float().sum()

      avg_val_loss = val_loss / 10000
      val_losses.append(avg_val_loss)
      val_acc = val_correct / 10000
      val_accs.append(val_acc)

      # Record the best model
      if val_acc > best_val_acc:
        best_model = model
        best_ind = epoch
        best_train_acc = train_acc
        best_val_acc = val_acc

      print(f'Epoch [{epoch+1}/{n_epoch}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
      print(f'Train accuracy: {train_acc:.4f}, Val accuracy: {val_acc:.4f}')

  # Test
  model.eval()
  test_loss = 0.0
  test_correct = 0
  with torch.no_grad():
      for inputs, labels in test_dataloader:
          output = best_model(inputs)
          loss = loss_f(output, labels)
          test_loss += loss.item()
          predict = output.argmax(axis=1)
          test_correct += (predict == labels).float().sum()

  best_test_loss = test_loss / len(test_dataloader)
  best_test_acc = test_correct / len(test_dataloader)

  print(f'Best Model achieved at epoch: {best_ind:.4f}')
  print(f'train accuracy: {best_train_acc:.4f}, validation accuracy: {best_val_acc:.4f}, test accuracy: {best_test_acc:.4f}')


  return train_losses, val_losses, train_accs, val_accs, best_model, best_ind, best_train_acc, best_val_acc, best_test_acc

In [39]:
##TODO: Fit and evaluate your model. What do you observe?
n_epoch = 50
lr = 0.5
seed_Conv = random.randint(0, 10000)
model = CNNet_2b(seed_Conv)

# different optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
loss_f = nn.CrossEntropyLoss()

## Run Training Loop
# Result

train_losses, val_losses, train_accs, val_accs, best_model, best_ind, best_train_acc, best_val_acc, best_test_acc = train(
    model, loss_f, optimizer, n_epoch, seed_value = seed_Conv
)

Epoch [1/50], Train Loss: 0.0102, Val Loss: 0.0072
Train accuracy: 0.8110, Val accuracy: 0.8654
Epoch [2/50], Train Loss: 0.0064, Val Loss: 0.0060
Train accuracy: 0.8828, Val accuracy: 0.8882
Epoch [3/50], Train Loss: 0.0054, Val Loss: 0.0057
Train accuracy: 0.8997, Val accuracy: 0.8929
Epoch [4/50], Train Loss: 0.0049, Val Loss: 0.0051
Train accuracy: 0.9094, Val accuracy: 0.9047
Epoch [5/50], Train Loss: 0.0044, Val Loss: 0.0047
Train accuracy: 0.9176, Val accuracy: 0.9141
Epoch [6/50], Train Loss: 0.0041, Val Loss: 0.0051
Train accuracy: 0.9243, Val accuracy: 0.9055
Epoch [7/50], Train Loss: 0.0038, Val Loss: 0.0045
Train accuracy: 0.9291, Val accuracy: 0.9154
Epoch [8/50], Train Loss: 0.0034, Val Loss: 0.0044
Train accuracy: 0.9357, Val accuracy: 0.9182
Epoch [9/50], Train Loss: 0.0032, Val Loss: 0.0041
Train accuracy: 0.9413, Val accuracy: 0.9243
Epoch [10/50], Train Loss: 0.0029, Val Loss: 0.0047
Train accuracy: 0.9440, Val accuracy: 0.9174
Epoch [11/50], Train Loss: 0.0028, Val 

KeyboardInterrupt: ignored