In [None]:
# import dependencies
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from torch import nn

# Definition of model classes

In [None]:
# Model 1
class IaroHasNoFashionNet1(nn.Module):
    """CNN with one convolution/max-pooling stage and two fully-connected layers.

    The fully-connected part expects 3 feature maps of 12x12 after pooling,
    which is what single-channel 28x28 images produce
    (28 -> 5x5 conv -> 24 -> 2x2 pool -> 12). The forward pass returns
    log-probabilities over 10 classes.
    """

    # (channels, height, width) the conv/pool stage must produce for the fc layers
    _FEATURE_SHAPE = (3, 12, 12)

    # define the class constructor
    def __init__(self):

        # call super class constructor
        super().__init__()

        # specify convolution layer 1 - 1 input channel, 3 feature maps, 5x5 kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=5, stride=1, padding=0)

        # define max-pooling layer 1 - halves height and width
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # specify fully-connected (fc) layer 1 - in 12*12*3, out 48
        self.linear1 = nn.Linear(12*12*3, 48, bias=True)  # the linearity W*x+b
        self.relu1 = nn.ReLU(inplace=True)  # the non-linearity (shared by all layers)

        # specify fc layer 2 - in 48, out 10
        self.linear2 = nn.Linear(48, 10, bias=True)  # the linearity W*x+b

        # add a log-softmax to the last layer
        self.logsoftmax = nn.LogSoftmax(dim=1)  # log of the softmax over the class dimension

    # define network forward pass
    def forward(self, images):
        """Return class log-probabilities for the given images.

        Args:
            images: tensor whose trailing dimensions are (1, H, W) such that
                the conv/pool stage yields 3x12x12 features (e.g. 28x28 images).

        Returns:
            Tensor of shape (N, 10) holding the log-softmax output per image.

        Raises:
            ValueError: if the conv/pool output does not have the per-image
                shape the fully-connected layers were sized for.
        """
        # high-level feature learning via convolutional layers

        # define conv layer 1 forward pass
        x = self.pool1(self.relu1(self.conv1(images)))

        # fail loudly on wrongly sized inputs: view(-1, ...) below would
        # otherwise silently regroup pixels of different images into new rows
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f'expected conv features of shape {self._FEATURE_SHAPE} per image, '
                f'got {tuple(x.shape[-3:])} - check the input image size'
            )

        # reshape image pixels - one flat feature vector per image
        x = x.view(-1, 12*12*3)

        # define fc layer 1 forward pass
        x = self.relu1(self.linear1(x))

        # define layer 2 forward pass
        x = self.logsoftmax(self.linear2(x))

        # return forward pass result
        return x

In [None]:
# Model 3
class IaroHasNoFashionNet3(nn.Module):
    """CNN with two convolution layers (pooling after the first) and three fc layers.

    The fully-connected part expects 20 feature maps of 10x10, which is what
    single-channel 28x28 images produce
    (28 -> 5x5 conv -> 24 -> 2x2 pool -> 12 -> 3x3 conv -> 10). The forward
    pass returns log-probabilities over 10 classes.
    """

    # (channels, height, width) the conv stage must produce for the fc layers
    _FEATURE_SHAPE = (20, 10, 10)

    # define the class constructor
    def __init__(self):

        # call super class constructor
        super().__init__()

        # specify convolution layer 1 - 1 input channel, 60 feature maps, 5x5 kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=60, kernel_size=5, stride=1, padding=0)

        # define max-pooling layer 1 - halves height and width
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # specify convolution layer 2 - 60 input channels, 20 feature maps, 3x3 kernel
        self.conv2 = nn.Conv2d(in_channels=60, out_channels=20, kernel_size=3, stride=1, padding=0)

        # specify fully-connected (fc) layer 1 - in 10*10*20, out 1000
        self.linear1 = nn.Linear(10*10*20, 1000, bias=True)  # the linearity W*x+b
        self.relu1 = nn.ReLU(inplace=True)  # the non-linearity (shared by all layers)

        # specify fc layer 2 - in 1000, out 100
        self.linear2 = nn.Linear(1000, 100, bias=True)  # the linearity W*x+b

        # specify fc layer 3 - in 100, out 10
        self.linear3 = nn.Linear(100, 10, bias=True)  # the linearity W*x+b

        # add a log-softmax to the last layer
        self.logsoftmax = nn.LogSoftmax(dim=1)  # log of the softmax over the class dimension

    # define network forward pass
    def forward(self, images):
        """Return class log-probabilities for the given images.

        Args:
            images: tensor whose trailing dimensions are (1, H, W) such that
                the conv stage yields 20x10x10 features (e.g. 28x28 images).

        Returns:
            Tensor of shape (N, 10) holding the log-softmax output per image.

        Raises:
            ValueError: if the conv output does not have the per-image shape
                the fully-connected layers were sized for.
        """
        # high-level feature learning via convolutional layers

        # define conv layer 1 forward pass
        x = self.pool1(self.relu1(self.conv1(images)))

        # define conv layer 2 forward pass (no pooling after this layer)
        x = self.relu1(self.conv2(x))

        # fail loudly on wrongly sized inputs: view(-1, ...) below would
        # otherwise silently regroup pixels of different images into new rows
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f'expected conv features of shape {self._FEATURE_SHAPE} per image, '
                f'got {tuple(x.shape[-3:])} - check the input image size'
            )

        # reshape image pixels - one flat feature vector per image
        x = x.view(-1, 10*10*20)

        # define fc layer 1 forward pass
        x = self.relu1(self.linear1(x))

        # define fc layer 2 forward pass
        x = self.relu1(self.linear2(x))

        # define layer 3 forward pass
        x = self.logsoftmax(self.linear3(x))

        # return forward pass result
        return x

In [None]:
# Model 4
class IaroHasNoFashionNet4(nn.Module):
    """CNN with two convolution/max-pooling stages and three fully-connected layers.

    The fully-connected part expects 60 feature maps of 6x6, which is what
    single-channel 28x28 images produce
    (28 -> 3x3 conv -> 26 -> pool -> 13 -> padded 3x3 conv -> 13 -> pool -> 6).
    The forward pass returns log-probabilities over 10 classes.
    """

    # (channels, height, width) the conv/pool stages must produce for the fc layers
    _FEATURE_SHAPE = (60, 6, 6)

    # define the class constructor
    def __init__(self):

        # call super class constructor
        super().__init__()

        # specify convolution layer 1 - 1 input channel, 20 feature maps, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=3, stride=1, padding=0)

        # define max-pooling layer 1 - halves height and width (reused after both conv layers)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # specify convolution layer 2 - 20 input channels, 60 feature maps, size-preserving 3x3 kernel
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=60, kernel_size=3, stride=1, padding=1)

        # specify fully-connected (fc) layer 1 - in 6*6*60, out 1000
        self.linear1 = nn.Linear(6*6*60, 1000, bias=True)  # the linearity W*x+b
        self.relu1 = nn.ReLU(inplace=True)  # the non-linearity (shared by all layers)

        # specify fc layer 2 - in 1000, out 100
        self.linear2 = nn.Linear(1000, 100, bias=True)  # the linearity W*x+b

        # specify fc layer 3 - in 100, out 10
        self.linear3 = nn.Linear(100, 10, bias=True)  # the linearity W*x+b

        # add a log-softmax to the last layer
        self.logsoftmax = nn.LogSoftmax(dim=1)  # log of the softmax over the class dimension

    # define network forward pass
    def forward(self, images):
        """Return class log-probabilities for the given images.

        Args:
            images: tensor whose trailing dimensions are (1, H, W) such that
                the conv/pool stages yield 60x6x6 features (e.g. 28x28 images).

        Returns:
            Tensor of shape (N, 10) holding the log-softmax output per image.

        Raises:
            ValueError: if the conv/pool output does not have the per-image
                shape the fully-connected layers were sized for.
        """
        # high-level feature learning via convolutional layers

        # define conv layer 1 forward pass
        x = self.pool1(self.relu1(self.conv1(images)))

        # define conv layer 2 forward pass
        x = self.pool1(self.relu1(self.conv2(x)))

        # fail loudly on wrongly sized inputs: view(-1, ...) below would
        # otherwise silently regroup pixels of different images into new rows
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f'expected conv features of shape {self._FEATURE_SHAPE} per image, '
                f'got {tuple(x.shape[-3:])} - check the input image size'
            )

        # reshape image pixels - one flat feature vector per image
        x = x.view(-1, 6*6*60)

        # define fc layer 1 forward pass
        x = self.relu1(self.linear1(x))

        # define fc layer 2 forward pass
        x = self.relu1(self.linear2(x))

        # define layer 3 forward pass
        x = self.logsoftmax(self.linear3(x))

        # return forward pass result
        return x

# Analysis of the results

Select the model to analyze in the cell below:
- 1 - IaroHasNoFashionNet1
- 2 - IaroHasNoFashionNet2
- 3 - IaroHasNoFashionNet3
- 4 - IaroHasNoFashionNet4

In [None]:
# Select the model to analyze (one of 1, 2, 3, 4 - see the list above)
model = 1

# fail early on a typo instead of with a download error when the results are loaded
if model not in (1, 2, 3, 4):
    raise ValueError(f'model must be one of 1, 2, 3, 4, got {model!r}')

In [None]:
# load the training and evaluation results of the selected model
# (the file name is built from `model`, which is set in an earlier cell)
# NOTE(review): read_pickle unpickles remote content, which can execute arbitrary
# code - only run this against a repository you trust
file_url = f'https://github.com/iarokr/gserm-dl-homework/raw/main/models_fmnist/m{model}_training_results.pkl'
model_results = pd.read_pickle(file_url)
# display the loaded results table
model_results

# Plot training losses for different learning rates and mini-batch sizes

In [None]:
# extract the training loss: one row per (training run, epoch)
df = model_results.reset_index(drop=True).explode('train_epoch_losses')

# epoch index within each run, counted per run instead of assuming 100 epochs per run
df['pos'] = df.groupby(level=0).cumcount()
df = df.reset_index(drop=True)

# change the values in mini_batch_size column to string
df['mini_batch_size'] = df['mini_batch_size'].astype(str)

# x-axis range: number of epochs of the longest run (falls back to 100 if there are no rows)
n_epochs = int(df['pos'].max()) + 1 if not df.empty else 100

# using sns create one subplot per unique learning rate
# (the 2x2 grid shows at most four learning rates; zip drops any further ones)
fig, axes = plt.subplots(2, 2, sharex=True, figsize=(16,8))
fig.suptitle('IaroHasNoFashionNet' + str(model) + '- Train Epoch Losses')

for lr, ax in zip(df.lr.unique(), axes.flatten()):
    sns.lineplot(x='pos', y='train_epoch_losses', data=df[df.lr == lr], hue='mini_batch_size', palette="Greens", ax=ax)
    ax.set_title(f'lr = {lr}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Train Loss')
    ax.set_ylim(0, 2.5)
    ax.set_xlim(0, n_epochs)
    ax.grid(True)


# Plot evaluation accuracies for different learning rates and mini-batch sizes

In [None]:
# work on a copy so the loaded results stay untouched
df = model_results.copy(deep=True)

# categorical mini-batch sizes, used as the hue of the plot
df['mini_batch_size'] = df['mini_batch_size'].astype("category")

# set up the figure and axes
fig, ax = plt.subplots(figsize=(12, 8))
fig.suptitle('IaroHasNoFashionNet' + str(model) + '- Evaluation accuracy')

# one point per training run, grouped by learning rate
sns.stripplot(x='lr', y='accuracy', data=df, hue='mini_batch_size', s=10, ax=ax)

# label the axes after plotting: some seaborn versions replace labels that were
# set beforehand with the column names
ax.set_xlabel('Learning rate')
ax.set_ylabel('Evaluation accuracy')
ax.set_ylim(0.5, 1);

# Plot confusion matrix for the selected model

Specify the learning rate and mini-batch size of the training run whose confusion matrix should be plotted.

In [None]:
# training run whose confusion matrix is plotted in the next cell
# learning rate - possible values: 0.0005, 0.001, 0.005, 0.01
mat_lr = 1e-3
# mini-batch size - possible values: 8, 16, 32, 64, 128, 256, 512
mat_mbs = 8

In [None]:
# pick the run trained with the requested learning rate and mini-batch size
selected_runs = model_results[(model_results.lr == mat_lr) & (model_results.mini_batch_size == mat_mbs)]
if selected_runs.empty:
    # without this check a wrong parameter combination only shows up as an IndexError below
    raise ValueError(f'no results found for lr = {mat_lr}, mbs = {mat_mbs}')
mat = selected_runs.conf_matrix.values[0]

# define fashion mnist classes
fashion_classes = {0: 'T-shirt/top',
                    1: 'Trouser',
                    2: 'Pullover',
                    3: 'Dress',
                    4: 'Coat',
                    5: 'Sandal',
                    6: 'Shirt',
                    7: 'Sneaker',
                    8: 'Bag',
                    9: 'Ankle boot'}
class_names = list(fashion_classes.values())

# initialize the plot and define size
fig, ax = plt.subplots(figsize=(8, 8))

# plot corresponding confusion matrix
# (transposed to match the axis labels below - presumably the stored matrix has
# true labels on its rows; TODO confirm against the training code)
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False, cmap='YlOrRd_r', xticklabels=class_names, yticklabels=class_names, ax=ax)

# show the class names above the matrix and hide the tick marks
ax.tick_params(axis='both', which='major', labelsize=8, labelbottom=False, bottom=False, top=False, left=False, labeltop=True)

# set plot title
ax.set_title('IaroHasNoFashionNet' + str(model) + ' classification matrix (lr = ' + str(mat_lr) + ', mbs = ' + str(mat_mbs) + ')')

# set plot axis labels
ax.set_xlabel('[true label]')
ax.set_ylabel('[predicted label]');

# Comparison of training and evaluation losses

In [None]:
# build the plotting frame here instead of relying on whichever `df` an earlier
# cell left behind (after the training-loss cell, `df` holds one row per epoch)
loss_df = model_results.copy(deep=True)
loss_df['mini_batch_size'] = loss_df['mini_batch_size'].astype("category")

# set up the figure and axes
fig, ax = plt.subplots(figsize=(12, 8))
fig.suptitle('IaroHasNoFashionNet' + str(model) + '- Training vs Evaluation loss')

# one point per training run: the run's min_loss against its eval_loss
sns.scatterplot(x='min_loss', y='eval_loss', data=loss_df, hue='mini_batch_size', s=100, ax=ax)

ax.set_xlabel('Training loss')
ax.set_ylabel('Evaluation loss')

# same range on both axes so the two losses can be compared directly
ax.set_xlim(-0.01, 1.0)
ax.set_ylim(-0.01, 1.0);