# Train a neural network with fully connected layers on the Fashion-MNIST dataset

In [None]:
!pip install seaborn

In [None]:
%matplotlib inline

import numpy as np
import torch

import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torchvision.datasets import FashionMNIST
# Directory handed to the dataset constructor as its root (the data is downloaded there)
dataset_root = "./data/fashion_pt"  # path is relative to the notebook's working directory
# Alias for the dataset class used by the cells below
# (presumably so another torchvision dataset can be swapped in here -- confirm)
Dataset = FashionMNIST

## Data

In [None]:
# Fetch the dataset (downloaded on first use) and build the train / test splits

# Both splits get identical preprocessing: conversion to a tensor followed by
# normalisation with mean 0.5 and standard deviation 0.5
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

trainset = Dataset(root=dataset_root, train=True, transform=transform_train, download=True)
testset = Dataset(root=dataset_root, train=False, transform=transform_test, download=True)

# Human-readable class names; the position in the list is used as the numeric label
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
num_classes = len(classes)

### Data loader

In [None]:
# Wrap both splits in mini-batch loaders that reshuffle on every pass

batch_size = 16
train_loader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Show some examples using iterator

dataiter = iter(train_loader)
# Advance the iterator with the built-in next(); DataLoader iterators in
# current PyTorch releases do not provide a .next() method.
images, labels = next(dataiter)
images = images.numpy()

# Pick a random position inside the batch that was actually returned
idx = np.random.choice(len(images))
plt.imshow(np.squeeze(images[idx]), cmap='gray')

print('Images size: ' + str(images.shape))
print('Single squeezed image: ' + str(np.squeeze(images[idx]).shape))
print('Label: ' + str(labels[idx]))

In [None]:
# Show a full batch (uses `images` / `labels` loaded by the previous cell)

n_rows = 4
# add_subplot needs integer grid dimensions; "/" alone yields a float in Python 3.
# Rounding up keeps every image on the grid even if the batch is not a multiple of 4.
n_cols = int(np.ceil(len(images) / n_rows))

fig = plt.figure(figsize=(25, 8))
for idx in range(len(images)):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    ax.set_title(classes[labels[idx]])

In [None]:
# Show image with pixel values

# Pick one random image of the batch and squeeze away its singleton axis
idx = np.random.choice(batch_size)
img = np.squeeze(images[idx])

# Draw the image on one large axes
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max() / 2.5

# Overlay each pixel's value on top of the pixel itself
for x, y in np.ndindex(width, height):
    pixel = img[x][y]
    val = pixel.round(2) if pixel != 0 else 0  # 2 decimal rounding
    ax.annotate(
        str(val),
        xy=(y, x),  # first array index runs down the image, second runs across
        horizontalalignment='center',
        verticalalignment='center',
        # white text below the threshold, black above it, so the number stays visible
        color='white' if pixel < thresh else 'black',
    )

## Let's define a neural network with PyTorch

In [None]:
from torch import nn
from torch import optim
import torch.nn.functional as F

In [None]:
input_dim = 28 * 28
num_classes = len(classes)


class LinearNN(nn.Module):
    """Fully connected classifier: input_dim -> 64 -> num_classes.

    The hidden layer is followed by a ReLU. The output layer returns raw
    class scores: no activation and no softmax are applied, because the
    notebook trains with nn.CrossEntropyLoss, which already includes the
    log-softmax step.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of images.

        Args:
            x: batch of images; everything after the first (batch) axis is
                flattened and must contain ``input_dim`` values per sample.

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        x = x.view(x.size(0), -1)  # flatten the image
        x = F.relu(self.fc1(x))    # hidden layer + non-linearity
        return self.fc2(x)         # raw scores (logits)


model = LinearNN()
model

In [None]:
# Training configuration: step size, objective and optimizer

learning_rate = 1e-4

# Cross-entropy computed on the scores returned by the model
criterion = nn.CrossEntropyLoss()
# Adam over all parameters registered on the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

### Let's see how it performs before training

In [None]:
def eval_model_on_test_set():
    """Evaluate the global ``model`` on ``test_loader`` and print the result.

    Switches ``model`` to evaluation mode (callers that keep training must
    call ``model.train()`` again afterwards), runs every batch of
    ``test_loader`` through it without tracking gradients, and prints the
    accuracy in percent together with the mean per-sample loss measured by
    the global ``criterion``.

    Returns:
        None. The metrics are only printed.
    """
    correct = 0
    total = 0
    total_loss = 0.0
    model.eval()

    # Evaluation never back-propagates, so skip building the autograd graph
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            n_samples = labels.size(0)

            # criterion (nn.CrossEntropyLoss with its default reduction) yields
            # the batch mean; weighting by the real batch size keeps the
            # average exact for a short last batch and independent of the
            # global batch_size, which later cells rebind.
            total_loss += criterion(outputs, labels).item() * n_samples

            predicted = outputs.argmax(dim=1)  # index of the highest score
            total += n_samples
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero
        print('Accuracy: N/A, Test Loss: N/A (test loader is empty)')
        return

    accuracy = 100.0 * correct / total
    print('Accuracy: {}, Test Loss: {}'.format(accuracy, total_loss / total))


eval_model_on_test_set()

## Let's train it

In [None]:
def train(n_epochs):
    """Train the global ``model`` on ``train_loader`` for ``n_epochs`` epochs.

    Uses the global ``optimizer`` and ``criterion``. After every 1000 batches
    the mean training loss of those batches is printed and recorded, and the
    model is evaluated on the test set.

    Args:
        n_epochs: number of full passes over ``train_loader``.

    Returns:
        List with one average-loss value per completed group of 1000 batches.
    """
    model.train()

    loss_over_time = []  # one entry per 1000 processed batches

    for epoch in range(n_epochs):
        running_loss = 0.0

        # each item of the loader is an (inputs, labels) pair
        for batch_i, (inputs, labels) in enumerate(train_loader):
            # clear the gradients left over from the previous step
            optimizer.zero_grad()

            # forward pass and loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # backward pass, then parameter update
            loss.backward()
            optimizer.step()

            # .item() turns the loss tensor into a plain Python number
            running_loss += loss.item()

            # only report once per 1000 batches
            if (batch_i + 1) % 1000 != 0:
                continue

            avg_loss = running_loss / 1000
            loss_over_time.append(avg_loss)
            print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, batch_i + 1, avg_loss))
            running_loss = 0.0

            # the evaluation helper puts the model in eval mode; switch back
            eval_model_on_test_set()
            model.train()

    print('Finished Training')
    return loss_over_time


In [None]:
# First training run.
n_epochs = 10 # start small to see if your model works, initially

# call train and record the loss over time
# (train returns one average-loss value per 1000 processed batches)
training_loss = train(n_epochs)

In [None]:
# Lower the learning rate for the next training run.
learning_rate = 0.00003
# train() reads the global `optimizer`, so rebinding the name here is what makes
# the new learning rate take effect. NOTE: this builds a new Adam object instead
# of modifying the existing one.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Second training run; it continues from the weights the model currently has.
n_epochs = 10  # same number of epochs as the first run

# call train and record the loss over time
# NOTE: this rebinds `training_loss`, so the history of the earlier run is dropped
training_loss = train(n_epochs)

## Let's test it

In [None]:
# initialize accumulators to monitor test loss and per-class accuracy
test_loss = torch.zeros(1)
class_correct = [0.0] * num_classes
class_total = [0.0] * num_classes

# set the module to evaluation mode
model.eval()

# testing never back-propagates, so do not track gradients
with torch.no_grad():
    for batch_i, data in enumerate(test_loader):

        # get the input images and their corresponding labels
        inputs, labels = data

        # forward pass to get outputs
        outputs = model(inputs)

        # calculate the loss
        loss = criterion(outputs, labels)

        # update the running mean of the per-batch losses
        test_loss = test_loss + ((loss - test_loss) / (batch_i + 1))

        # predicted class = index of the highest score
        predicted = outputs.argmax(dim=1)

        # one boolean per sample: was the prediction right?
        correct = predicted.eq(labels)

        # tally EVERY sample of the batch under its true class
        # (iterating over range(labels[0]) would only visit as many samples
        # as the value of the first label)
        for label, is_correct in zip(labels.tolist(), correct.tolist()):
            class_correct[label] += is_correct
            class_total[label] += 1

print('Test Loss: {:.6f}\n'.format(test_loss.item()))

for class_name, n_correct, n_seen in zip(classes, class_correct, class_total):
    if n_seen > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            class_name, 100 * n_correct / n_seen, n_correct, n_seen))
    else:
        # this loop reports on the test set, so the message says "test"
        print('Test Accuracy of %5s: N/A (no test examples)' % (class_name))

overall_correct = sum(class_correct)
overall_total = sum(class_total)
if overall_total > 0:
    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * overall_correct / overall_total,
        overall_correct, overall_total))
else:
    print('\nTest Accuracy (Overall): N/A (no test examples)')

In [None]:
# Show a batch of images and its results
batch_size = 32  # NOTE: rebinds the notebook-wide batch_size
test_loader2 = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)

# obtain one batch of test images
dataiter = iter(test_loader2)
# use the built-in next(); DataLoader iterators in current PyTorch releases
# do not provide a .next() method
images, labels = next(dataiter)

# get predictions (evaluation mode, no gradient tracking)
model.eval()
with torch.no_grad():
    outputs = model(images)
preds = outputs.argmax(dim=1).numpy()
images = images.numpy()

# plot the images in the batch, along with predicted and true labels
n_rows = 4
# add_subplot needs integer grid dimensions; "/" alone yields a float in Python 3
n_cols = int(np.ceil(len(images) / n_rows))

fig = plt.figure(figsize=(25, 8))
for idx in range(len(images)):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    predicted_label = int(preds[idx])
    true_label = int(labels[idx])
    # title reads "predicted (true)"; green when they match, red otherwise
    ax.set_title("{} ({})".format(classes[predicted_label], classes[true_label]),
                 color=("green" if predicted_label == true_label else "red"))

In [None]:
# Calculate confusion matrix
from sklearn.metrics import confusion_matrix

# a single batch that holds the whole test set
batch_size = len(testset)  # NOTE: rebinds the notebook-wide batch_size
# the confusion matrix does not depend on sample order, so no shuffling is needed
test_loader3 = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)
dataiter = iter(test_loader3)
# use the built-in next(); DataLoader iterators in current PyTorch releases
# do not provide a .next() method
images, labels = next(dataiter)

# predictions in evaluation mode, without gradient tracking
model.eval()
with torch.no_grad():
    outputs = model(images)

# first argument: true labels, second argument: predicted labels
array = confusion_matrix(labels.numpy(), outputs.argmax(dim=1).numpy())

In [None]:
# Show confusion matrix

import pandas as pd
import seaborn as sn

# label both axes of the matrix with the class names
df_cm = pd.DataFrame(array, index=classes, columns=classes)
plt.figure(figsize=(20, 15))
# fmt='d' prints the integer counts as they are; seaborn's default annotation
# format ('.2g') would show counts of three or more digits in scientific notation
sn.heatmap(df_cm, annot=True, fmt='d')

## Competition

Who can get the best accuracy on the test set?

You can try to change your model architecture using:
* More linear layers (https://pytorch.org/docs/stable/nn.html#linear-layers)
* Activation functions like ReLU (https://pytorch.org/docs/stable/nn.html#torch.nn.ReLU) or Tanh (https://pytorch.org/docs/stable/nn.html#torch.nn.Tanh)
* Dropout (https://pytorch.org/docs/stable/nn.html#dropout-layers)

## Learnings

https://blog.slavv.com/37-reasons-why-your-neural-network-is-not-working-4020854bd607

* Apply the same transforms to the train and test data
* ReLU does not make sense after the last linear layer
* Use model.train() / model.eval() to make sure dropout is not applied on test data
* Use appropriate learning rates
* Do NOT apply softmax to the model output: nn.CrossEntropyLoss already includes LogSoftmax