In [1]:
# MNIST digit recognition: a multi-class classification problem.
# Classify images of handwritten digits into the classes 0-9.

In [2]:
# Data Loader, Data Transforms, Batches
# Design the model (input size, output size, hidden layers, forward)
#       Multi layer neural network, hidden layer activation function, final layer activation function
#       loss function, optimiser
# Training Loop (batch training)
#       Forward Pass
#       Backward Pass
#       Update weights
# Model Evaluation
#       Test Accuracy
# Plots
#       Accuracy history, Loss history

#### Imports


In [3]:
from torch.utils.tensorboard import SummaryWriter

In [4]:
# TensorBoard writer shared by the later cells; it is created with "runs/mnist" as its log directory.
writer = SummaryWriter("runs/mnist")

In [5]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

import numpy as np
import matplotlib.pyplot as plt
import sys

#### Device configuration

In [6]:
# Show whether PyTorch can see a CUDA device (displayed as the cell output).
torch.cuda.is_available()

True

In [7]:
# Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#### Hyperparameters

In [8]:
# Model and training hyperparameters.
input_size = 28 * 28  # each image is 28 x 28 pixels, i.e. 784 values once flattened
num_classes = 10  # number of output classes
hidden_size1, hidden_size2, hidden_size3 = 500, 124, 64  # widths of the three hidden layers
batch_size = 100
num_epochs = 20
learning_rate = 1e-3

# Earlier experiment, kept for reference: the following smaller configuration (two hidden layers) performed very poorly.
# #### Hyperparameters
# input_size = 784 #28 x 28 images
# hidden_size1 = 124
# hidden_size2 = 64
# num_classes = 10 #number of output classes
# num_epochs = 20
# batch_size = 100
# learning_rate = 0.001

#### Dataset

In [9]:
# MNIST train and test splits; ToTensor() converts every image to a tensor.
# NOTE(review): root='/data' is an absolute path at the filesystem root - confirm it is writable where this notebook runs.
train_dst = torchvision.datasets.MNIST(
    root='/data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,  # fetch the dataset into root if it is not there yet
)
test_dst = torchvision.datasets.MNIST(
    root='/data',
    train=False,
    transform=transforms.ToTensor(),
)

In [10]:
# Number of samples in the test split.
len(test_dst)

10000

In [11]:
# Batch both splits; only the training data is reshuffled every epoch.
train_loader = DataLoader(train_dst, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dst, batch_size=batch_size)

In [12]:
# Number of batches per epoch (the length of a DataLoader counts batches, not samples).
len(train_loader)

600

##### Have a look at the first batch of data

In [13]:
# Pull a single batch from the training loader for inspection.
# The built-in next() is used because DataLoader iterators in recent PyTorch
# releases no longer provide the Python-2-style .next() method.
dataiter = iter(train_loader)
batch_data = next(dataiter)

In [14]:
# Unpack the batch into its two parts: the image tensor and the label tensor.
inputs,labels = batch_data

In [15]:
# A batch has two elements: the inputs and the labels.
len(batch_data)

2

In [16]:
# Shape of the image batch: (batch_size, channels, height, width).
inputs.shape

torch.Size([100, 1, 28, 28])

In [17]:
# Shape of the label batch: one label per image.
labels.shape

torch.Size([100])

###### Visualise Images

In [18]:
# Log the inspected batch to TensorBoard as a single image grid instead of plotting inline.
# Matplotlib alternative, kept for reference:
# for i in range(6):
#     plt.subplot(2,3,i+1)
#     plt.imshow(inputs[i][0],cmap='gray') #accessing the first channel
# plt.show()
img_grid = torchvision.utils.make_grid(inputs)
writer.add_image('mnist_images',img_grid)
# flush() writes pending events to disk while keeping the writer open for the
# add_graph / add_scalar calls made in later cells (close() is not needed just to flush).
writer.flush()


#### Design the Model

##### Neural Network Class

In [19]:
class NeuralNet_HW(nn.Module):
    """Fully connected classifier: three ReLU hidden layers followed by a linear output layer.

    ``forward`` returns raw, unnormalised class scores (logits). No softmax is
    applied there because ``nn.CrossEntropyLoss`` applies it internally.

    Args:
        input_features: size of each flattened input sample.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        hidden_size3: width of the third hidden layer.
        output_features: number of output classes.
    """

    def __init__(self,input_features,hidden_size1,hidden_size2,hidden_size3,output_features):
        super().__init__()
        self.lin1 = nn.Linear(input_features,hidden_size1)
        self.relu = nn.ReLU()  # stateless, so one instance is shared by all hidden layers
        self.lin2 = nn.Linear(hidden_size1,hidden_size2)
        self.lin3 = nn.Linear(hidden_size2,hidden_size3)
        self.lin4 = nn.Linear(hidden_size3,output_features)
        # Not used in forward(); available for turning logits into probabilities.
        # dim=1 normalises over the class dimension of a (batch, classes) tensor;
        # constructing Softmax without an explicit dim is deprecated.
        self.softmax = nn.Softmax(dim=1)

    def forward(self,x):
        """Return logits of shape (batch, output_features) for ``x`` of shape (batch, input_features)."""
        out = self.lin1(x)
        out = self.relu(out)
        out = self.lin2(out)
        out = self.relu(out)
        out = self.lin3(out)
        out = self.relu(out)
        # Softmax is deliberately not applied here: cross entropy loss applies it for us.
        out = self.lin4(out)
        return out

In [20]:
# Build the network from the hyperparameters and move its parameters to the selected device.
model = NeuralNet_HW(
    input_features=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    hidden_size3=hidden_size3,
    output_features=num_classes,
)
model = model.to(device)

In [21]:
# TODO: model.summary() is not available on nn.Module; look into a model-summary utility later

In [22]:
# Display the module tree to check the layer sizes.
model

NeuralNet_HW(
  (lin1): Linear(in_features=784, out_features=500, bias=True)
  (relu): ReLU()
  (lin2): Linear(in_features=500, out_features=124, bias=True)
  (lin3): Linear(in_features=124, out_features=64, bias=True)
  (lin4): Linear(in_features=64, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)

##### Loss Function and Optimiser

In [23]:
# Adam updates every model parameter; CrossEntropyLoss is fed the raw logits returned by the model.
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_func = nn.CrossEntropyLoss()

In [24]:
# Record the model graph in TensorBoard, using one flattened batch of inputs as the example input.
writer.add_graph(model,inputs.view(-1,28*28).to(device))
# flush() rather than close(): the same writer logs the per-epoch scalars in the training loop.
writer.flush()


#### Training Loop

In [25]:
num_steps_per_epoch = len(train_loader) # number of batches per epoch

for epoch in range(num_epochs): #looping over epochs
    model.train() # make sure the model is in training mode at the start of every epoch
    running_loss = 0 # sum of the per-batch losses seen in this epoch
    running_correct_preds = 0 # number of correctly classified training samples in this epoch

    for i,(inputs,labels) in enumerate(train_loader): #looping over batches

        # Flatten the images: (batch, 1, 28, 28) -> (batch, input_size),
        # then copy the batch to the device the model lives on.
        inputs = inputs.view(-1,input_size).to(device)
        labels = labels.to(device)

        #forward pass
        y_preds = model(inputs) # shape: n_samples x n_classes
        loss = loss_func(y_preds,labels)

        #backward pass
        optimiser.zero_grad() # clear gradients accumulated by the previous step
        loss.backward()

        #weight update
        optimiser.step()

        running_loss += loss.item()
        _,preds = torch.max(y_preds,1) # returns (values, indices); the index is the predicted class label
        current_step_correct_preds = torch.eq(labels,preds).sum().item()
        running_correct_preds += current_step_correct_preds

        # Progress report every 200 batches; loss and accuracy are for the current batch only.
        if (i+1)%200 == 0:
            print(f"epoch {epoch+1} / {num_epochs}, step {i+1} / {num_steps_per_epoch}, loss = {loss.item():.4f}, acc = {100.0*current_step_correct_preds/inputs.shape[0]}")

    epoch_loss = running_loss/num_steps_per_epoch # mean batch loss over the epoch
    epoch_acc = running_correct_preds/len(train_dst) # fraction in [0, 1], unlike the percentage printed above

    writer.add_scalar('training loss',epoch_loss,global_step=epoch)
    writer.add_scalar('accuracy',epoch_acc,global_step=epoch)

# Push the logged scalars to disk so TensorBoard shows every epoch once training ends.
writer.flush()
          

            

        
        

epoch 1 / 20, step 200 / 600, loss = 0.2653, acc = 96.0
epoch 1 / 20, step 400 / 600, loss = 0.0693, acc = 99.0
epoch 1 / 20, step 600 / 600, loss = 0.1919, acc = 93.0
epoch 2 / 20, step 200 / 600, loss = 0.0646, acc = 98.0
epoch 2 / 20, step 400 / 600, loss = 0.1045, acc = 97.0
epoch 2 / 20, step 600 / 600, loss = 0.0739, acc = 99.0
epoch 3 / 20, step 200 / 600, loss = 0.0426, acc = 99.0
epoch 3 / 20, step 400 / 600, loss = 0.0162, acc = 100.0
epoch 3 / 20, step 600 / 600, loss = 0.0626, acc = 98.0
epoch 4 / 20, step 200 / 600, loss = 0.0306, acc = 98.0
epoch 4 / 20, step 400 / 600, loss = 0.1033, acc = 97.0
epoch 4 / 20, step 600 / 600, loss = 0.0190, acc = 100.0
epoch 5 / 20, step 200 / 600, loss = 0.0315, acc = 99.0
epoch 5 / 20, step 400 / 600, loss = 0.0069, acc = 100.0
epoch 5 / 20, step 600 / 600, loss = 0.0196, acc = 99.0
epoch 6 / 20, step 200 / 600, loss = 0.1248, acc = 96.0
epoch 6 / 20, step 400 / 600, loss = 0.0788, acc = 97.0
epoch 6 / 20, step 600 / 600, loss = 0.0108, 

#### Model Evaluation

##### Test Accuracy

In [26]:
# Accuracy of the trained model on the test split.
model.eval() # evaluation mode; good practice even though this model has no dropout/batch-norm layers
with torch.no_grad(): # gradients are not needed for evaluation
    correct_pred = 0
    n_samples = 0
    for inputs,labels in test_loader:
        inputs = inputs.view(-1,28*28).to(device) # flatten the images and copy them to the device
        labels = labels.to(device)

        outputs = model(inputs)

        _,preds = torch.max(outputs,1) # returns (values, indices); the index is the predicted class label

        n_samples += inputs.shape[0]
        correct_pred += torch.eq(labels,preds).sum().item()

    accuracy = (100.0*correct_pred)/n_samples
    print(f"Test accuracy of the model = {accuracy:.2f}%")

Test accuracy of the model = 98.37%


In [27]:
# Accuracy of the trained model on the training split, for comparison with the test accuracy.
model.eval() # evaluation mode; good practice even though this model has no dropout/batch-norm layers
with torch.no_grad(): # gradients are not needed for evaluation
    correct_pred = 0
    n_samples = 0
    for inputs,labels in train_loader:
        inputs = inputs.view(-1,28*28).to(device) # flatten the images and copy them to the device
        labels = labels.to(device)

        outputs = model(inputs)

        _,preds = torch.max(outputs,1) # returns (values, indices); the index is the predicted class label

        n_samples += inputs.shape[0]
        correct_pred += torch.eq(labels,preds).sum().item()

    accuracy = (100.0*correct_pred)/n_samples
    print(f"Train accuracy of the model = {accuracy:.2f}%")

Train accuracy of the model = 99.84%
