# Dat Do

# Handwritten Number Digits Recognition

# 8-6-18



### About this notebook

This is a Convolutional Neural Network I've created and trained to recognize handwritten digits. I've tried to add a fair amount of commentary along the way to help visualize what is going on. The dataset I'm using is the MNIST Database of Handwritten Digits, which contains a training set of 60,000 28x28 examples and a test set of 10,000 examples. The digits have been size-normalized and centered in a fixed-size image. I hope you'll learn something from this, and please leave any thoughts or suggestions that you may have!

In [None]:
# Import packages.
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.autograd import Variable
from torchvision import datasets, transforms

# The dataset is provided through the 'torchvision' package; the MNIST data is downloaded below.
# The batch size is 100, i.e. on each iteration the model takes 100 images and feeds them
# through the convolutional layers, then through the fully connected layers.

# Define a transform to normalize the data.
# MNIST images have a single channel, so Normalize gets exactly one mean and one std.
# (Three-element tuples describe a 3-channel image and do not broadcast against a
# 1-channel tensor on current torchvision releases.)
# ToTensor() scales pixels to [0, 1]; (x - 0.5) / 0.5 then maps them to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                             ])

# Download and load the training data. Batch size equals 100.
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True)

# Download and load the test data. Batch size equals 100.
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=True)

In [None]:
# The training data is loaded into 'trainloader'. 'iter(trainloader)' creates an iterator over
# its batches, and the builtin next() pulls the first batch from it. (The builtin is used instead
# of the iterator's '.next()' method, which current PyTorch DataLoader iterators do not provide.)
# 'images' is a tensor with size (100, 1, 28, 28), i.e. 100 images per batch, 1 color channel, and 28x28 pixels.
dataiter = iter(trainloader)
images, labels = next(dataiter)
images.size()

In [None]:
# Visualize the image at index 99 (the last one in a batch of 100) and print its label.
sample_image = images[99].numpy().squeeze()    # squeeze() drops the size-1 channel axis for imshow.
plt.imshow(sample_image, cmap='Greys_r')
print("Target: ", labels[99])

In [None]:
# Convolutional neural network (two convolutional layers).
class ConvNet(nn.Module):
    """Two-stage convolutional classifier for 1x28x28 digit images.

    Layers, in the order they are registered:
      * conv1: 1 input channel -> n_conv1 output channels, 5x5 kernel, stride 1, padding 2.
      * conv2: n_conv1 input channels -> n_conv2 output channels, 5x5 kernel, stride 1, padding 2.
      * mp: 2x2 max pooling with stride 2; applied after each convolution, so the
        spatial size is halved twice (28 -> 14 -> 7).
      * drop_out: a Dropout layer that is created here but not applied in forward().
      * fc1: 7 * 7 * n_conv2 nodes -> 1000 nodes.
      * fc2: 1000 nodes -> 10 nodes (one score per digit class).

    Args:
        n_conv1: number of output channels (depth) of the first convolution.
        n_conv2: number of output channels (depth) of the second convolution.
    """

    def __init__(self, n_conv1=32, n_conv2=64):
        super().__init__()

        self.n_conv1 = n_conv1
        self.n_conv2 = n_conv2

        # Both convolutions share the same geometry; with a 5x5 kernel, padding 2
        # and stride 1 the spatial size of the input is preserved.
        conv_geometry = dict(kernel_size=5, stride=1, padding=2)
        self.conv1 = nn.Conv2d(1, n_conv1, **conv_geometry)
        self.conv2 = nn.Conv2d(n_conv1, n_conv2, **conv_geometry)
        self.mp = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop_out = nn.Dropout()    # Defined for regularisation; forward() does not call it.
        self.fc1 = nn.Linear(n_conv2 * 7 * 7, 1000)
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        """Run a batch through the network and return the raw class scores.

        Flow: (conv -> ReLU -> max-pool) twice -> flatten each sample to
        n_conv2 * 7 * 7 values -> fc1 -> ReLU -> fc2.

        Args:
            x: the data passed through the model, i.e. a batch of images.

        Returns:
            The output of fc2: 10 values per flattened sample (no softmax applied).
        """
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.mp(F.relu(conv(features)))

        flat = features.view(-1, self.n_conv2 * 7 * 7)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Instantiate the network with its default channel counts.
model = ConvNet()

# Cross-entropy loss on the network's raw scores, optimised with Adam at a learning rate of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Print the initial weights of the first fully connected layer (fc1), before any training step.
print('Initial weights - ', model.fc1.weight)

In [None]:
num_epochs = 1

# Train the model.
running_loss = 0.0    # Sum of the batch losses since the last progress print.

# Two loops - first, the number of epochs is looped over, and within that loop,
# 'trainloader' is iterated over using 'enumerate'.
for epoch in range(num_epochs):
    # Make sure the model is in training mode, even if an evaluation cell was run before this one.
    model.train()

    # Each iteration (i) over trainloader ("training dataset") trains on one batch of 100 images/labels.
    # For example, 100 iterations mean that 10,000 images/labels have been used to train on (100x100=10k).
    for i, (images, labels) in enumerate(trainloader):
        # Run the forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backprop and perform Adam optimisation.
        optimizer.zero_grad()    # Gradients are zeroed.
        loss.backward()    # Perform back-propagation on 'loss'.
        optimizer.step()    # Perform Adam optimizer training step once gradients have been calculated.

        # Track loss.
        running_loss += loss.item()

        # For every 100 iterations of the inner loop, progress is printed.
        if (i + 1) % 100 == 0:
            # Accuracy is only needed for the printed line, so it is computed here rather than on
            # every iteration. It describes the current batch only, while the loss is the average
            # over the last 100 batches.
            total = labels.size(0)
            # Index of the maximum score per sample, i.e. the predictions of the model.
            # detach() is used instead of the legacy '.data' attribute.
            _, predicted = torch.max(outputs.detach(), 1)
            # Compare the predictions with the true labels and sum the matches to get the number of correct predictions.
            correct = (predicted == labels).sum().item()

            print(i)    # For reference.
            # The epoch number is taken from the loop so the line stays correct when num_epochs > 1.
            print('[%d, %d] Loss: %.5f Accuracy: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100, (correct / total) * 100))
            running_loss = 0.0

In [None]:
# Print the weights of the first fully connected layer (fc1) again, now that training has run.
print('Updated weights - ', model.fc1.weight)

In [None]:
# Test the model on one batch drawn from 'testloader' and inspect a single sample of it.

i = 88    # Index (within the batch of 100) of the sample to display.

dataiter = iter(testloader)
# The builtin next() is used instead of the iterator's '.next()' method, which current
# PyTorch DataLoader iterators do not provide.
images, labels = next(dataiter)

plt.imshow(images[i].numpy().squeeze(), cmap='Greys_r')
print("Target: ", labels[i])

# Inference only: switch to evaluation mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    outputs = model(images)

# Index of the maximum score per sample, i.e. the predicted digit.
_, predicted = torch.max(outputs, 1)

print("Predicted:", predicted[i])