This CNN model was trained on the MNIST dataset. The MNIST dataset is a database of handwritten digits that is commonly used for training various image processing systems. This one trains and evaluates without using GPU.

## Import statements

In [3]:
# import all necessary items
import time
import platform
import io
from google.colab import files
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm 

# install pytorch and Torchvision
def install_pytorch():
    os = platform.system()
    if os == "Linux":
        !pip3 install http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-linux_x86_64.whl
    elif os == "Windows":
        !pip3 install http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-win_amd64.whl 
    !pip3 install torchvision
    
install_pytorch()

# now that Pytorch and Torchvision are installed, import the relevant libraries
import torch
import torch.nn as nn
import torch.optim as optim

Collecting torch==0.4.0 from http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-linux_x86_64.whl (566.4MB)
[K    100% |████████████████████████████████| 566.4MB 60.1MB/s 
tcmalloc: large alloc 1073750016 bytes == 0x5cf18000 @  0x7f97385f21c4 0x46d6a4 0x5fcbcc 0x4c494d 0x54f3c4 0x553aaf 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54e4c8
[?25hInstalling collected packages: torch
Successfully installed torch-0.4.0
Collecting torchvision
[?25l  Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)
[K    100% |████████████████████████████████| 61kB 1.9MB/s 
[?25hCollecting pillow>=4.1.1 (from torchvisi

Installing collected packages: pillow, torchvision
  Found existing installation: Pillow 4.0.0
    Uninstalling Pillow-4.0.0:
      Successfully uninstalled Pillow-4.0.0
Successfully installed pillow-5.1.0 torchvision-0.2.1


## Dataset

### Download the dataset from PyTorch

Import the MNIST dataset using PyTorch tools (PyTorch has an [MNIST Dataset class](https://pytorch.org/docs/stable/torchvision/datasets.html?highlight=mnist#mnist). Download the MNIST dataset into training and test datasets. 

In [4]:
# import the necessary libraries 
import torch
from torchvision.datasets import MNIST
import torchvision.transforms as transforms

# create the training and test datasets
train_dataset = MNIST(root='../data', train=True, transform=transforms.ToTensor(), download=True)

test_dataset = MNIST(root='../data', train=False, transform=transforms.ToTensor(), download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


### Load the dataset

For training and evaluation purposes, the dataset objects are  wrapped in PyTorch [DataLoader objects](https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader).  The function<b><a href="http://pytorch.org/docs/master/data.html"> torch.utils.data.DataLoader </a></b> which divides the dataset automatically in mini-batches. 

In [5]:
batch_size = 100

# Dataloader gives object that you can iterate on 
# will need this to enumerate/train data
train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

# checking
d = list(train_loader)
len(d)

600

### Take the train_loader set for the following model

In [6]:
x = list(train_loader)[0][0]
x.shape

torch.Size([100, 1, 28, 28])

## Convolutional Neural Network model

Define the neural network model in the CNN class below. The CNN class will inherit from the [`nn.Module` class](https://pytorch.org/docs/stable/nn.html#module) (this class includes some neural net boilerplate code and magic methods).

In [7]:
# import the nn.Module class
import torch.nn as nn

# defines the convolutional neural network
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.block1 = nn.Sequential(
            #1x28x28
            nn.Conv2d(in_channels=1, 
                      out_channels=16, 
                      kernel_size=5, 
                      stride=1, 
                      padding=2),
            #16x28x28
            nn.MaxPool2d(kernel_size=2),
            #16x14x14
        )
        #16x14x14
        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels=16, 
                      out_channels=32, 
                      kernel_size=5, 
                      stride=1, 
                      padding=2),
            #32x14x14
            nn.MaxPool2d(kernel_size=2)
            #32x7x7
        ) 
        # linearly 
        self.block3 = nn.Sequential(
            nn.Linear(32*7*7, 500),
            nn.Linear(500, 300),
            nn.Linear(300, 100),
            nn.Linear(100, 10)
        )
        
    
    def forward(self, x): 
        out = self.block1(x)
        out = self.block2(out)
        # flatten the dataset
        out = out.view(-1, 32*7*7)
        out = self.block3(out)
        
        return out

# convolutional neural network model
model = CNN()

# print summary of the neural network model to check if everything is fine. 
print(model)
print("# parameter: ", sum([param.nelement() for param in model.parameters()]))

CNN(
  (block1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Linear(in_features=1568, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=300, bias=True)
    (2): Linear(in_features=300, out_features=100, bias=True)
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
# parameter:  979158


### Print out the new shape of the dataset

In [8]:
out = model(x)

# shape of dataset before model
print("Previous shape:", x.shape)

# new shape of dataset
print("New shape:", out.shape)

Previous shape: torch.Size([100, 1, 28, 28])
New shape: torch.Size([100, 10])


### Define the loss

Cross-entropy or log loss will be used as the loss function. This loss function is ideal for classification models

In [0]:
criterion = nn.CrossEntropyLoss()

### Set the learning rate

Set the learning rate on which to train the model.

In [0]:
learning_rate = 1e-3

### Optimizer

For this optimizer, SGD (Stochastic Gradient Descent) will be used, using the learning rate defined above.  

In [0]:
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

## Train & Evaluate the model

Now that the model has been defined, it's time to train & evaluate the model. The model will be trained on epochs of mini-batches (train_loader).

### Train

The code below is trained on 10 epochs (an epoch is a complete representation of the dataset to be learned).

### Evaluate

This is very similar to training, except that the gradient is not computed and the parameters are not updated. 

In [12]:
t0 = time.time()

# variable to store the total loss
total_loss = []

# for loop that iterates over all the epochs
num_epochs = 10
for epoch in range(num_epochs):
    
    # variables to store/keep track of the loss and number of iterations
    train_loss = 0
    num_iter = 0
    
    # train the model
    model.train()
    
    # Iterate over data.
    for i, (images, labels) in enumerate(train_loader):  
    
        # Zero the gradient buffer
        # resets the gradient after each epoch so that the gradients don't add up
        optimizer.zero_grad()  
        
        # Forward
        outputs = model(images)
        # calculate the loss
        loss = criterion(outputs, labels)
        total_loss.append(loss)
        # Backward
        loss.backward()
        
        # Optimize
        # loops through all parameters and updates weights by using the gradients 
        optimizer.step()
        # update the training loss and number of iterations
        train_loss += loss.data[0]
        num_iter += 1
    
    print('Epoch: {}, Loss: {:.4f}'.format(
          epoch+1, train_loss/num_iter))
    
    # evaluate the model
    model.eval()

    correct = 0
    total = 0

    # Iterate over data.
    for images, labels in test_loader:  
          
       # Forward
       outputs = model(images)
       loss = criterion(outputs, labels)  
       _, predicted = torch.max(outputs.data, 1)
    
       # Statistics
       total += labels.size(0)
       correct += (predicted == labels).sum()
       
    print('Accuracy on the test set: {}%'.format(100 * correct / total))
tf = time.time()
print()
print("time: {} s" .format(tf-t0))



Epoch: 1, Loss: 2.2990
Accuracy on the test set: 13%
Epoch: 2, Loss: 2.2890
Accuracy on the test set: 24%
Epoch: 3, Loss: 2.2770
Accuracy on the test set: 36%
Epoch: 4, Loss: 2.2601
Accuracy on the test set: 50%
Epoch: 5, Loss: 2.2331
Accuracy on the test set: 56%
Epoch: 6, Loss: 2.1824
Accuracy on the test set: 55%
Epoch: 7, Loss: 2.0661
Accuracy on the test set: 62%
Epoch: 8, Loss: 1.7418
Accuracy on the test set: 69%
Epoch: 9, Loss: 1.1543
Accuracy on the test set: 77%
Epoch: 10, Loss: 0.7873
Accuracy on the test set: 81%

time: 828.4415647983551 s
