# Image Classification using a Convolutional Neural Network

Next, we measure the performance of the LeNet-5 convolutional neural network (a simplified variant that uses ReLU activations) on the MNIST dataset.

In [1]:
# Code adapted from https://github.com/activatedgeek/LeNet-5/

from collections import OrderedDict
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class LeNet5(nn.Module):
    """Simplified LeNet-5 image classifier using ReLU activations.

    Shapes below are for a single-channel 32x32 input:

        Input  1 x 32 x 32
        C1     6 @ 28x28    5x5 convolution
        S2     6 @ 14x14    2x2 max-pool, stride 2 (subsampling)
        C3     16 @ 10x10   5x5 convolution (plain full convolution here)
        S4     16 @ 5x5     2x2 max-pool, stride 2 (subsampling)
        C5     120 @ 1x1    5x5 convolution
        F6     84           fully connected
        F7     10           fully connected (output)

    The output is the raw result of the last linear layer; no softmax or
    log-softmax is applied inside the module.
    """

    def __init__(self):
        super(LeNet5, self).__init__()

        # Convolutional feature extractor. The layer names become part of the
        # parameter keys (e.g. ``convnet.c1.weight``), so they are kept stable.
        feature_layers = OrderedDict()
        feature_layers['c1'] = nn.Conv2d(1, 6, kernel_size=(5, 5))
        feature_layers['relu1'] = nn.ReLU()
        feature_layers['s2'] = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        feature_layers['c3'] = nn.Conv2d(6, 16, kernel_size=(5, 5))
        feature_layers['relu3'] = nn.ReLU()
        feature_layers['s4'] = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        feature_layers['c5'] = nn.Conv2d(16, 120, kernel_size=(5, 5))
        feature_layers['relu5'] = nn.ReLU()
        self.convnet = nn.Sequential(feature_layers)

        # Fully connected classifier head mapping 120 features to 10 scores.
        classifier_layers = OrderedDict()
        classifier_layers['f6'] = nn.Linear(120, 84)
        classifier_layers['relu6'] = nn.ReLU()
        classifier_layers['f7'] = nn.Linear(84, 10)
        self.fc = nn.Sequential(classifier_layers)

    def forward(self, img):
        """Return the 10 class scores for each image in ``img``.

        Args:
            img: Input tensor fed to the convolutional stack; the layer
                sizes are chosen for 1x32x32 images.

        Returns:
            Tensor with 10 columns, one row per 120-element feature vector.
        """
        features = self.convnet(img)
        # Collapse the 120@1x1 feature maps into rows of 120 values.
        flattened = features.view(-1, 120)
        return self.fc(flattened)
    

# Preprocessing shared by both splits: resize every image to 32x32 (the input
# size the LeNet5 layer sizes are laid out for) and convert it to a tensor.
_mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Training and test splits, downloaded into data/mnist on first use.
data_train = MNIST('data/mnist', train=True, download=True,
                   transform=_mnist_transform)
data_test = MNIST('data/mnist', train=False, download=True,
                  transform=_mnist_transform)

# Only the training batches are shuffled; evaluation uses larger batches.
data_train_loader = DataLoader(data_train, batch_size=256, shuffle=True,
                               num_workers=8)
data_test_loader = DataLoader(data_test, batch_size=1024, num_workers=8)

# Model, loss and optimizer used by train() and test().
net = LeNet5()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3)

def train(epoch):
    """Run one training pass over ``data_train_loader``.

    Operates on the module-level ``net``, ``criterion`` and ``optimizer``.
    The mini-batch loss is printed for every 10th batch.

    Args:
        epoch: Epoch number; used only in the progress messages.
    """
    net.train()
    for i, (images, labels) in enumerate(data_train_loader):
        optimizer.zero_grad()

        output = net(images)
        loss = criterion(output, labels)

        if i % 10 == 0:
            # ``loss`` is a 0-dim tensor: ``.item()`` extracts the Python
            # float. Indexing it (``loss.data[0]``) raises on PyTorch >= 0.5.
            print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.item()))

        loss.backward()
        optimizer.step()


def test():
    """Evaluate ``net`` on ``data_test_loader`` and print loss and accuracy.

    Operates on the module-level ``net``, ``criterion``, ``data_test_loader``
    and ``data_test``. Prints the per-sample average loss and the fraction of
    correctly classified test images.
    """
    net.eval()
    total_correct = 0
    total_loss = 0.0
    # No gradients are needed for evaluation; skipping them avoids building
    # and retaining an autograd graph for every test batch.
    with torch.no_grad():
        for images, labels in data_test_loader:
            output = net(images)
            # ``criterion`` returns the mean loss of the batch, so weight it
            # by the batch size before dividing by the dataset size below.
            # (The last batch may be smaller than the others.)
            total_loss += criterion(output, labels).item() * images.size(0)
            # Index of the highest score along dim 1 is the predicted class.
            pred = output.max(1)[1]
            total_correct += pred.eq(labels.view_as(pred)).sum().item()

    avg_loss = total_loss / len(data_test)
    print('Test Avg. Loss: %f, Accuracy: %f' % (avg_loss, float(total_correct) / len(data_test)))


def train_and_test(epoch):
    """Train for one epoch, then evaluate on the test set.

    Args:
        epoch: Epoch number, forwarded to ``train`` for its progress output.
    """
    train(epoch)
    test()


def main(num_epochs=4):
    """Train and evaluate the network for ``num_epochs`` epochs.

    Args:
        num_epochs: Number of train/test rounds to run. Epochs are numbered
            from 1. The default of 4 keeps the run short; pass 15 for better
            performance.
    """
    for e in range(1, num_epochs + 1):
        train_and_test(e)


if __name__ == '__main__':
    main()

Train - Epoch 1, Batch: 0, Loss: 2.299960
Train - Epoch 1, Batch: 10, Loss: 2.065571
Train - Epoch 1, Batch: 20, Loss: 0.950920
Train - Epoch 1, Batch: 30, Loss: 0.748051
Train - Epoch 1, Batch: 40, Loss: 0.472438
Train - Epoch 1, Batch: 50, Loss: 0.405190
Train - Epoch 1, Batch: 60, Loss: 0.378026
Train - Epoch 1, Batch: 70, Loss: 0.337037
Train - Epoch 1, Batch: 80, Loss: 0.311086
Train - Epoch 1, Batch: 90, Loss: 0.203185
Train - Epoch 1, Batch: 100, Loss: 0.330358
Train - Epoch 1, Batch: 110, Loss: 0.208700
Train - Epoch 1, Batch: 120, Loss: 0.226665
Train - Epoch 1, Batch: 130, Loss: 0.198784
Train - Epoch 1, Batch: 140, Loss: 0.242382
Train - Epoch 1, Batch: 150, Loss: 0.242769
Train - Epoch 1, Batch: 160, Loss: 0.195220
Train - Epoch 1, Batch: 170, Loss: 0.214870
Train - Epoch 1, Batch: 180, Loss: 0.192313
Train - Epoch 1, Batch: 190, Loss: 0.151626
Train - Epoch 1, Batch: 200, Loss: 0.165912
Train - Epoch 1, Batch: 210, Loss: 0.161444
Train - Epoch 1, Batch: 220, Loss: 0.130290

The accuracy we get with LeNet is above 98%.