# Image Classification using a 7-Layer Neural Network

Next, we measure the performance of a rather deep (7-layered) multi-layer neural network on the MNIST dataset.

In [1]:
# Code adapted from https://github.com/activatedgeek/LeNet-5/

from collections import OrderedDict
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class MultilayerClassifier(nn.Module):
    """Seven fully connected layers with ReLU between them.

    Input  - images that can be flattened to rows of 32 * 32 values (1x32x32)
    Output - 10 values per image, taken straight from the last linear layer
    """

    def __init__(self):
        super(MultilayerClassifier, self).__init__()

        # Feature count at every stage, from the flattened image down to
        # the 10 outputs.
        widths = [32 * 32, 32 * 20, 32 * 16, 32 * 8,
                  32 * 4, 32 * 2, 16 * 2, 10 * 1]
        n_linear = len(widths) - 1

        stack = OrderedDict()
        for idx, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            stack['fc%d' % idx] = nn.Linear(n_in, n_out)
            # Every linear layer except the last is followed by a ReLU.
            if idx < n_linear:
                stack['relu%d' % idx] = nn.ReLU()

        self.fc = nn.Sequential(stack)

    def forward(self, img):
        # Flatten each image to a single row before the linear stack.
        flat = img.view(-1, 32 * 32)
        return self.fc(flat)
    

# MNIST splits stored under data/mnist (downloaded when missing). Every
# image is resized to 32x32 and converted to a tensor, matching the
# 32 * 32 input expected by MultilayerClassifier.
data_train = MNIST(
    'data/mnist',
    download=True,
    transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ]),
)
data_test = MNIST(
    'data/mnist',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ]),
)

# Training batches are shuffled; test batches are not.
data_train_loader = DataLoader(
    data_train,
    batch_size=256,
    shuffle=True,
    num_workers=8,
)
data_test_loader = DataLoader(
    data_test,
    batch_size=1024,
    num_workers=8,
)

# Model, loss and optimizer shared by train() and test().
net = MultilayerClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3)

def train(epoch):
    """Run one pass over ``data_train_loader``, updating ``net`` in place.

    Relies on the module-level ``net``, ``criterion`` and ``optimizer``.
    Prints the batch loss for every 10th batch.

    Args:
        epoch: Epoch number; used only in the progress message.
    """
    net.train()
    for i, (images, labels) in enumerate(data_train_loader):
        optimizer.zero_grad()

        output = net(images)
        loss = criterion(output, labels)

        if i % 10 == 0:
            # The loss is a 0-dim tensor; ``loss.data[0]`` raises on
            # PyTorch >= 0.4, so read the scalar with ``item()``.
            print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.item()))

        loss.backward()
        optimizer.step()


def test():
    """Evaluate ``net`` on ``data_test_loader`` and print loss and accuracy.

    Relies on the module-level ``net``, ``criterion``, ``data_test_loader``
    and ``data_test``. The reported loss is the per-sample average over the
    whole test set; accuracy is the fraction of correctly classified samples.
    """
    net.eval()
    total_correct = 0
    total_loss = 0.0

    # No gradients are needed for evaluation; this avoids building and
    # retaining an autograd graph for every test batch.
    with torch.no_grad():
        for images, labels in data_test_loader:
            output = net(images)

            # ``criterion`` returns the mean loss of the batch, so weight it
            # by the batch size; dividing the plain sum of batch means by the
            # number of samples would understate the loss.
            batch_size = labels.size(0)
            total_loss += criterion(output, labels).item() * batch_size

            # Index of the largest output per row is the predicted class.
            pred = output.max(1)[1]
            total_correct += pred.eq(labels.view_as(pred)).sum().item()

    avg_loss = total_loss / len(data_test)
    print('Test Avg. Loss: %f, Accuracy: %f' % (avg_loss, float(total_correct) / len(data_test)))


def train_and_test(epoch):
    """Train for one epoch, then evaluate on the test set.

    ``epoch`` is passed through to ``train``; ``test`` takes no arguments.
    """
    train(epoch)
    test()


def main():
    """Run the train-then-test cycle for epochs 1 through 4."""
    first_epoch = 1
    stop_epoch = 5  # Change 5 to 16 for better performance
    for epoch in range(first_epoch, stop_epoch):
        train_and_test(epoch)


# Only start training when executed as a script, not on import.
if __name__ == '__main__':
    main()

Train - Epoch 1, Batch: 0, Loss: 2.310225
Train - Epoch 1, Batch: 10, Loss: 2.109520
Train - Epoch 1, Batch: 20, Loss: 1.335957
Train - Epoch 1, Batch: 30, Loss: 0.790486
Train - Epoch 1, Batch: 40, Loss: 0.675016
Train - Epoch 1, Batch: 50, Loss: 0.647539
Train - Epoch 1, Batch: 60, Loss: 0.389924
Train - Epoch 1, Batch: 70, Loss: 0.332017
Train - Epoch 1, Batch: 80, Loss: 0.353927
Train - Epoch 1, Batch: 90, Loss: 0.330918
Train - Epoch 1, Batch: 100, Loss: 0.293545
Train - Epoch 1, Batch: 110, Loss: 0.409436
Train - Epoch 1, Batch: 120, Loss: 0.234859
Train - Epoch 1, Batch: 130, Loss: 0.240823
Train - Epoch 1, Batch: 140, Loss: 0.197858
Train - Epoch 1, Batch: 150, Loss: 0.128776
Train - Epoch 1, Batch: 160, Loss: 0.152290
Train - Epoch 1, Batch: 170, Loss: 0.211546
Train - Epoch 1, Batch: 180, Loss: 0.296821
Train - Epoch 1, Batch: 190, Loss: 0.186809
Train - Epoch 1, Batch: 200, Loss: 0.286462
Train - Epoch 1, Batch: 210, Loss: 0.198773
Train - Epoch 1, Batch: 220, Loss: 0.116536

With seven layers, the accuracy we get is also very high, at 97%. Can we do even better?