# Image Classification using a Multi-Layer Neural Network

Next, we measure the performance of a multi-layer neural network on the MNIST dataset to set a baseline.

The layers of the network are wrapped in an **nn.Sequential** object.

In [1]:
# Code adapted from https://github.com/activatedgeek/LeNet-5/

from collections import OrderedDict
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class MultilayerClassifier(nn.Module):
    """Two-layer fully connected classifier for 1x32x32 images.

    The input is flattened into rows of 32 * 32 = 1024 values, passed
    through a 128-unit linear layer followed by ReLU, and projected to
    10 output scores. No softmax is applied; the final layer is linear.
    """

    def __init__(self):
        super().__init__()

        num_pixels = 32 * 32
        num_hidden = 32 * 4

        # Keys become the sub-module names (and therefore state_dict keys).
        stack = OrderedDict()
        stack['linear_combination'] = nn.Linear(num_pixels, num_hidden)
        stack['relu1'] = nn.ReLU()
        stack['fc2'] = nn.Linear(num_hidden, 10)
        self.fc = nn.Sequential(stack)

    def forward(self, img):
        """Flatten ``img`` to rows of 1024 values and run the layer stack."""
        flattened = img.view(-1, 32 * 32)
        return self.fc(flattened)
    

def _mnist_transform():
    """Build the preprocessing pipeline: resize to 32x32, then convert to a tensor."""
    return transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ])


# Both splits are stored under data/mnist; each gets its own transform pipeline.
data_train = MNIST('data/mnist', download=True, transform=_mnist_transform())
data_test = MNIST('data/mnist', train=False, download=True, transform=_mnist_transform())

# Only the training loader shuffles; the test loader uses larger batches.
data_train_loader = DataLoader(
    data_train,
    batch_size=256,
    shuffle=True,
    num_workers=8,
)
data_test_loader = DataLoader(
    data_test,
    batch_size=1024,
    num_workers=8,
)

# Model, loss and optimizer shared by train() and test() below.
net = MultilayerClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3)

def train(epoch):
    """Run one training pass over ``data_train_loader``.

    For every mini-batch the gradients are cleared, the loss of ``net`` is
    computed with ``criterion``, backpropagated, and ``optimizer`` takes one
    step. The current batch loss is printed every 10th batch.

    Args:
        epoch: Epoch number; used only in the progress messages.
    """
    net.train()
    for i, (images, labels) in enumerate(data_train_loader):
        optimizer.zero_grad()

        output = net(images)
        loss = criterion(output, labels)

        if i % 10 == 0:
            # Tensors carry autograd state themselves, so no Variable wrapper
            # or ``.data`` access is needed; ``.item()`` yields a Python float.
            print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.item()))

        loss.backward()
        optimizer.step()


def test():
    """Evaluate ``net`` on ``data_test_loader`` and print loss and accuracy.

    The printed loss is the mean per-sample loss over the whole test set and
    the printed accuracy is the fraction of correctly classified samples.
    Gradient tracking is disabled for the duration of the evaluation.
    """
    net.eval()
    total_correct = 0
    total_loss = 0.0

    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        for images, labels in data_test_loader:
            output = net(images)

            # ``criterion`` is built with default settings, which average the
            # loss over the batch. Multiply by the batch size to recover the
            # batch's summed loss, so that dividing by the dataset size below
            # gives a true per-sample mean (the last batch may be smaller).
            total_loss += criterion(output, labels).item() * labels.size(0)

            # Index of the largest score along the class dimension.
            _, pred = output.max(1)
            total_correct += pred.eq(labels.view_as(pred)).sum().item()

    num_samples = len(data_test)
    avg_loss = total_loss / num_samples
    print('Test Avg. Loss: %f, Accuracy: %f' % (avg_loss, float(total_correct) / num_samples))


def train_and_test(epoch):
    """Run one training pass, then evaluate on the test set.

    Args:
        epoch: Epoch number, forwarded to ``train``.
    """
    train(epoch)  # update the parameters over the training loader
    test()        # then report loss and accuracy on the test loader


def main(num_epochs=4):
    """Train and evaluate the network for ``num_epochs`` epochs.

    Epochs are numbered starting from 1.

    Args:
        num_epochs: Number of epochs to run. Defaults to 4, the count that
            was previously hard-coded; the original note suggests about 15
            for better performance.
    """
    for e in range(1, num_epochs + 1):
        train_and_test(e)


# Start the training/evaluation run only when executed as a script
# (or notebook cell), not when this module is imported.
if __name__ == "__main__":
    main()

Train - Epoch 1, Batch: 0, Loss: 2.297173
Train - Epoch 1, Batch: 10, Loss: 1.088993
Train - Epoch 1, Batch: 20, Loss: 0.574045
Train - Epoch 1, Batch: 30, Loss: 0.421659
Train - Epoch 1, Batch: 40, Loss: 0.409437
Train - Epoch 1, Batch: 50, Loss: 0.323081
Train - Epoch 1, Batch: 60, Loss: 0.363600
Train - Epoch 1, Batch: 70, Loss: 0.311784
Train - Epoch 1, Batch: 80, Loss: 0.319516
Train - Epoch 1, Batch: 90, Loss: 0.283992
Train - Epoch 1, Batch: 100, Loss: 0.279556
Train - Epoch 1, Batch: 110, Loss: 0.315841
Train - Epoch 1, Batch: 120, Loss: 0.324343
Train - Epoch 1, Batch: 130, Loss: 0.290451
Train - Epoch 1, Batch: 140, Loss: 0.356294
Train - Epoch 1, Batch: 150, Loss: 0.243043
Train - Epoch 1, Batch: 160, Loss: 0.274326
Train - Epoch 1, Batch: 170, Loss: 0.281660
Train - Epoch 1, Batch: 180, Loss: 0.270453
Train - Epoch 1, Batch: 190, Loss: 0.353078
Train - Epoch 1, Batch: 200, Loss: 0.255094
Train - Epoch 1, Batch: 210, Loss: 0.197983
Train - Epoch 1, Batch: 220, Loss: 0.177464

The accuracy we get with just two layers is around 97%, which is a huge improvement over single-layer models on the MNIST dataset.