# Convolutional networks - VGG
    
A convolutional neural network with an architecture inspired by a VGG-network [(Simonyan \& Zisserman, 2015)](https://arxiv.org/abs/1409.1556).
    

In [1]:
# Set this flag to True before validation and submission: the training loop is
# then skipped and previously saved weights are loaded from disk instead.
skip_training = False

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import tools
import tests

In [3]:
# When running on your own computer, you can specify the data directory by:
# data_dir = tools.select_data_dir('/your/local/data/directory')
# Called without an argument, the helper picks the location itself
# (presumably a default path; the chosen directory is printed below).
data_dir = tools.select_data_dir()

The data directory is ../data


In [4]:
# Select the device for training (use GPU if you have one).
# To target the first CUDA GPU, uncomment the next line and comment out the CPU line.
#device = torch.device('cuda:0')
device = torch.device('cpu')

In [5]:
# The models are always evaluated on CPU, so when training is skipped the
# device chosen above is replaced by the CPU; otherwise it is left untouched.
device = torch.device("cpu") if skip_training else device

## FashionMNIST dataset

The dataset used is FashionMNIST. It consists of 60,000 training images of 10 classes: 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'.

In [6]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# with mean 0.5 and std 0.5 (min-max scaling to [-1, 1]).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Both splits share the storage location and preprocessing; only `train` differs.
fmnist_kwargs = dict(root=data_dir, download=True, transform=transform)
trainset = torchvision.datasets.FashionMNIST(train=True, **fmnist_kwargs)
testset = torchvision.datasets.FashionMNIST(train=False, **fmnist_kwargs)

# Names of the 10 classes.
classes = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Only the training batches are shuffled; the test loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=5, shuffle=False)

## VGG-style network

Here I created a convolutional neural network with an architecture inspired by the [VGG-net](https://arxiv.org/abs/1409.1556):

<img src="images/vgg-style.png" width="300" >

The architecture of the convolutional network:

* A block of three convolutional layers with:
    * 3x3 kernel
    * 16 output channels
    * one pixel zero-padding on both sides
    * 2d batch normalization after each convolutional layer
    * ReLU nonlinearity after each 2d batch normalization layer
* Max pooling layer with 2x2 kernel and stride 2.
* A block of three convolutional layers with:
    * 3x3 kernel
    * 32 output channels
    * one pixel zero-padding on both sides
    * 2d batch normalization after each convolutional layer
    * ReLU nonlinearity after each 2d batch normalization layer
* Max pooling layer with 2x2 kernel and stride 2.
* One convolutional layer with:
    * 3x3 kernel
    * 48 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* One convolutional layer with:
    * 1x1 kernel
    * 32 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* One convolutional layer with:
    * 1x1 kernel
    * 16 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* Global average pooling (compute the average value of each channel across all the input locations):
    * 5x5 kernel (the input of the layer should be 5x5)
* A fully-connected layer with 10 outputs (no nonlinearity)

Notes:
* Batch normalization is expected to be right after a convolutional layer, before nonlinearity.
* We recommend that you check the number of modules with trainable parameters in your network.

In [7]:
class VGGNet(nn.Module):
    """VGG-style convolutional classifier.

    Structure (every convolution is followed by 2d batch normalization and ReLU):
      * three 3x3 convolutions with padding 1 and ``n_channels`` outputs, 2x2 max pooling
      * three 3x3 convolutions with padding 1 and ``2 * n_channels`` outputs, 2x2 max pooling
      * one unpadded 3x3 convolution with ``3 * n_channels`` outputs
      * one 1x1 convolution with ``2 * n_channels`` outputs
      * one 1x1 convolution with ``n_channels`` outputs
      * 5x5 average pooling, then a linear layer with 10 outputs (no nonlinearity)

    With the default ``n_channels=16`` the widths are 16 / 32 / 48 / 32 / 16.
    Sub-module names (``layer1.Conv1``, ``layer1.BN1``, ..., ``fc1``) define the
    ``state_dict`` keys and must stay stable for saved checkpoints to load.
    """

    # Feature-extraction stages in the order forward() applies them.
    _FEATURE_STAGES = (
        "layer1", "layer2", "layer3", "pool1",
        "layer4", "layer5", "layer6", "pool2",
        "layer7", "layer8", "layer9", "pool3",
    )

    def __init__(self, n_channels=16):
        """
        Args:
          n_channels (int): Number of channels in the first convolutional layer. The numbers of
              channels in the following layers are multiples of n_channels (2x and 3x), so the
              whole network scales with this single value.
        """
        super(VGGNet, self).__init__()
        width1 = n_channels
        width2 = 2 * n_channels
        width3 = 3 * n_channels

        # First block: three padded 3x3 convolutions, then downsample by 2.
        self.layer1 = self._conv_block(1, 1, width1, kernel_size=3, padding=1)
        self.layer2 = self._conv_block(2, width1, width1, kernel_size=3, padding=1)
        self.layer3 = self._conv_block(3, width1, width1, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Second block: three padded 3x3 convolutions, then downsample by 2.
        self.layer4 = self._conv_block(4, width1, width2, kernel_size=3, padding=1)
        self.layer5 = self._conv_block(5, width2, width2, kernel_size=3, padding=1)
        self.layer6 = self._conv_block(6, width2, width2, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Head: one unpadded 3x3 convolution followed by two 1x1 convolutions.
        self.layer7 = self._conv_block(7, width2, width3, kernel_size=3, padding=0)
        self.layer8 = self._conv_block(8, width3, width2, kernel_size=1, padding=0)
        self.layer9 = self._conv_block(9, width2, width1, kernel_size=1, padding=0)

        # Global average pooling: a 5x5 kernel over the 5x5 map produced from 28x28 inputs.
        self.pool3 = nn.AvgPool2d(kernel_size=5)

        self.fc1 = nn.Linear(width1, 10)

    @staticmethod
    def _conv_block(index, in_channels, out_channels, kernel_size, padding):
        """Build one Conv -> BatchNorm -> ReLU stage named Conv<index>/BN<index>/Relu<index>."""
        block = nn.Sequential()
        block.add_module(
            "Conv{}".format(index),
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, padding=padding),
        )
        block.add_module("BN{}".format(index), nn.BatchNorm2d(num_features=out_channels))
        block.add_module("Relu{}".format(index), nn.ReLU(inplace=False))
        return block

    def forward(self, x, verbose=False):
        """
        Args:
          x of shape (batch_size, 1, 28, 28): Input images.
          verbose: True if you want to print the shapes of the intermediate variables.

        Returns:
          y of shape (batch_size, 10): Outputs of the network.
        """
        if verbose:
            print('input:', x.shape)
        for stage_name in self._FEATURE_STAGES:
            x = getattr(self, stage_name)(x)
            if verbose:
                print('{}:'.format(stage_name), x.shape)

        # Flatten per sample. Keeping the batch dimension explicit makes an unexpected
        # spatial size fail loudly in fc1 instead of being folded into the batch.
        x = x.view(x.size(0), -1)
        if verbose:
            print('flatten:', x.shape)
        y = self.fc1(x)
        if verbose:
            print('fc1:', y.shape)
        return y

## Training 

In [8]:
# This function computes the accuracy on the test dataset
def compute_accuracy(net, testloader):
    """Return the fraction of samples in `testloader` that `net` classifies correctly.

    The network is switched to evaluation mode and is left in that mode on return,
    so callers that continue training must call `net.train()` again.

    Args:
      net: Model mapping a batch of images to per-class scores of shape (batch_size, n_classes).
      testloader: Iterable yielding `(images, labels)` batches.

    Returns:
      Accuracy as a float in [0, 1].

    Raises:
      ZeroDivisionError: If `testloader` yields no samples.
    """
    net.eval()

    # Place the batches on the device the model actually lives on, rather than on a
    # notebook-level global that may disagree with it. Models without parameters
    # receive the batches unchanged.
    first_param = next(net.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            predicted = net(images).argmax(dim=1)  # index of the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [9]:
# Fresh network with the default channel width (n_channels=16).
net = VGGNet()

In [10]:
# Implement the training loop
if not skip_training:
    # Keep the model on the same device as the batches fed to it below.
    net.to(device)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    n_epochs = 10
    loss_method = nn.CrossEntropyLoss()

    for epoch in range(n_epochs):
        # compute_accuracy() leaves the network in eval mode. Without switching back,
        # every epoch after the first would train with batch normalization frozen on
        # its running statistics.
        net.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # clears the gradients of all parameters of `net`
            outputs = net(inputs)
            loss = loss_method(outputs, labels)
            loss.backward()
            optimizer.step()

        # The reported loss is that of the last mini-batch of the epoch.
        test_accuracy = compute_accuracy(net, testloader)
        print('Train Epoch {}: Loss: {:.6f} Test accuracy {:.2f}'.format(epoch, loss.item(), test_accuracy))

Train Epoch 0: Loss: 0.526230 Test accuracy 0.87
Train Epoch 1: Loss: 0.188631 Test accuracy 0.88
Train Epoch 2: Loss: 0.403229 Test accuracy 0.88
Train Epoch 3: Loss: 0.197453 Test accuracy 0.89
Train Epoch 4: Loss: 0.471652 Test accuracy 0.90
Train Epoch 5: Loss: 0.231441 Test accuracy 0.89
Train Epoch 6: Loss: 0.274910 Test accuracy 0.91
Train Epoch 7: Loss: 0.074332 Test accuracy 0.90
Train Epoch 8: Loss: 0.552233 Test accuracy 0.91
Train Epoch 9: Loss: 0.106245 Test accuracy 0.91


## Save model to disk

In [11]:
# Checkpoint file shared by the save and load paths.
model_filename = '4_vgg_net.pth'

if skip_training:
    # Nothing was trained in this session: rebuild the network and restore saved weights.
    net = VGGNet()
    tools.load_model(net, model_filename, device)
else:
    # Persist the freshly trained weights.
    tools.save_model(net, model_filename)

Do you want to save the model (type yes to confirm)? yes
Model saved to 4_vgg_net.pth.


## Evaluate model performance on test set

In [12]:
# Score the (trained or loaded) network on the full test set and report it
# to three decimals.
accuracy = compute_accuracy(net, testloader)
accuracy_report = 'Accuracy of the VGG net on the test images: %.3f' % accuracy
print(accuracy_report)

Accuracy of the VGG net on the test images: 0.911
