# DLVC 2017
# Tutorial 10: MNIST Digits Classification (LeNet)

### MNIST database (http://yann.lecun.com/exdb/mnist/)

In [None]:
%matplotlib inline
import os
import torch
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torchvision
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy
import time

In [None]:
# Detect CUDA support once; the result is reused by the later cells
use_gpu = torch.cuda.is_available()
# pinMem is handed to the data loaders as their pin_memory flag and simply mirrors GPU availability
pinMem = True if use_gpu else False
if use_gpu:
    print('GPU is available!')

### Downloading dataset

In [None]:
# Resize every image so its shorter side is 32 px, then convert it to a tensor
apply_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
# Training split: downloaded into ./MNIST/ if it is not already there
trainLoader = DataLoader(
    datasets.MNIST('./MNIST/', train=True, download=True, transform=apply_transform),
    batch_size=1024,
    shuffle=True,
    num_workers=1,
    pin_memory=pinMem,
)
# Test split: read from the same directory (no download is requested here)
testLoader = DataLoader(
    datasets.MNIST('./MNIST/', train=False, transform=apply_transform),
    batch_size=1024,
    shuffle=True,
    num_workers=1,
    pin_memory=pinMem,
)

In [None]:
# Report how many samples each split holds
print('No. of samples in train set: ', len(trainLoader.dataset), sep='')
print('No. of samples in test set: ', len(testLoader.dataset), sep='')

### Define network architecture

In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN mapping single-channel 32x32 images to 10 log-probabilities.

    Layout: conv(1->6, 5x5) -> ReLU -> 2x2 max-pool -> conv(6->16, 5x5) ->
    channel dropout -> ReLU -> 2x2 max-pool -> flatten (16*5*5 = 400) ->
    120 -> 84 -> 10, with dropout after each of the first two dense layers.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2,stride=2)
        self.conv2_drop = nn.Dropout2d()
        # 400 = 16 channels * 5 * 5, the feature-map size reached from a 32x32 input
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: tensor of shape (N, 1, 32, 32).

        Returns:
            Tensor of shape (N, 10), log-softmax taken over dim 1.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield 400
                features per sample (raised by the first dense layer).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2_drop(self.conv2(x))))
        # Flatten per sample instead of to a fixed width of 400: with a fixed
        # width, a wrong-sized input can be silently re-batched (e.g. 25 images
        # of 28x28 become 16 rows) rather than failing at fc1.
        x = x.view(x.size(0), -1)
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), training=self.training)
        return F.log_softmax(self.fc3(x), dim=1)

### Initialize the network

In [None]:
# Build the model and print its layer summary
net = LeNet()
print(net)

# Move the model to the GPU only when one was detected
net = net.cuda() if use_gpu else net

### Total number of trainable parameters

In [None]:
# Print the shape of every parameter tensor and tally the total element count
totalParams = 0
for params in net.parameters():
    print(params.size())
    # Product of the dimensions = number of elements in this tensor
    totalParams = totalParams + np.prod(params.size())
print('Total number of parameters: ', totalParams, sep='')

In [None]:
# Keep an independent copy of the untrained first-layer kernels; the visualisation cell shows it next to the trained weights
init_conv1 = copy.deepcopy(net.conv1.weight.data)

### Define loss function and optimizer

In [None]:
# Negative log-likelihood loss; the network's forward pass already returns log-probabilities
criterion = nn.NLLLoss()
# Stochastic gradient descent with momentum over all network parameters
optimizer = optim.SGD(
    net.parameters(),
    lr=1e-2,
    momentum=0.9,
)

### Train the network

In [None]:
# Train for a fixed number of epochs; after each epoch measure accuracy on the
# test set. Per-epoch training loss and test accuracy are collected in
# trainLoss / testAcc and plotted once training has finished.
iterations = 10
trainLoss = []
testAcc = []
# Take the split sizes from the loaders instead of hard-coding 60000 / 10000
numTrain = len(trainLoader.dataset)
numTest = len(testLoader.dataset)
start = time.time()
for epoch in range(iterations):
    epochStart = time.time()
    runningLoss = 0.0
    net.train(True) # For training
    for inputs, labels in trainLoader:
        if use_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        # Initialize gradients to zero
        optimizer.zero_grad()
        # Feed-forward input data through the network
        outputs = net(inputs)
        # Compute loss/error
        loss = criterion(outputs, labels)
        # Backpropagate loss and compute gradients
        loss.backward()
        # Update the network parameters
        optimizer.step()
        # criterion() is constructed with its default reduction, which returns
        # the batch mean; weight it by the batch size so that dividing by the
        # number of samples below gives the true per-sample average
        runningLoss += loss.item() * inputs.size(0)
    avgTrainLoss = runningLoss / float(numTrain)
    trainLoss.append(avgTrainLoss)

    # Evaluating performance on test set for each epoch
    net.train(False) # For testing [Affects batch-norm and dropout layers (if any)]
    running_correct = 0
    # No gradients are needed for evaluation; skip building the autograd graph
    with torch.no_grad():
        for inputs, labels in testLoader:
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = net(inputs)
            # Index of the largest log-probability is the predicted class
            _, predicted = torch.max(outputs, 1)
            running_correct += (predicted == labels).sum().item()
    avgTestAcc = running_correct / float(numTest)
    testAcc.append(avgTestAcc)

    epochEnd = time.time()-epochStart
    print('Iteration: {:.0f} /{:.0f}  ;  Training Loss: {:.6f} ; Testing Acc: {:.3f} ; Time consumed: {:.0f}m {:.0f}s '\
          .format(epoch + 1,iterations,avgTrainLoss,avgTestAcc*100,epochEnd//60,epochEnd%60))
end = time.time()-start
print('Training completed in {:.0f}m {:.0f}s'.format(end//60,end%60))

# Draw each curve once, after training, instead of re-plotting the whole
# history on the same axes every epoch (which stacks overlapping lines)
# Plotting training loss vs Epochs
fig1 = plt.figure(1)
plt.plot(range(len(trainLoss)),trainLoss,'r-',label='train')
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Training loss')
# Plotting testing accuracy vs Epochs
fig2 = plt.figure(2)
plt.plot(range(len(testAcc)),testAcc,'g-',label='test')
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Testing accuracy')

### Visualizing the kernels

In [None]:
# functions to show an image
def imshow(img, strlabel):
    """Show an image tensor in a new 10x10-inch figure titled ``strlabel``.

    Args:
        img: tensor laid out as (channels, height, width); it is transposed to
            (height, width, channels) before display. May live on any device.
        strlabel: title drawn above the image.

    The absolute value of the data is displayed. The figure size is passed to
    ``plt.figure`` directly rather than written into ``plt.rcParams``, so the
    global default size used by other plots is left untouched.
    """
    # detach()/cpu() make the conversion work for GPU or grad-tracking tensors too
    npimg = np.abs(img.detach().cpu().numpy())
    plt.figure(figsize=(10, 10))
    plt.title(strlabel)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [None]:
# Show the first-layer kernels before and after training as image grids
trained_conv1 = net.conv1.weight.data
# .cpu() hands back the same tensor when it is already on the CPU, so one code
# path serves both devices (and both runs now use the same 'Trained Weights' title)
imshow(torchvision.utils.make_grid(init_conv1.cpu(),nrow=5,normalize=True),'Initial Weights')
imshow(torchvision.utils.make_grid(trained_conv1.cpu(),nrow=5,normalize=True),'Trained Weights')

### Saving the trained model

In [None]:
# Write only the learned parameters (the state dict) to disk
torch.save(obj=net.state_dict(), f='trainedNet.pt')

### Loading saved model

In [None]:
# Rebuild the architecture, then restore the saved parameters into it
new_net = LeNet()
# new_net is created on the CPU, so map the checkpoint there as well; without
# map_location a file saved from a GPU model cannot be loaded on a machine
# that has no CUDA device
new_net.load_state_dict(torch.load('trainedNet.pt', map_location='cpu'))