# Lab 03 : LeNet5 architecture - exercise

In [1]:
import sys, os

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import utils
import time

### With or without GPU?
It is recommended to run this code on GPU:
- Time for 1 epoch on CPU: 96 sec (1.62 min)
- Time for 1 epoch on GPU: 2 sec w/ GeForce GTX 1080 Ti

In [6]:
device = torch.device("cpu")
print(device)

cpu


### Download the MNIST dataset

In [7]:
from utils import check_mnist_dataset_exists
data_path=check_mnist_dataset_exists()

train_data=torch.load(data_path+'mnist/train_data.pt')
train_label=torch.load(data_path+'mnist/train_label.pt')
test_data=torch.load(data_path+'mnist/test_data.pt')
test_label=torch.load(data_path+'mnist/test_label.pt')

print(train_data.size())
print(test_data.size())

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


### Compute average pixel intensity over all training set and all channels

In [8]:
mean = train_data.mean()
print(mean)

tensor(0.1306)


### Compute standard deviation

In [10]:
std = train_data.std()
print(std)

tensor(0.3081)


### Make a LeNet5 convnet class

In [12]:
class LeNet5_convnet(nn.Module):
    
    def __init__(self):
        
        super(LeNet5_convnet, self).__init__()
    
        # CL1: 1 x 28 x 28 --> 50 x 28 x 28
        self.conv1 = nn.Conv2d(1, 50, kernel_size = 3, padding = 1)

        # MP1: 50 x 28 x 28 --> 50 x 14 x 14
        self.pool1 = nn.MaxPool2d(2, 2)

        # CL2: 50 x 14 x 14 --> 100 x 14 x 14
        self.conv2 = nn.Conv2d(50, 100, kernel_size = 3, padding = 1)

        # MP2: 100 x 14 x 14 --> 100 x 7 x 7
        self.pool2 = nn.MaxPool2d(2, 2)

        # LL1: 100 x 7 x 7 = 4900 --> 100
        self.linear1 = nn.Linear(4900, 100)

        # LL2: 100 --> 10
        self.linear2 = nn.Linear(100, 10, bias = True)
    
    def forward(self, x):
        
        x = self.conv1(x)
        x = F.relu(x)
        
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        
        x = self.pool2(x)
        
        x = x.view(-1, 4900)
        x = self.linear1(x)
        x = F.relu(x)
        
        x = self.linear2(x)
        
        return x
    

### Build the net

In [14]:
net = LeNet5_convnet()
print(net)
utils.display_num_param(net)

LeNet5_convnet(
  (conv1): Conv2d(1, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(50, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=4900, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=10, bias=True)
)
There are 536710 (0.54 million) parameters in this neural network


### Send the weights of the networks to the GPU (as well as the mean and std)

In [16]:
net = net.to(device)

mean = mean.to(device)

std = std.to(device)

### Choose the criterion, batch size, and initial learning rate. Select the following:
- batch size = 128
- initial learning rate = 0.25

In [18]:
criterion = nn.CrossEntropyLoss()

my_lr = 0.25

bs = 128

### Function to evalate the network on the test set

In [19]:
def eval_on_test_set():
    
    running_error = 0
    num_batches = 0
    
    for i in range(0, 10000, bs):
        
        minibatch_data = test_data[i:i+bs].unsqueeze(dim=1)
        minibatch_label = test_label[i:i+bs]
        
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)
        
        inputs = (minibatch_data - mean) / std
        
        scores = net(inputs)
        
        error = utils.get_error(scores, minibatch_label)
        
        running_error += error.item()
        
        num_batches += 1
        
    total_error = running_error / num_batches
    print('error rate on test set = ', total_error * 100, 'percent')

### Do 30 passes through the training set. Divide the learning rate by 2 every 5 epochs

In [None]:
start=time.time()

for epoch in range(1,30):
    
    if not epoch%5:
        my_lr = my_lr / 2
        
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )
        
    running_loss=0
    running_error=0
    num_batches=0
    
    shuffled_indices=torch.randperm(60000)
 
    for count in range(0,60000,bs):
        
        # FORWARD AND BACKWARD PASS
    
        optimizer.zero_grad()
             
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices].unsqueeze(dim=1)
        minibatch_label=  train_label[indices]
        
        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)
        
        inputs = (minibatch_data - mean)/std      
        
        inputs.requires_grad_()

        scores=net( inputs ) 

        loss =  criterion( scores , minibatch_label) 
          
        loss.backward()
        
        optimizer.step()
        

        # COMPUTE STATS
        
        running_loss += loss.detach().item()
        
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()
        
        num_batches+=1        
    
    
    # AVERAGE STATS THEN DISPLAY
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = (time.time()-start)/60
    
    print('epoch=',epoch, '\t time=', elapsed,'min', '\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set() 
    print(' ')

epoch= 1 	 time= 2.01674386660258 min 	 lr= 0.25 	 loss= 0.2685899446422516 	 error= 8.598192071101304 percent
error rate on test set =  2.2844145569620253 percent
 
epoch= 2 	 time= 4.037270088990529 min 	 lr= 0.25 	 loss= 0.05859391000181405 	 error= 1.7712775451033864 percent
error rate on test set =  1.3844936708860758 percent
 
epoch= 3 	 time= 6.124068915843964 min 	 lr= 0.25 	 loss= 0.034963075733080363 	 error= 1.0927505330490406 percent
error rate on test set =  1.1075949367088607 percent
 
epoch= 4 	 time= 8.068228793144225 min 	 lr= 0.25 	 loss= 0.02596571209489493 	 error= 0.8317786112014672 percent
error rate on test set =  1.0284810126582278 percent
 
epoch= 5 	 time= 10.053762833277384 min 	 lr= 0.125 	 loss= 0.014445360574095862 	 error= 0.42810501066098083 percent
error rate on test set =  0.7911392405063291 percent
 
epoch= 6 	 time= 12.027889955043793 min 	 lr= 0.125 	 loss= 0.01103264417736106 	 error= 0.3059479346407502 percent
error rate on test set =  1.097705696