# ABOUT THE MODEL

In [None]:
'''
NAME          : model_1 
DATASET       : FMNIST
MODEL         : Two layered CNN with Max-pooling and Valid-padding
ACCURACY      : 90.37 %
DATASET CLASS : standard
'''
# Model Name
# Used by the pickling / restoring cells to build the backup folder
# ('backup_files/<model_name>') and the names of the checkpoint files inside it.
model_name = 'model_1'

__NOTE__ : The model has been analysed after 90,000 iterations. The results of these iterations, as well as the parameter values, can be restored by running the second last cell of this notebook.

To restore the results, run all the cells before the section "Training the model", and then run the second last cell. Thereafter you can run all the cells after "Evaluating model performance".

# PRELIMINARY 

In [None]:
'''IMPORTING MODULES'''

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

import os
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


plt.ion()

# LOADING THE DATASET

In [None]:
'''LOADING DATASET'''
# Dataset located in folder : "./data"

# The notebook header declares the dataset as FMNIST (Fashion-MNIST), so the
# Fashion-MNIST loader is used here rather than the handwritten-digit MNIST one.
# Both datasets share the same 1x28x28 image shape and 10 classes, so the
# model definition below is unaffected.
train_dataset = dsets.FashionMNIST(root='./data',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)

# download=True is a no-op when the files are already present; it lets this
# statement succeed even if it is run without the train split having been
# fetched first.
test_dataset = dsets.FashionMNIST(root='./data',
                                  train=False,
                                  transform=transforms.ToTensor(),
                                  download=True)

In [None]:
'''MAKING DATASET ITERABLE'''

# Mini-batch size shared by the train and test loaders.
batch_size = 100

# Training batches are reshuffled on every pass; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# CREATING THE MODEL

We create a model class to design the model. The model has the following layers:-
- __CONVOLUTION 1__
    - In channels : 01
    - Out channels : 16
- __MAX POOL 1__
    - Kernel size : 2
- __CONVOLUTION 2__
    - In channels : 16
    - Out channels : 32
- __MAX POOL 2__
    - Kernel size : 2

- __FULLY CONNECTED__ 

In [None]:
'''CREATEING MODEL CLASS'''

class CNNModel(nn.Module):
    """Two-stage CNN: (5x5 conv -> ReLU -> 2x2 max-pool) twice, then a linear readout.

    Shape walk-through for a 1x28x28 input (stride 1, no padding):
        cnn1     :  1x28x28 -> 16x24x24
        maxpool1 : 16x24x24 -> 16x12x12
        cnn2     : 16x12x12 -> 32x8x8
        maxpool2 : 32x8x8   -> 32x4x4
        fc1      : 32*4*4   -> 10 output scores
    """

    def __init__(self):
        super().__init__()

        # Stage 1: convolution, activation, down-sampling
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: convolution, activation, down-sampling
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Readout over the flattened 32x4x4 feature maps
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of fc1 outputs."""
        stage1 = self.maxpool1(self.relu1(self.cnn1(x)))
        stage2 = self.maxpool2(self.relu2(self.cnn2(stage1)))

        # Collapse every sample's feature maps into one row vector
        flattened = stage2.view(stage2.size(0), -1)

        return self.fc1(flattened)

In [None]:
'''INSTANTIATING MODEL CLASS'''

model = CNNModel()

# Move the parameters onto the GPU whenever one is available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model = model.cuda()

In [None]:
'''INSTANTIATING LOSS CLASS'''

# Cross-entropy loss; in the training cell it is called on the model outputs
# (the result of the final linear layer) together with the integer class labels.
criterion = nn.CrossEntropyLoss()

In [None]:
'''INSTANTIATING OPTIMIZER CLASS'''

# Step size handed to the SGD optimizer below.
learning_rate = 0.03
# SGD over every parameter of the model; no other optimizer options are passed.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# TRAINING THE MODEL

In [None]:
'''INITIALIZING SOME VARIABLES'''

# Running count of optimizer steps taken so far
iterr = 0

# Inspection histories (one entry appended per evaluation checkpoint):
#   iter_list        - iteration number at which the model was evaluated
#   train_loss_list  - train loss
#   train_acc_list   - train accuracy
#   test_loss_list   - test loss
#   test_acc_list    - test accuracy
(iter_list,
 train_loss_list,
 train_acc_list,
 test_loss_list,
 test_acc_list) = ([] for _ in range(5))

In [None]:
'''SETTING CUSTOM PARAMETERS'''

n_iters = 18000                          # Number of iterations to train the model for
inspect_size = 250                       # Evaluate the model every `inspect_size` iterations

# One epoch is a full pass over the training set, i.e. len(train_dataset) / batch_size iterations.
iters_per_epoch = len(train_dataset) / batch_size
num_epochs = int(n_iters / iters_per_epoch)

for caption, value in (
    ("Number of Iterations     :", n_iters),
    ("Number of Epochs         :", num_epochs),
    ("Number of Sample-Points  :", int(n_iters/inspect_size)),
):
    print(caption, value)

In [None]:
'''TRAINING THE MODEL'''

import time


def evaluate(model, data_loader, criterion):
    """Return ``(mean_loss, accuracy_percent)`` of ``model`` over all of ``data_loader``.

    The loss of each batch is weighted by the batch size, so the result is the
    per-sample mean over the whole loader (this assumes ``criterion`` returns a
    per-batch mean, which is the default for ``nn.CrossEntropyLoss``).
    Gradients are not tracked, and the model's train/eval mode is restored
    before returning.
    """
    device = next(model.parameters()).device
    was_training = model.training
    model.eval()

    loss_sum, correct, total = 0.0, 0, 0
    try:
        with torch.no_grad():
            for batch_images, batch_labels in data_loader:
                batch_images = batch_images.to(device)
                batch_labels = batch_labels.to(device)

                batch_outputs = model(batch_images)
                batch_count = batch_labels.size(0)

                loss_sum += criterion(batch_outputs, batch_labels).item() * batch_count
                predicted = torch.max(batch_outputs, 1)[1]
                correct += (predicted == batch_labels).sum().item()
                total += batch_count
    finally:
        model.train(was_training)

    return loss_sum / total, 100 * correct / total


time_begin = time.asctime()   # Time when training started

# Feed the batches to whichever device the model parameters live on.
device = next(model.parameters()).device

init_iters = iterr            # Lets the progress read-out restart at 0 when training is resumed
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()                 # Clearing the previous gradients

        outputs = model(images)               # Forward propagation
        loss = criterion(outputs, labels)     # Calculating the Train loss
        loss.backward()                       # Backward propagation
        optimizer.step()                      # Optimizing the parameters

        iterr += 1

        ### Inspecting the performance of the model ###
        if iterr % inspect_size != 0:
            continue

        iter_list.append(iterr)
        print("Iteration : {:.0f}/{:.0f} [{:2.0f}%] ".format(iterr - init_iters, n_iters, 100*(iterr - init_iters)/n_iters))
        print('---------------------------')
        # ----------------------------------------------------------------------------------------------------

        ### Calculating train accuracy and loss ###
        # Measured on the mini-batch that was just used for the update
        # (outputs were computed before optimizer.step()).
        predicted = torch.max(outputs.detach(), 1)[1]
        train_loss = loss.item()
        train_accuracy = 100 * (predicted == labels).sum().item() / labels.size(0)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_accuracy)

        print('[Train]\t Loss: {:.2f}\t Accuracy: {:.2f}'.format(train_loss, train_accuracy))
        # ----------------------------------------------------------------------------------------------------

        ### Calculating test accuracy and loss ###
        # Loss and accuracy are both averaged over the complete test set.
        # (Previously only the loss of the final test batch was recorded, so
        # histories restored from older checkpoints hold last-batch values.)
        test_loss, test_accuracy = evaluate(model, test_loader, criterion)

        test_loss_list.append(test_loss)
        test_acc_list.append(test_accuracy)

        print('[Test ]\t Loss: {:.2f}\t Accuracy: {:.2f}'.format(test_loss, test_accuracy))
        # ----------------------------------------------------------------------------------------------------
        print('=========================================================')
time_end = time.asctime()    # Time when training ended

In [None]:
'''POST TRAINING RESULTS'''

# Formatting the date-time data
from datetime import datetime
FMT = '%H:%M:%S'

# time_begin / time_end are time.asctime() strings; characters 11..18 hold HH:MM:SS.
# Only the clock time is compared, so the duration is taken modulo 24 hours:
# a run that crosses midnight is handled, a run longer than a day is not.
td = (datetime.strptime(time_end[11:19], FMT) - datetime.strptime(time_begin[11:19], FMT)).seconds

# Split the elapsed seconds into hours / minutes / seconds.
# (`mins` rather than `min`, so the builtin min() is not shadowed for later cells.)
hr, remainder = divmod(td, 3600)
mins, sec = divmod(remainder, 60)

print("Total Iterations     : {:.0f}".format(iterr))
# Epochs = samples seen / samples per epoch, from the actual batch and dataset sizes.
print("Total Epochs         : {:.0f}".format(iterr * batch_size / len(train_dataset)))
print("Total Sample-Points  : {:.0f}".format(iterr/inspect_size))
print("-------------------------------")
# Accuracies are reported as the mean over the last 10 checkpoints.
print("Accuracy - Train     : {:.2f}".format(np.mean(train_acc_list[-10:])))
print("Accuracy - Test      : {:.2f}".format(np.mean(test_acc_list[-10:])))
print("-------------------------------")
print("Start Time        : {}".format(time_begin[11:19]))
print("End Time          : {}".format(time_end[11:19]))
print("Total Train-time  : {:2.0f}:{:2.0f}:{:2.0f}".format(hr,mins,sec))

# EVALUATING MODEL PERFORMANCE

To evaluate the performance of our model, we inspect the data in the inspection lists we defined earlier. However, the measured performance depends greatly on the specific data subset chosen for evaluation. 

In [None]:
'''PLOTTING ABSOLUTE VALUES'''

plt.figure(figsize=[16,6], dpi=100)

# One panel per metric: (title, y-axis label, train curve, test curve)
absolute_panels = (
    ("ABSOLUTE ACCURACY", 'Accuracy',
     (train_acc_list, 'Train Accuracy'), (test_acc_list, 'Test Accuracy')),
    ("ABSOLUTE LOSS", 'Loss',
     (train_loss_list, 'Train Loss'), (test_loss_list, 'Test Loss')),
)

for panel_no, (panel_title, y_label, train_curve, test_curve) in enumerate(absolute_panels, start=1):
    plt.subplot(1, 2, panel_no)
    plt.plot(iter_list, train_curve[0], '-', c='salmon', label=train_curve[1])
    plt.plot(iter_list, test_curve[0], '-', c='brown', label=test_curve[1])
    plt.legend(fontsize='10')
    plt.title(panel_title, size='20')
    plt.xlabel('Number of Iterations', size='15')
    plt.ylabel(y_label, size='15')
    plt.grid(True, linestyle='-.')


For train evaluation we had chosen a random subset of 100 data-points. This brings high variability in the performance of the model. Hence we generate rolling average lists that calculate the accuracy and loss as the average of the last few (= roll_size) data points and plot these as a function of the number of iterations elapsed. This gives us a much smoother and followable trend line.

Although we have calculated test accuracy and loss over a larger data subset (10,000 data-points), we still see some variability in the data. Hence we generate rolling data lists for test data. We use fewer data-points (=10) for the test rolling list as it has less variability, and hence gives a smooth curve for fewer data-points.

In [None]:
'''GENERATING AVERAGE INSPECTION LISTS'''

# Defining roll function
def make_roll(input_list, roll_size=5):
    """Return the trailing rolling mean of ``input_list``.

    Element ``i`` of the result is the mean of the last ``roll_size`` values up
    to and including ``input_list[i]``. While fewer than ``roll_size`` values
    are available, the mean of all values seen so far is used.

    Parameters
    ----------
    input_list : sequence of numbers
    roll_size : int, window length (must be >= 1)

    Returns
    -------
    list with the same length as ``input_list`` (empty for empty input).

    Raises
    ------
    ValueError if ``roll_size`` is smaller than 1.
    """
    if roll_size < 1:
        raise ValueError("roll_size must be at least 1, got {}".format(roll_size))

    output_list = []
    for i in range(len(input_list)):
        # The window ends at the current element (inclusive), so the average
        # does not lag one step behind the data; it is clipped at the start.
        window_start = max(0, i - roll_size + 1)
        output_list.append(np.mean(input_list[window_start:i + 1]))
    return output_list

# Generating roll lists
# Train curves are smoothed over 30 checkpoints, test curves over 10.
train_roll_loss_list, train_roll_acc_list = (
    make_roll(history, roll_size=30) for history in (train_loss_list, train_acc_list)
)
test_roll_loss_list, test_roll_acc_list = (
    make_roll(history, roll_size=10) for history in (test_loss_list, test_acc_list)
)

In [None]:
'''PLOTTING AVERAGE VALUES'''

plt.figure(figsize=[16,6], dpi=100)

# Per panel: title, y-axis label and the four curves drawn in order
# (raw train, rolling train, raw test, rolling test).
average_panels = (
    ("AVERAGE ACCURACY", 'Accuracy', (
        (train_acc_list, '-', dict(c='salmon', label='Train Accuracy')),
        (train_roll_acc_list, '-r', dict(lw=3, label='Train Accuracy [Avg]')),
        (test_acc_list, '-', dict(c='brown', label='Test Accuracy')),
        (test_roll_acc_list, '-k', dict(lw=3, label='Test Accuracy [Avg]')),
    )),
    ("AVERAGE LOSS", 'Loss', (
        (train_loss_list, '-', dict(c='salmon', label='Train Loss')),
        (train_roll_loss_list, '-r', dict(lw=3, label='Train Loss [Avg]')),
        (test_loss_list, '-', dict(c='brown', label='Test Loss')),
        (test_roll_loss_list, '-k', dict(lw=3, label='Test Loss [Avg]')),
    )),
)

for panel_no, (panel_title, y_label, curves) in enumerate(average_panels, start=1):
    plt.subplot(1, 2, panel_no)
    for values, line_format, line_options in curves:
        plt.plot(iter_list, values, line_format, **line_options)
    plt.legend(fontsize='10')
    plt.title(panel_title, size='20')
    plt.xlabel('Number of Iterations', size='15')
    plt.ylabel(y_label, size='15')
    plt.grid(True, linestyle='-.')

The average accuracy and loss graphs clearly show that our model is successful in fitting the data well. These graphs portray a solid trend right through the oscillating values of the previous graphs.

## ACCURACY ANALYSIS

In [None]:
'''PLOTTTING THE ACCURACY GRAPH'''

plt.figure(figsize=[39,24], dpi=50)

# Thin lines are the raw checkpoint values; thick '|'-marked lines are the rolling averages.
accuracy_curves = (
    (train_acc_list, '-', dict(lw=3, c='salmon', label='Train Accuracy')),
    (train_roll_acc_list, '-|r', dict(lw=7, label='Train Accuracy [Roll]')),
    (test_acc_list, '-', dict(lw=3, c='brown', label='Test Accuracy')),
    (test_roll_acc_list, '-|k', dict(lw=7, label='Test Accuracy [Roll]')),
)
for values, line_format, line_options in accuracy_curves:
    plt.plot(iter_list, values, line_format, **line_options)

plt.title('ACCURACY vs ITERATIONS', size='60')
plt.xlabel('Number of Iterations', size='40')
plt.ylabel('Accuracy', size='40')

plt.grid(True, linestyle='-.')
plt.tick_params(labelcolor='k', labelsize='30', width=3)

plt.legend(fontsize='30')
plt.show()

#plt.savefig(model_name + '-1-accuracy_vs_iterations.png', dpi=120)

As we can infer from the above graph, the test accuracy reaches its maximum value at around 40,000 iterations. There is a slight decrease in it thereafter. The train accuracy, however, keeps increasing. This is a clear indication that the model is overfitting the data, as the train accuracy is not reproducible in the test set.

## LOSS ANALYSIS

In [None]:
'''PLOTTTING THE LOSS GRAPH'''

plt.figure(figsize=[39,24], dpi=50)

# Thin lines are the raw checkpoint values; thick '|'-marked lines are the rolling averages.
loss_curves = (
    (train_loss_list, '-', dict(lw=3, c='salmon', label='Train Loss')),
    (train_roll_loss_list, '-|r', dict(lw=6, label='Train Loss [Roll]')),
    (test_loss_list, '-', dict(lw=3, c='brown', label='Test Loss')),
    (test_roll_loss_list, '-|k', dict(lw=6, label='Test Loss [Roll]')),
)
for values, line_format, line_options in loss_curves:
    plt.plot(iter_list, values, line_format, **line_options)


plt.title('LOSS vs ITERATIONS', size='60')
plt.xlabel('Number of Iterations', size='40')
plt.ylabel('Loss', size='40')

plt.grid(True, linestyle='-.')
plt.tick_params(labelcolor='k', labelsize='30', width=3)

plt.legend(fontsize='30')
plt.show()

#plt.savefig(model_name + '-1-loss_vs_iterations.png', dpi=120)

The results from the loss graph reinforce the accuracy graph results. For the first 10,000 iterations, both the train and test loss rapidly decrease. This indicates that the model is fitting the data well. After a sizeable number of iterations (>40,000) the test loss has stagnated while the training loss continues to decrease. This is another indication of overfitting.

In [None]:
'''ANALYZING ACCURACY AND LOSS'''

### ACCURACY ANALYSIS
print('Accuracy Analysis : ')
print('====================')

# Best test accuracy, for the raw checkpoint values and for their rolling average
test_max_acc = np.max(test_acc_list)
test_roll_max_acc = np.max(test_roll_acc_list)

print("[Average]")
print('--- Maximum accuracy on test-set  : {:.2f}'.format(test_roll_max_acc))
best_roll_acc_position = test_roll_acc_list.index(test_roll_max_acc)   # first checkpoint reaching the maximum
print('--- Iteration at maximum accuracy : {}'.format(iter_list[best_roll_acc_position]))
print("[Absolute]")
print('--- Maximum accuracy on test-set  : {:.2f}'.format(test_max_acc))
best_acc_position = test_acc_list.index(test_max_acc)
print('--- Iteration at maximum accuracy : {}'.format(iter_list[best_acc_position]))

### LOSS ANALYSIS
print('\nLoss Analysis : ')
print('====================')

# Lowest test loss, for the raw checkpoint values and for their rolling average
test_min_loss = np.min(test_loss_list)
test_roll_min_loss = np.min(test_roll_loss_list)

print("[Average]")
print('--- Minimum loss on test-set  : {:.2f}'.format(test_roll_min_loss))
best_roll_loss_position = test_roll_loss_list.index(test_roll_min_loss)   # first checkpoint reaching the minimum
print('--- Iteration at minimum loss : {}'.format(iter_list[best_roll_loss_position]))
print("[Absolute]")
print('--- Minimum  loss on test-set  : {:.2f}'.format(test_min_loss))
best_loss_position = test_loss_list.index(test_min_loss)
print('--- Iteration at minimum loss : {}'.format(iter_list[best_loss_position]))

# STORING THE MODEL

In [None]:
'''PICKLING RESULTS'''

# Label embedded in the checkpoint file names.
# NOTE(review): the restoring cell loads the '90K' checkpoint; confirm that '9K'
# is the label intended here before overwriting or creating files.
iteration_to_save = '9K'
backup_folder = os.path.join('backup_files', model_name)
os.makedirs(backup_folder, exist_ok=True)

### EVALUATION PARAMETERS ###
# Useful in plotting and analysing graphs
# The histories are referenced directly instead of being looked up through eval().
mdict = {
    'iter_list': iter_list,
    'train_loss_list': train_loss_list,
    'train_acc_list': train_acc_list,
    'test_loss_list': test_loss_list,
    'test_acc_list': test_acc_list,
}

# `with` closes each file even if pickling fails part-way through.
with open(os.path.join(backup_folder,'{}_{}_evalP'.format(model_name, iteration_to_save)),'wb') as fileObject:
    pickle.dump(mdict, fileObject)

### MODEL PARAMETERS ###
# Useful in further training the model
msd = model.state_dict()
with open(os.path.join(backup_folder,'{}_{}_modelP'.format(model_name, iteration_to_save)),'wb') as fileObject:
    pickle.dump(msd, fileObject)

### STATE-TIME DATA ###
# Start time, end time and the iteration counter (stored as a string).
timeState = [time_begin, time_end, str(iterr)]
with open(os.path.join(backup_folder,'{}_{}_timeState'.format(model_name, iteration_to_save)),'wb') as fileObject:
    pickle.dump(timeState, fileObject)

In [None]:
'''RESTORING PICKLED RESULTS'''

# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file.
# Only restore checkpoints that were produced by this notebook or that come
# from a source you trust.

iteration_to_load = '90K'
backup_folder = os.path.join('backup_files', model_name)
os.makedirs(backup_folder, exist_ok=True)

### EVALUATION PARAMETERS ###
# `with` closes each checkpoint file as soon as it has been read.
with open(os.path.join(backup_folder,'{}_{}_evalP'.format(model_name, iteration_to_load)),'rb') as fileObject:
    mdictx = pickle.load(fileObject)

iter_list = mdictx['iter_list']
train_loss_list = mdictx['train_loss_list']
train_acc_list = mdictx['train_acc_list']
test_loss_list = mdictx['test_loss_list']
test_acc_list = mdictx['test_acc_list']

### MODEL PARAMETERS ###
with open(os.path.join(backup_folder,'{}_{}_modelP'.format(model_name, iteration_to_load)),'rb') as fileObject:
    msd = pickle.load(fileObject)
model.load_state_dict(state_dict = msd)

### STATE-TIME DATA ###
with open(os.path.join(backup_folder,'{}_{}_timeState'.format(model_name, iteration_to_load)),'rb') as fileObject:
    timeState = pickle.load(fileObject)

# timeState layout: [start time, end time, iteration counter as a string]
time_begin = timeState[0]
time_end = timeState[1]
iterr = int(timeState[2])

In [None]:
'''TRIMMING THE DATA'''
# Useful if checkpoint measurements have been taken for values other than 'inspect_size' = 250.
# The (commented-out) trim below keeps one measurement out of every 'factor', so the number of
# sample points shrinks by 'factor' and the spacing between kept checkpoints grows accordingly.
# Only the assignment of 'factor' is live code; uncomment the sections below to apply the trim.
factor = 5

### Backing up the Data
# iter_list_store = iter_list
# train_loss_list_store = train_loss_list
# train_acc_list_store = train_acc_list
# test_loss_list_store = test_loss_list
# test_acc_list_store = test_acc_list

### Trimming the data
# NOTE(review): index i+factor-1 runs past the end of a list whose length is not a
# multiple of 'factor' -- check the lengths before enabling these lines.
# iter_list = [iter_list[i+factor-1] for i in range(len(iter_list)) if i%factor==0]
# train_loss_list = [train_loss_list[i+factor-1] for i in range(len(train_loss_list)) if i%factor==0]
# train_acc_list = [train_acc_list[i+factor-1] for i in range(len(train_acc_list)) if i%factor==0]
# test_loss_list = [test_loss_list[i+factor-1] for i in range(len(test_loss_list)) if i%factor==0]
# test_acc_list = [test_acc_list[i+factor-1] for i in range(len(test_acc_list)) if i%factor==0]

### Verifying if the trim was successful
# print(len(iter_list_store), len(iter_list))
# print(len(train_loss_list_store), len(train_loss_list))
# print(len(train_acc_list_store), len(train_acc_list))
# print(len(test_loss_list_store), len(test_loss_list))
# print(len(test_acc_list_store), len(test_acc_list))

# print(iter_list[:7],'\t', iter_list[-7:])
# print(iter_list_store[:7],'\t', iter_list_store[-7:])