# Pytorch : CNN MNIST digit recognition

In [3]:
# Consider one architecture for CNN
# take motivation from SOA architectures
# we are going to use below architecture for MNIST problem
# Later we are also going to play around changing the architecture to see the performance
# input image size for MNIST dataset is 28x28

![cnn mnist](https://user-images.githubusercontent.com/30661597/61713471-3c957d00-ad8b-11e9-9a38-e3f4d1e72565.png)

In [19]:
#import required packages 
#impirt torch for using tensor and other basic pytorch utilities
import torch
# import torchvision package for the MNIST dataset and dataloader utility
from torchvision import datasets
from torch.utils.data import DataLoader
#import torch.nn module for building NN architecture
import torch.nn as nn
# import transform for normalizing data, without this the process with end with error below
# batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>
import torchvision.transforms as transforms

In [21]:
#creating dataset and dataloaders
# first we will check without normalization then apply normalization
# change the root path as per need based on running location for 
# colab it would be ./data if the data is there
train_dataset = datasets.MNIST(root = './data',
                               train = True,
                               transform=transforms.ToTensor(),
                              download = False)
test_dataset = datasets.MNIST(root = './data',
                               train = False,
                              transform=transforms.ToTensor(),
                              download = False)
train_dataloader = DataLoader(dataset=train_dataset,batch_size=100,shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset,batch_size=100,shuffle=False)

In [22]:
#check the length of the train dataloader
print('Train Dataloader : ')
print('No of batches:{}'.format(len(train_dataloader)))
print('batch size:{}'.format(train_dataloader.batch_size))
print('shape of data:{}'.format(train_dataloader.dataset.data.shape))

Train Dataloader : 
No of batches:600
batch size:100
shape of data:torch.Size([60000, 28, 28])


In [23]:
# Creating the CNN architecture
# Ref
'''
torch.nn.Conv2d(in_channels, 
                 out_channels, 
                 kernel_size, 
                 stride=1, 
                 padding=0, 
                 dilation=1, 
                 groups=1, 
                 bias=True, 
                 padding_mode='zeros')
Here we are considering zero padding.
Hence, to achive same size of the feature map as input we need to calculte the stride size.
We will use maxpooling to reduce the feature size
for conv1 
(28-3 + 2*padd)/1 + 1 = 28 ==> padd = 1
for maxpool we want the feature size to reduce to half so we need to use 2x2 size
for conv1 
(14-3 + 2*padd)/1 + 1 = 14 ==> padd = 1              
'''
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=8,kernel_size=3,stride=1,padding=1)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.batchnorm1 = nn.BatchNorm2d(num_features=8)
        self.relu = nn.ReLU()
        #will check if we change the kernel size how it affects
        self.conv2 = nn.Conv2d(in_channels=8,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.batchnorm2 = nn.BatchNorm2d(num_features=32)
        self.fc1 = nn.Linear(in_features=32*7*7,out_features=600)
        self.fc2 = nn.Linear(in_features=600,out_features=10)
        #self.droput = nn.Dropout(p=0.5)
    def forward(self,X):
        output = self.conv1(X)
        output = self.batchnorm1(output)
        output = self.relu(output)
        output = self.maxpool(output)
        output = self.conv2(output)
        output = self.batchnorm2(output)
        output = self.relu(output)
        output = self.maxpool(output)
        output = output.view(-1,1568)
        output = self.fc1(output)
        output = self.relu(output)
        output = self.fc2(output)
        return output      

In [41]:
model = CNN()
CUDA = torch.cuda.is_available()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr=0.01)

In [47]:
#training the model
epochs = 10
if CUDA:
    model = model.cuda()
model.train()
for epoch in range(epochs):
    iter_loss=0
    sample=0
    correct=0
    
    for i,(X,y) in enumerate(train_dataloader):
        if CUDA:
            X = X.cuda()
            y = y.cuda()
        output = model(X)
        loss = criterion(output,y) # we need to give output in raw format then y else will throw error
        iter_loss += loss.item()
        sample += y.shape[0]
        _,predicted = torch.max(output,axis = 1)
        correct += (predicted==y).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('for epoch:{}, loss:{:.7f}, accuracy:{:.2f}%'.format(epoch,float(iter_loss/sample),float(100*correct/sample)))
        
        

for epoch:0, loss:0.0002184, accuracy:99.00%
for epoch:1, loss:0.0001784, accuracy:99.00%
for epoch:2, loss:0.0002000, accuracy:99.00%
for epoch:3, loss:0.0001649, accuracy:99.00%
for epoch:4, loss:0.0002204, accuracy:99.00%
for epoch:5, loss:0.0001681, accuracy:99.00%
for epoch:6, loss:0.0001574, accuracy:99.00%
for epoch:7, loss:0.0001500, accuracy:99.00%
for epoch:8, loss:0.0002019, accuracy:99.00%
for epoch:9, loss:0.0001113, accuracy:99.00%


In [49]:
if CUDA:
    model = model.cuda()
model.eval()
sample =0
iter_loss =0
correct =0
for i,(X,y) in enumerate(test_dataloader):
    if CUDA:
            X = X.cuda()
            y = y.cuda()
        output = model(X)
        loss = criterion(output,y) # we need to give output in raw format then y else will throw error
        iter_loss += loss.item()
        sample += y.shape[0]
        _,predicted = torch.max(output,axis = 1)
        correct += (predicted==y).sum()
        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()
print('for test dataset loss:{:.7f}, accuracy:{:.6f}%'.format(epoch,float(iter_loss/sample),float(100*correct/sample)))

for test dataset loss:9.0000000, accuracy:0.000619%
