<a href="https://colab.research.google.com/github/abhishekjagtap1/Handwritten-digits-recognition/blob/main/CNN_MNIST_classification_pytorch_backend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import numpy as np


In [2]:
# Detect once whether a CUDA device is usable; later cells read this flag
# to decide whether the model and the batches are moved to the GPU.
train_on_gpu = torch.cuda.is_available()

print('training on gpu' if train_on_gpu else 'training on cpu')

training on gpu


In [4]:
import torchvision.transforms as transforms

# Data-loading settings shared by the loader cells further down.
num_workers = 0   # forwarded to DataLoader(num_workers=...)
batch_size = 25   # samples per batch
valid_size = 0.2  # fraction of the training set held out for validation

# Convert each image to a tensor, then normalise it with mean 0.5 and std 0.5.
# Normalize documents per-channel *sequences*; `(0.5)` is just the float 0.5,
# so the one-element tuples are spelled with a trailing comma.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits are downloaded into ./data on first use and share the same transform.
train_data = torchvision.datasets.MNIST('data', train=True, download=True, transform=transform)
test_data = torchvision.datasets.MNIST('data', train=False, download=True, transform=transform)

In [5]:
def split_indices(n, validation_size, seed=None):
    """Randomly partition the indices ``0 .. n-1`` into training and validation sets.

    Args:
        n: Total number of samples to split.
        validation_size: Fraction of the samples, between 0 and 1 inclusive,
            assigned to the validation set. The validation count is
            ``int(n * validation_size)``, i.e. rounded down.
        seed: Optional seed for a reproducible split. When ``None`` (the
            default) the permutation is drawn from NumPy's global random
            state, exactly as before.

    Returns:
        A ``(train_indices, valid_indices)`` pair of NumPy integer arrays that
        are disjoint and together contain every index exactly once.

    Raises:
        ValueError: If ``n`` is negative or ``validation_size`` lies outside
            ``[0, 1]``.
    """
    if n < 0:
        raise ValueError('n must be non-negative, got {}'.format(n))
    # Without this check a negative fraction silently swaps the roles of the
    # two returned arrays and a fraction above 1 leaves the training set empty.
    if not 0 <= validation_size <= 1:
        raise ValueError(
            'validation_size must be between 0 and 1, got {}'.format(validation_size))

    n_valid = int(n * validation_size)
    if seed is None:
        shuffled = np.random.permutation(n)
    else:
        shuffled = np.random.default_rng(seed).permutation(n)

    # The first n_valid shuffled entries form the validation set, the rest train.
    return shuffled[n_valid:], shuffled[:n_valid]

In [6]:
# Split the training-set indices into a training share and a held-out
# validation share, then report the size of each.
train_indices, valid_indices = split_indices(n=len(train_data), validation_size=valid_size)
print(f"{len(train_indices)} {len(valid_indices)}")

48000 12000


In [7]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader


# Ten digit classes, 0 through 9.
classes = list(range(10))
num_classes = len(classes)

# Both loaders read from the same `train_data`; each sampler restricts its
# loader to its own share of the indices and draws them in random order.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
    num_workers=num_workers,
)
valid_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=valid_sampler,
    num_workers=num_workers,
)

print(num_classes)

10


In [8]:
#cnn architecture

import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Convolutional classifier for single-channel 28x28 images.

    Six unpadded 2x2 convolutions, interleaved with three 2x2 max-pooling
    layers, reduce a 1x28x28 input to a 512x2x2 feature map. A four-layer
    fully connected head maps those 2048 features to one raw score (logit)
    per class; no softmax is applied.

    Args:
        n_outputs: Number of output classes. When ``None`` (the default) the
            module-level ``num_classes`` is used, so ``Net()`` behaves as before.
    """

    def __init__(self, n_outputs=None):
        super(Net, self).__init__()
        if n_outputs is None:
            n_outputs = num_classes

        # convolutional feature extractor (kernel 2, stride 1, no padding)
        self.conv1 = nn.Conv2d(1, 16, kernel_size = 2, stride = 1, padding = 0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size = 2, stride = 1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size = 2, stride = 1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size = 2, stride = 1)
        self.conv5 = nn.Conv2d(128, 256, kernel_size = 2, stride = 1)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.conv6 = nn.Conv2d(256, 512, kernel_size = 2, stride = 1)
        self.maxpool3 = nn.MaxPool2d(2,2)
        # fully connected head; 512*2*2 is the flattened size for a 28x28 input
        self.fc1 = nn.Linear(512*2*2, 1024)
        # one dropout module (p=0.3) reused at every dropout point in forward()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 64)
        # output layer
        self.fc4 = nn.Linear(64, n_outputs)

    def forward(self, x):
        """Return class logits of shape ``(batch, n_outputs)`` for ``x``.

        The shape comments below are (channels x height x width) for a
        ``(batch, 1, 28, 28)`` input. Each unpadded 2x2 convolution shrinks
        height and width by one; each pooling layer halves them, rounding down.
        """
        x = F.relu(self.conv1(x))   # 16 x 27 x 27
        x = F.relu(self.conv2(x))   # 32 x 26 x 26
        x = F.relu(self.conv3(x))   # 64 x 25 x 25
        x = self.maxpool1(x)        # 64 x 12 x 12
        x = F.relu(self.conv4(x))   # 128 x 11 x 11
        x = self.dropout(x)
        x = F.relu(self.conv5(x))   # 256 x 10 x 10
        x = self.dropout(x)
        x = self.maxpool2(x)        # 256 x 5 x 5
        x = F.relu(self.conv6(x))   # 512 x 4 x 4
        x = self.maxpool3(x)        # 512 x 2 x 2
        x = self.dropout(x)

        # Flatten per sample while keeping the batch dimension fixed. A
        # mis-sized input then fails loudly in fc1 instead of being silently
        # reshaped into a different number of rows.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        # output layer: raw scores, one per class
        x = self.fc4(x)
        return x
    
    
# Build the network and show its layer summary.
model = Net()
print(model)

# Move the parameters to the GPU when one was detected earlier.
if train_on_gpu:
    model = model.cuda()




Net(
  (conv1): Conv2d(1, 16, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(2, 2), stride=(1, 1))
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv6): Conv2d(256, 512, kernel_size=(2, 2), stride=(1, 1))
  (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=1024, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=1024, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [9]:
# Objective and update rule.
# Cross-entropy is applied to the raw scores returned by the network.
criterion = nn.CrossEntropyLoss()
# Plain SGD over every model parameter with a step size of 0.01.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [10]:
# Train the network, validating and checkpointing after every epoch.
n_epochs = 30

for epoch in range(1, n_epochs+1):

    # Running sums of the per-sample loss; turned into averages below.
    train_loss = 0.0
    valid_loss = 0.0

    # ---- training pass: dropout active, weights updated ----
    model.train()
    for data, labels in train_loader:
        # Only the device transfer depends on the GPU flag; the update itself
        # has to run on CPU as well, so it must not sit inside this `if`.
        if train_on_gpu:
            data, labels = data.cuda(), labels.cuda()

        # reset gradients accumulated by the previous step
        optimizer.zero_grad()
        # forward pass
        output = model(data)
        loss = criterion(output, labels)
        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size
        train_loss += loss.item()*data.size(0)

    # ---- validation pass: dropout disabled, no gradients needed ----
    model.eval()
    with torch.no_grad():
        for data, labels in valid_loader:
            if train_on_gpu:
                data, labels = data.cuda(), labels.cuda()

            output = model(data)
            loss = criterion(output, labels)

            valid_loss += loss.item()*data.size(0)

    # average over the number of samples each sampler draws per epoch
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)

    # report both losses for this epoch
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                        epoch, train_loss, valid_loss))

    # Overwrite the checkpoint after every epoch. The whole module object is
    # pickled (not just a state_dict) because the reload cell expects that.
    torch.save(model, 'MNIST_CNN_Pytorch')

Epoch: 1 	Training Loss: 2.301883 	Validation Loss: 2.301170
Epoch: 2 	Training Loss: 2.301309 	Validation Loss: 2.300985
Epoch: 3 	Training Loss: 2.301195 	Validation Loss: 2.301009
Epoch: 4 	Training Loss: 2.300994 	Validation Loss: 2.300899
Epoch: 5 	Training Loss: 2.300617 	Validation Loss: 2.300165
Epoch: 6 	Training Loss: 2.298892 	Validation Loss: 2.297064
Epoch: 7 	Training Loss: 2.027823 	Validation Loss: 0.756171
Epoch: 8 	Training Loss: 0.366480 	Validation Loss: 0.170297
Epoch: 9 	Training Loss: 0.161318 	Validation Loss: 0.126596
Epoch: 10 	Training Loss: 0.115281 	Validation Loss: 0.095575
Epoch: 11 	Training Loss: 0.092325 	Validation Loss: 0.073457
Epoch: 12 	Training Loss: 0.076726 	Validation Loss: 0.065248
Epoch: 13 	Training Loss: 0.067156 	Validation Loss: 0.056973
Epoch: 14 	Training Loss: 0.059593 	Validation Loss: 0.053939
Epoch: 15 	Training Loss: 0.051966 	Validation Loss: 0.044223
Epoch: 16 	Training Loss: 0.047038 	Validation Loss: 0.043262
Epoch: 17 	Traini

In [11]:
# Reload the model object pickled by torch.save(model, ...) in the training cell.
# - map_location lets a checkpoint written on a GPU be opened on a CPU-only host.
# - weights_only=False is needed because the file holds a full pickled module,
#   which the weights_only=True default of PyTorch >= 2.6 refuses to load.
#   The keyword requires PyTorch >= 1.13.
# NOTE: unpickling can execute arbitrary code; only load checkpoints you created.
model = torch.load(
    'MNIST_CNN_Pytorch',
    map_location='cuda' if train_on_gpu else 'cpu',
    weights_only=False,
)
model.eval()

Net(
  (conv1): Conv2d(1, 16, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(2, 2), stride=(1, 1))
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv6): Conv2d(256, 512, kernel_size=(2, 2), stride=(1, 1))
  (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=1024, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=1024, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)

In [12]:
# Evaluate the network on the held-out test set.
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers = num_workers)

test_loss = 0.0
# per-class tallies, indexed by digit label
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)

model.eval()
with torch.no_grad():  # inference only: skip building autograd graphs
    for data, labels in test_loader:
        # Only the device transfer depends on the GPU flag; the evaluation
        # itself has to run on CPU as well, so it must not sit inside this `if`.
        if train_on_gpu:
            data, labels = data.cuda(), labels.cuda()

        output = model(data)
        loss = criterion(output, labels)
        # criterion returns the batch mean, so weight it by the batch size
        test_loss += loss.item()*data.size(0)

        # predicted class = index of the largest score in each row
        _, pred = torch.max(output, 1)
        hits = pred.eq(labels.view_as(pred)).cpu().tolist()

        # Iterate over the labels actually present, so a short final batch
        # (or a batch of one) does not index past the end.
        for label, hit in zip(labels.cpu().tolist(), hits):
            class_correct[label] += int(hit)
            class_total[label] += 1

# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(len(classes)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.022681

Test Accuracy of     0: 99% (975/980)
Test Accuracy of     1: 99% (1133/1135)
Test Accuracy of     2: 99% (1024/1032)
Test Accuracy of     3: 99% (1004/1010)
Test Accuracy of     4: 99% (981/982)
Test Accuracy of     5: 98% (883/892)
Test Accuracy of     6: 99% (950/958)
Test Accuracy of     7: 99% (1023/1028)
Test Accuracy of     8: 98% (960/974)
Test Accuracy of     9: 98% (991/1009)

Test Accuracy (Overall): 99% (9924/10000)
