<a href="https://colab.research.google.com/github/arpitp2912/Deep_Learning_Assignments/blob/master/MNIST_CNN/MNIST_Pytorch_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Convolutional Neural Network - MNIST Classification

### Import Libraries

In [0]:
import numpy as np 
import pandas as pd 
import torch

from torch import nn
import torch.nn.functional as F
from torchvision import datasets,transforms

In [3]:
# Check if GPU is available

train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('GPU is not available.')
else:
    print('GPU is available')

GPU is available


### Loading Datasets

In [0]:
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
trainset=datasets.MNIST('~/.pytorch/MNIST_data/',train=True,transform=transform,download=True)
validset=datasets.MNIST('~/.pytorch/MNIST_data/',train=False,transform=transform,download=True)

train_loader=torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True,num_workers=0)
valid_loader=torch.utils.data.DataLoader(validset,batch_size=64,shuffle=True,num_workers=0)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 4020668.32it/s]                             


Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 67848.17it/s]                            
0it [00:00, ?it/s]

Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 1131413.45it/s]                            
0it [00:00, ?it/s]

Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 25639.61it/s]            

Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!





### Define CNN Architecture

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First Convolutional layer with 10 filters of size 3
        self.conv1 = nn.Conv2d(1, 10, 3, 1)
        
        # Second Convolutional layer with 15 filters of size 3
        self.conv2 = nn.Conv2d(10, 15, 3, 1)
        
        # Fully-Connected Layers
        self.fc1 = nn.Linear(5*5*15, 50)
        self.dropout1 = nn.Dropout(0.25)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Apply ReLU activation on convolutional layers
        x = F.relu(self.conv1(x))
        # Apply Max pooling
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 5*5*15)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
      
      
# create a complete CNN
model = Net()
print(model)

# move tensors to GPU if CUDA is available
if train_on_gpu:
    model.cuda()

Net(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 15, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=375, out_features=50, bias=True)
  (dropout1): Dropout(p=0.25)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


**Total No. of Parameters in the Network**

In [0]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
print("Total_params",pytorch_total_params)
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Trainable_params",pytorch_total_params)

Total_params 20775
Trainable_params 20775


### Define Loss Function and Optimizer

In [0]:
import torch.optim as optim
criterion = nn.NLLLoss()

# specify optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

### Train the Model

In [0]:
# Number of epochs to train the model
n_epochs = 10

valid_loss_min = np.Inf # Track change in validation loss

for epoch in range(1, n_epochs+1):

    # Track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    
    # Model Training
    model.train()
    for data, target in train_loader:
        # Move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
            
        # Clear the gradients of all optimized variables
        optimizer.zero_grad()
        
        # Forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        
        # Calculate the batch loss
        loss = criterion(output, target)
        
        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        
        # Perform a single optimization step (parameter update)
        optimizer.step()
        
        # Ipdate training loss
        train_loss += loss.item()*data.size(0)
        
    # Model Evaluation
    model.eval()
    for data, target in valid_loader:
        # Move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # Forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # Calculate the batch loss
        loss = criterion(output, target)
        # Update average validation loss 
        valid_loss += loss.item()*data.size(0)
    
    # Calculate average losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
        
    # Print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    # Save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model_mnist.pt')
        valid_loss_min = valid_loss

Epoch: 1 	Training Loss: 0.954072 	Validation Loss: 0.338915
Validation loss decreased (inf --> 0.338915).  Saving model ...
Epoch: 2 	Training Loss: 0.355702 	Validation Loss: 0.243871
Validation loss decreased (0.338915 --> 0.243871).  Saving model ...
Epoch: 3 	Training Loss: 0.246100 	Validation Loss: 0.141469
Validation loss decreased (0.243871 --> 0.141469).  Saving model ...
Epoch: 4 	Training Loss: 0.194155 	Validation Loss: 0.111442
Validation loss decreased (0.141469 --> 0.111442).  Saving model ...
Epoch: 5 	Training Loss: 0.160212 	Validation Loss: 0.104872
Validation loss decreased (0.111442 --> 0.104872).  Saving model ...
Epoch: 6 	Training Loss: 0.141925 	Validation Loss: 0.081309
Validation loss decreased (0.104872 --> 0.081309).  Saving model ...
Epoch: 7 	Training Loss: 0.128147 	Validation Loss: 0.076264
Validation loss decreased (0.081309 --> 0.076264).  Saving model ...
Epoch: 8 	Training Loss: 0.119253 	Validation Loss: 0.067575
Validation loss decreased (0.07626

### Model Testing

In [0]:
# specify the image classes
classes = ['0', '1', '2', '3', '4',
           '5', '6', '7', '8', '9']

In [0]:
batch_size=16

# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()
# iterate over test data
for data, target in valid_loader:
  
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
        
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    
    # calculate the batch loss
    loss = criterion(output, target)
    
    # update test loss 
    test_loss += loss.item()*data.size(0)
    
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)  
    
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    
    # calculate test accuracy for each object class
    for i in range(batch_size):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(valid_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.064583

Test Accuracy of     0: 97% (229/234)
Test Accuracy of     1: 99% (280/281)
Test Accuracy of     2: 98% (259/263)
Test Accuracy of     3: 97% (239/246)
Test Accuracy of     4: 99% (271/273)
Test Accuracy of     5: 98% (224/228)
Test Accuracy of     6: 98% (225/229)
Test Accuracy of     7: 97% (251/257)
Test Accuracy of     8: 96% (242/250)
Test Accuracy of     9: 96% (243/251)

Test Accuracy (Overall): 98% (2463/2512)


**We have achieved accuracy of 98% with 20775 trainable parameters**