<a href="https://colab.research.google.com/github/arch1781993/Learning_Pytorch_DeepLearning/blob/main/6_CNN_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST dataset

In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [63]:
# Before we import the data we need to decide how each image is converted. The raw MNIST images are 2d,
# but the model works on 4-dimensional tensors: (# of images, color channel, height, width).

# ToTensor() turns one MNIST image file into a 3-dimensional tensor (color channel, height, width); the "# of images" dimension is added later, when images are grouped into a batch
transform = transforms.ToTensor()

In [64]:
# Training data
# MNIST is already available in the datasets library. root = '/CNN_data' is the folder the files are saved in, train = True selects the training split, and transform is applied to every image. NOTE(review): the leading slash makes root an absolute path at the top of the filesystem, not a folder next to the notebook - confirm this is intended.
train_data = datasets.MNIST(root = '/CNN_data', train = True, download=True, transform=transform)

In [65]:
# Test data: train = False selects the test split; it is stored under root = '/CNN_data' and converted with transform
test_data = datasets.MNIST(root = '/CNN_data', train = False, download=True, transform=transform)

In [66]:
# Wrap each dataset in a loader that hands out small batches of 10 images.
# Only the training loader shuffles; the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [67]:
# Define our model
# Stand-alone walkthrough of the 2 convolutional layers, to see what each one does to an image.
# This is just an example, later we will build the whole model.

# First convolutional layer: 1 input channel, 6 output feature maps (6 filters), 3*3 kernel, stride 1.
# No padding is set, so every pass trims 2 pixels from the height and the width. That is acceptable
# here because the digits sit in the middle of the image and the edges carry nothing we care about.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)

# Second convolutional layer: 6 input channels (the 6 feature maps produced by conv1),
# 16 output feature maps, 3*3 kernel, stride 1.
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [68]:
# Grab 1 MNIST image
# Index the dataset directly instead of starting a loop only to break out of it on the first pass.
# X_train is the actual image and y_train is the label associated with it.
X_train, y_train = train_data[0]

In [69]:
X_train.shape # (color channel, height, width): 1 channel and the image size is 28*28 pixels

torch.Size([1, 28, 28])

In [70]:
# The convolutional layers are fed a 4d tensor, so reshape this single image into 4 dimensions
x = X_train.view(1,1,28,28)        # batch of 1 image, 1 color channel, 28 height, 28 width

In [71]:
# Perform our first convolution
x = F.relu(conv1(x)) # ReLU (Rectified Linear Unit) is the activation function applied to the conv1 output

In [72]:
x.shape      # batch of 1, 6 feature maps (one per 3*3 filter), 26 height and 26 width (28 - 3 + 1 = 26)

torch.Size([1, 6, 26, 26])

In [73]:
# Pass through the pooling layer
x = F.max_pool2d(x, kernel_size=2, stride=2) # max pooling with a 2*2 kernel and a stride of 2

In [74]:
x.shape # pooling halves the height and the width: 26/2 = 13

torch.Size([1, 6, 13, 13])

In [75]:
# Perform our second convolution, again followed by the ReLU activation
x = F.relu(conv2(x))

In [76]:
x.shape # This time there are 16 outputs (why not 6*16, given 6 inputs and 16 filters?). In the 2nd convolution
        # each of the 16 filters is not a flat 3*3 square. Instead, each filter is a 3D volume of 3*3*6.
        # One filter slides across all 6 input feature maps at the same time. It performs a calculation on all 6 layers and sums them together into one single value, so each filter yields exactly one output map.

torch.Size([1, 16, 11, 11])

In [77]:
# 2nd pooling layer: same 2*2 kernel and stride of 2 as the first one
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [78]:
x.shape # 11/2 = 5.5, rounded down to 5: the leftover row and column are dropped, because we can't invent data to round up to 6

torch.Size([1, 16, 5, 5])

In [79]:
# Model class
class convulutionalNetwork(nn.Module):
  """CNN for single-channel 28*28 images: two conv + pool stages followed by three linear layers.

  forward() returns one row of 10 log-probabilities per image.
  """

  def __init__(self):
    super().__init__()
    # Feature extractor: two 3*3 convolutions with stride 1 and no padding
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)
    # Fully connected layers (see figure): the 16 feature maps of 5*5 left after
    # the 2nd pooling go to 120 neurons, then to 84 neurons, then to 10 neurons
    self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self,X):
    # Two passes of convolution -> ReLU -> max pooling (2*2 kernel, stride 2)
    for conv in (self.conv1, self.conv2):
      X = F.max_pool2d(F.relu(conv(X)), kernel_size=2, stride=2)

    # Flatten every image into one row of 16*5*5 values (the width fc1 expects);
    # the -1 lets the number of rows follow the batch size
    X = X.view(-1, self.fc1.in_features)

    # The first two fully connected layers use ReLU, the last one is left raw
    for hidden in (self.fc1, self.fc2):
      X = F.relu(hidden(X))
    scores = self.fc3(X)

    # log_softmax turns the raw scores of the last linear layer into logarithmic
    # probabilities, i.e. how strongly the model believes in each digit (0-9).
    # After flattening, the data is a table: rows are the images and columns are
    # the classes (0-9). dim=1 means the probabilities are computed across the
    # columns (the classes), separately for each image.
    return F.log_softmax(scores, dim=1)

In [80]:
# Create an instance
torch.manual_seed(41) # seed PyTorch's random number generator before the model is built
model = convulutionalNetwork()
model # last expression of the cell, so the notebook prints the layer summary

convulutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [81]:
# Loss function and optimizer
# The model's forward() already ends in F.log_softmax, so the matching loss is NLLLoss, which takes
# log-probabilities directly. nn.CrossEntropyLoss applies log_softmax itself and would repeat that step.
criterion = nn.NLLLoss()
# Adam over all model parameters; the smaller the learning rate (lr), the longer training takes
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)