<a href="https://colab.research.google.com/github/alfie1104/deeplearning-with-pytorch/blob/main/6_basic_cnn_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Convert each MNIST image file into a tensor. A single sample is 3-D (color channel, height, width);
# the 4-D layout (# of images, color channel, height, width) only appears once samples are batched.
transform = transforms.ToTensor()

In [3]:
# Training split of MNIST, stored under /cnn_data (download requested if needed).
# Every image is passed through `transform` when it is read.
train_data = datasets.MNIST(
    root='/cnn_data',
    train=True,
    transform=transform,
    download=True,
)

In [4]:
# Held-out test split of MNIST, stored under the same /cnn_data root
# and read through the same `transform` as the training split.
test_data = datasets.MNIST(
    root='/cnn_data',
    train=False,
    transform=transform,
    download=True,
)

In [5]:
# Display the training dataset summary (size, root location, split, transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
# Display the test dataset summary (size, root location, split, transform)
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# Wrap both splits in DataLoaders that hand out small mini-batches of 10 images.
# Training batches are shuffled; the test split keeps its original order.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [8]:
# Stand-alone convolutional layers used to walk through what a CNN does step by step.
# This is only a demonstration; the actual model is built as a class further down.
# Layer 1: 1 input channel -> 6 feature maps, 3x3 kernel, stride 1 (no padding).
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
# Layer 2: 6 input channels -> 16 feature maps, 3x3 kernel, stride 1 (no padding).
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [9]:
# Grab 1 MNIST record/image.
# Indexing the dataset directly returns its first (image, label) pair,
# so no loop or throwaway loop index is needed.
X_Train, y_train = train_data[0]

In [10]:
X_Train.shape # torch.Size([1, 28, 28]) -> 1 color channel, and each MNIST image is 28 x 28 pixels

torch.Size([1, 28, 28])

In [11]:
# Reshape the single image to 4-D (1 image, 1 channel, 28, 28) so it forms a batch of one for the conv layers
x = X_Train.view(1,1,28,28)

In [12]:
# Perform our first convolution.
# NOTE: this cell rebinds `x`, so re-running it without first re-creating `x` feeds
# the 6-channel result back into conv1, which expects 1 input channel.
x = F.relu(conv1(x)) # Rectified Linear Unit for our activation function

In [14]:
x.shape # torch.Size([1, 6, 26, 26]) -> 1 single image, 6 feature maps (the filters we asked for), 26 x 26 pixels (MNIST is 28 x 28; because the convolutional layer has no padding, the image shrinks to 26 x 26)

torch.Size([1, 6, 26, 26])

In [15]:
# Down-sample with max pooling: a 2x2 window moved in steps of 2
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [17]:
x.shape # torch.Size([1, 6, 13, 13]) -> 1 single image, 6 feature maps (the filters we asked for), 13 x 13 because pooling halves 26 x 26

torch.Size([1, 6, 13, 13])

In [18]:
# Do our second convolutional layer (6 channels in, 16 feature maps out), followed by ReLU.
# NOTE: this cell rebinds `x`; it must run exactly once after the first pooling step.
x = F.relu(conv2(x))

In [21]:
x.shape # torch.Size([1, 16, 11, 11]) -> 1 single image, 16 filters we defined, 11 x 11 is shrunk from 13 x 13 (we didn't set padding, so we lose 2 pixels in each dimension, one on every side of the image)

torch.Size([1, 16, 11, 11])

In [22]:
# Second max-pooling pass: again a 2x2 window moved in steps of 2
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [24]:
x.shape # torch.Size([1, 16, 5, 5]) -> 11/2 = 5.5, but the result is rounded down because we can't invent data to round up

torch.Size([1, 16, 5, 5])

In [25]:
# ((28[original] - 2[lost to the first unpadded 3x3 convolution]) / 2[first pooling] - 2[lost to the second unpadded 3x3 convolution]) / 2[second pooling]
# True division gives 5.5; the actual feature map is 5 x 5 because the pooling step rounds down.
((28-2)/2-2)/2

5.5

In [26]:
# Model Class
class ConvolutionalNetwork(nn.Module):
  """CNN digit classifier: two conv/pool stages followed by three linear layers.

  The flattening step assumes each sample leaves the second pooling stage as
  16 feature maps of 5 x 5, which is what a 1-channel 28 x 28 image produces.
  """

  def __init__(self):
    """Create the two convolutional layers and the three fully connected layers."""
    super().__init__()
    # Feature extraction: unpadded 3x3 convolutions with stride 1
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)
    # Fully connected layers: 16 maps * 5 * 5 pixels in, 10 digit classes (0 ~ 9) out.
    # The hidden widths 120 and 84 are arbitrary choices.
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, X):
    """Return log-probabilities over the 10 classes, one row per sample in X."""
    # Each stage: convolution -> ReLU -> 2x2 max pooling with stride 2
    features = X
    for conv_layer in (self.conv1, self.conv2):
      features = F.max_pool2d(F.relu(conv_layer(features)), kernel_size=2, stride=2)

    # Flatten; -1 lets the batch dimension vary
    flattened = features.view(-1, 16 * 5 * 5)

    # Fully connected layers; the last one has no ReLU
    hidden = F.relu(self.fc1(flattened))
    hidden = F.relu(self.fc2(hidden))
    scores = self.fc3(hidden)
    return F.log_softmax(scores, dim=1)


In [27]:
# Create an instance of our Model.
# The seed is fixed before construction so the layers' initial weights are reproducible;
# the trailing bare `model` displays the layer summary.
torch.manual_seed(41)
model = ConvolutionalNetwork()
model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [28]:
# Loss function and optimizer.
# The model's forward() already ends in F.log_softmax, so it emits log-probabilities.
# nn.NLLLoss is the loss that takes log-probabilities; nn.CrossEntropyLoss expects raw
# logits and would apply log_softmax a second time (redundant, same loss value).
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # The smaller the learning rate, the longer training takes.