# Required Libraries
Calling upon all the heavenly gods

In [2]:
from __future__ import print_function
import torch
import torch.nn as nn                         #Torch NeuralNets
import torch.nn.functional as F               # Torch Functions
import torch.optim as optim                   # Torch Optimizer
from torchvision import datasets, transforms  # Dataset and Transdorms

nn.Module: Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes:

In [3]:
class Net(nn.Module):                       #Input- rows x columns x channels|| Output- rows x columns x channels || Receptive Field
    def __init__(self):                                          #________________________________ 
        super(Net, self).__init__()                              #|input    || Output   || RF    |
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)              #|28x28x1  || 28x28x32 || 3x3   |                     
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)             #|28x28x32 || 28x28x64 || 5x5   |
        self.pool1 = nn.MaxPool2d(2, 2)                          #|28x28x64 || 14x14x64 || 10x10 | 
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)            #|14x14x64 || 14x14x128|| 12x12 |
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)           #|14x14x128|| 14x14x256|| 14x14 |
        self.pool2 = nn.MaxPool2d(2, 2)                          #|14x14x256|| 7x7x256  || 28x28 |
        self.conv5 = nn.Conv2d(256, 512, 3)                      #|7x7x256  || 7x7x512  || 30x30 |
        self.conv6 = nn.Conv2d(512, 1024, 3)                     #|7x7x1024 || 7x7x1024 || 32x32 |
        self.conv7 = nn.Conv2d(1024, 10, 3)                      #|7x7x1024 || 7x7x10   || 34x32 |
                                                                 #|______________________________|
     #Here we are defining our neural network Architecture
    "Defines computations to be performed everytime the function is called"
    def forward(self, x):         
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))        # ReLU(Conv1) -> ReLU(Conv2) -> MaxPool1
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))        # ReLU(Conv3) -> ReLU(Conv4) -> MaxPool2
        x = F.relu(self.conv6(F.relu(self.conv5(x))))                    # ReLU(Conv5) -> ReLU(Conv6)
        #x = F.relu(self.conv7(x))                                       # ReLU(Conv7) 
        "We don't need to apply relu on the final layer"
        x = self.conv7(x)                                                #Conv7
        x = x.view(-1, 10)
        return F.log_softmax(x)                                          #Returns softmax of the final output

#Submodules assigned in this way will be registered, and will have their parameters converted too when you call to(), etc.

Torchsummary is used to plot the “forward()” structure in PyTorch.

Cuda is Nvidia GPU platform for Deep Learning

In [4]:
!pip install torchsummary
from torchsummary import summary

use_cuda = torch.cuda.is_available() # check if nvidia cuda gpu is available

device = torch.device("cuda" if use_cuda else "cpu") # Switch to CPU if GPU is not availiable
model = Net().to(device)
summary(model, input_size=(1, 28, 28)) #Plots model summary or the forward function.

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



#Seeding & Reproduciblity

Completely reproducible results are not guaranteed across PyTorch releases, individual commits or different platforms. Furthermore, results need not be reproducible between CPU and GPU executions, even when using identical seeds.

However, in order to make computations deterministic on your specific problem on one specific platform and PyTorch release, there are a couple of steps to take.

There are two pseudorandom number generators involved in PyTorch, which you will need to seed manually to make runs reproducible. Furthermore, you should ensure that all other libraries your code relies on and which use random numbers also use a fixed seed.

The seed method is used to initialize the pseudorandom number generator in Pytorch.If seed value is not present it takes system current time. if you provide same seed value before generating random data it will produce the same data. 

In [5]:
torch.manual_seed(1) #Model seeding to obtain consistent result
batch_size = 128     #Batch size defines number of samples that going to be propagated through the network in a single go. 

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} # this makes sure that the data stays in the memory

# Load the MNIST dataset and performs Normalization on the each channel of the image data.

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

#Load the test dataset and perform normalization
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...



Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
from tqdm import tqdm # tqdm Instantly make your loops show a smart progress meter 

"""This function trains the model"""
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)   # move the data to the device
        optimizer.zero_grad()                               # Make gradient zero for optimizer
        output = model(data)                                # Model output
        loss = F.nll_loss(output, target)                   # Loss is Negative Log Likelihood
        loss.backward()                                     # Propogate the gradient backward
 # optimizer.step is performs a parameter update based on the current gradient (stored in .grad attribute of a parameter) and the update rule    
        optimizer.step() 
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')  #make your loops show a smart progress meter


"""This function is for testing the model"""
def test(model, device, test_loader):

    # set the model on eval mode
    model.eval()

    # set the test loss to zero
    test_loss = 0

    # number of correct classifications
    correct = 0

    # turn off gradients, since we are in test mode
    with torch.no_grad():
        for data, target in test_loader:
            # move the data to device
            data, target = data.to(device), target.to(device)

            # get the model output
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [7]:
# move the model to device
model = Net().to(device)
# stochastic gradient descent with model parameters, learning rate and momentum
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

# run the model for range number of times
for epoch in range(1, 2):
    # train the model
    train(model, device, train_loader, optimizer, epoch)

    # test the model
    test(model, device, test_loader)
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

loss=0.018139509484171867 batch_id=468: 100%|██████████| 469/469 [00:36<00:00, 13.01it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0492, Accuracy: 9862/10000 (99%)



loss=0.16001607477664948 batch_id=468: 100%|██████████| 469/469 [00:36<00:00, 12.96it/s]



Test set: Average loss: 0.0569, Accuracy: 9809/10000 (98%)

