<a href="https://colab.research.google.com/github/aravindchakravarti/Deep_Learning_EVA_4/blob/master/WEEK_2/EVA4_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Make print() a function on Python 2 as well; this import is a no-op on Python 3.
from __future__ import print_function

# Import torch, the neural-network package (nn, which provides the nn.Module
# base class) and its functional API (stateless ops such as relu, log_softmax)
import torch
import torch.nn as nn
import torch.nn.functional as F

# Importing the optimiser. SGD is used in the code
import torch.optim as optim

# Get the datasets and the transformations (applied to each image as it is loaded)
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    """Plain seven-convolution CNN mapping 1x28x28 images to 10 class scores.

    ``nn.Module`` is the base class for all neural network modules; every
    model should subclass it.

    Shapes below are height x width x channels for a 28x28x1 input. RF is the
    theoretical receptive field of one output pixel, computed with
    ``RF_out = RF_in + (kernel - 1) * jump_in`` where ``jump`` is the product
    of the strides of all preceding layers.

        layer   input        output       RF
        conv1   28x28x1      28x28x32     3x3
        conv2   28x28x32     28x28x64     5x5
        pool1   28x28x64     14x14x64     6x6
        conv3   14x14x64     14x14x128    10x10
        conv4   14x14x128    14x14x256    14x14
        pool2   14x14x256    7x7x256      16x16
        conv5   7x7x256      5x5x512      24x24
        conv6   5x5x512      3x3x1024     32x32
        conv7   3x3x1024     1x1x10       40x40
    """

    def __init__(self):
        # Initialise nn.Module first so the layers assigned below are registered.
        super().__init__()

        # Block 1: two padded 3x3 convolutions (spatial size kept), then 2x2 max-pool.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Block 2: same pattern with more channels.
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Head: three unpadded 3x3 convolutions shrink 7x7 -> 5x5 -> 3x3 -> 1x1.
        # The last one has 10 output channels, one per class.
        self.conv5 = nn.Conv2d(256, 512, 3)
        self.conv6 = nn.Conv2d(512, 1024, 3)
        self.conv7 = nn.Conv2d(1024, 10, 3)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Steps:
          1. conv1 -> ReLU -> conv2 -> ReLU -> max-pool.
          2. conv3 -> ReLU -> conv4 -> ReLU -> max-pool.
          3. conv5 -> ReLU -> conv6 -> ReLU.
          4. conv7 produces 10 channels, one raw score (logit) per class.
          5. The 10x1x1 map per sample is flattened to 10 values and passed
             through log-softmax.

        Args:
            x: input batch; for N x 1 x 28 x 28 input the result is N x 10.

        Returns:
            Tensor of log-probabilities over the 10 classes, suitable for
            ``F.nll_loss``.
        """
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        # No ReLU after the last convolution: clamping the logits at zero would
        # stop the network from expressing "less likely than zero score" for
        # any class and weakens the gradient signal into conv7.
        x = self.conv7(x)
        x = x.view(-1, 10)
        # dim is given explicitly; relying on the implicit dim is deprecated.
        return F.log_softmax(x, dim=1)

In [0]:
# We need Keras like summary output
!pip install torchsummary
from torchsummary import summary

# We are going to use CUDA. It is a parallel computing platfrom
# created by NVidia
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Creating model here
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [0]:
# Seed PyTorch's random number generator so later random operations
# (e.g. shuffling, weight initialisation) are repeatable from run to run.
torch.manual_seed(1)
# Number of samples per mini-batch.
batch_size = 128

# num_workers: number of subprocesses that load data in the background.
# pin_memory: put batches in page-locked host memory, which speeds up
# host-to-GPU copies. Both are only requested when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits: convert the image to a
# tensor, then normalise with the commonly quoted MNIST mean (0.1307) and
# standard deviation (0.3081).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: downloaded to ../data if missing, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Test split: download=True so this loader does not depend on the training
# split having been fetched first. Shuffling is off because the evaluation
# metrics (summed loss, correct count) do not depend on sample order.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 3680306.04it/s]                             


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw


32768it [00:00, 304858.96it/s]                           
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 1719232.51it/s]                            
8192it [00:00, 123503.33it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


In [0]:
# very low overhead progress bar
from tqdm import tqdm

# nll_loss = negative log-likelihood loss
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Args:
        model: network to train; switched to training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that owns the parameters of ``model``.
        epoch: current epoch number (accepted but not used in this function).
    """
    model.train()
    progress = tqdm(train_loader)
    for step, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={batch_loss.item()} batch_id={step}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
# Re-create the model so that re-running this cell restarts training from
# freshly initialised weights (the earlier instance was only used for the summary).
model = Net().to(device)

# Stochastic Gradient Descent with learning rate 0.01 and momentum 0.9.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train for a single epoch; widen the range to train for more epochs.
# Each epoch runs one training pass and then evaluates on the test set:
#   model        - the network being trained
#   device       - CUDA if available, else CPU
#   train_loader - batches of training data
#   test_loader  - batches of test data
#   optimizer    - SGD, as configured above
#   epoch        - 1-based epoch counter
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)