<a href="https://colab.research.google.com/github/abhiiyer/ERA1/blob/main/Abhishek_ERA_V1_S2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch
import torch.nn as nn # torch neural network
import torch.nn.functional as F # torch functions
import torch.optim as optim # optimizer
from torchvision import datasets, transforms # datasets and transforms
!pip install torchsummary
from torchsummary import summary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
use_cuda = torch.cuda.is_available() # check if nvidia cuda gpu is available
device = torch.device("cuda" if use_cuda else "cpu")
device

device(type='cuda')

In [3]:
# set the batch size (normally 2^x)
batch_size = 128 

# load the train data and perform standard normalization
# Normalize does the following for each channel:
# image = (image - mean) / std
# The parameters mean, std
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True)

# load the test data and perform standard normalization
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 352796965.91it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 92611386.72it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 130527921.57it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 15640828.22it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



# Some Notes on our naive model

We are going to write a network based on what we have learnt so far. 

The size of the input image is 28x28x1. We are going to add as many layers as required to reach RF = 32 "atleast". 

In [6]:
"""nn.Module: Base class for all neural network modules.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes:
"""

class FirstDNN(nn.Module):
  def __init__(self):
    super(FirstDNN, self).__init__()
    # r_in:1, n_in:28, j_in:1, s:1, r_out:3, n_out:28, j_out:1
    self.conv1 = nn.Conv2d(1, 32, 3, padding=1)   # input - 1x28x28   | output - 32x28x28     | RF - 3x3
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # input - 32x28x28  | output - 64x28x28     | RF - 5x5
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.pool1 = nn.MaxPool2d(2, 2)  # input - 64x28x28  | output - 64x14x14     | RF - 10x10
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.conv3 = nn.Conv2d(64, 128, 3, padding=1)  # input - 64x14x14  | output - 128x14x14    | RF - 12x12
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.conv4 = nn.Conv2d(128, 256, 3, padding = 1)   # input - 128x14x14 | output - 256x14x14    | RF - 14x14
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.pool2 = nn.MaxPool2d(2, 2)  # input - 256x14x14 | output - 256x7x7      | RF - 28x28
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.conv5 = nn.Conv2d(256, 512, 3)  # input - 256x7x7   | output - 512x5x5      | RF - 30x30
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out:
    self.conv6 = nn.Conv2d(512, 1024, 3)  # input - 512x5x5   | output - 1024x3x3     | RF - 32x32
    # r_in: , n_in: , j_in: , s: , r_out: , n_out: , j_out: 
    self.conv7 = nn.Conv2d(1024, 10, 3)  # input - 1024x3x3  | output - 10x1x1       | RF - 34x34

    # self.fc1 = nn.Linear(9216, 128)
    # self.fc2 = nn.Linear(128, 10)

  """forward
    Defines the computation performed at every call.

    Args:
        x: the input

    Returns:
        log_softmax(x)
    """
  def forward(self, x):
    x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))   # performs conv1 -> relu -> conv2 -> relu -> pool1
    x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))   # performs conv3 -> relu -> conv4 -> relu -> pool2
    x = F.relu(self.conv6(F.relu(self.conv5(x))))               # performs conv5 -> relu -> conv6 -> relu
    x = F.relu(self.conv7(x))                                   # performs conv7 
    x = x.view(-1, 10)                                          # similar to reshape in numpy

    # x = torch.flatten(x, 1)
    # x = F.relu(self.fc1(x))
    # x = self.fc2(x)

    return F.log_softmax(x)

    # never use ReLU in the last layer, the model cannot laern negative values just by removing F.relu() we get 99% accuracy.
    
    


In [7]:
model = FirstDNN().to(device) # transfer the model to the device chosen above

In [8]:
# print the summary of the model
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------

  return F.log_softmax(x)


In [9]:
from tqdm import tqdm

"""trains the model

Args
    model: the model to be trained
    device: the device on which to be trained, cpu/gpu
    train_loader: the train data loader from torch.utils.data.DataLoader
    optimizer: the optimizer to use for training
    epoch: the number of epoch to run for

Returns
    None
"""

def train(model, device, train_loader, optimizer, epoch):
    # set the model on train mode
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)  # move the data to the device
        optimizer.zero_grad()          # zero the gradients
        output = model(data)    # get the model output for the data
        loss = F.nll_loss(output, target)  # loss is negative log likelihood
        loss.backward()   # flow the gradients backward
        optimizer.step()   # optimizer.step is performs a parameter update based on the current gradient 
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')   # this is just for pretty printing

"""tests the model

Args
    model: the model to test
    device: the device to use
    test_loader: the test data loader from torch.utils.data.DataLoader
"""
def test(model, device, test_loader):
    model.eval()  # set the model on eval mode
    test_loss = 0      # set the test loss to zero
    correct = 0     # number of correct classifications
    
    # turn off gradients, since we are in test mode
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  # move the data to device
            output = model(data)  # get the model output
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [10]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # stochastic gradient descent with model parameters, learning rate and momentum

# run the model for range number of times
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  return F.log_softmax(x)
loss=0.5803605914115906 batch_id=468: 100%|██████████| 469/469 [00:31<00:00, 14.86it/s]



Test set: Average loss: 0.7298, Accuracy: 7015/10000 (70%)

