<a href="https://colab.research.google.com/github/amar-naik/EVA/blob/master/S4/EVA4_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import torch # PyTorch is a python package that provides two high-level features: - Tensor computation (like numpy) with strong GPU acceleration - Deep. Neural Networks built on a tape-based autograd system
import torch.nn as nn
import torch.nn.functional as F #This module contains all the functions in the torch.nn library (whereas other parts of the library contain classes). As well as a wide range of loss and activation functions, you’ll also find here some convenient functions for creating neural nets, such as pooling functions.
import torch.optim as optim
from torchvision import datasets, transforms # The torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision
# torch.nn
# Module: creates a callable that behaves like a function but can also hold state (such as neural-net layer weights). It knows which Parameter(s) it contains and can zero all their gradients, loop through them for weight updates, etc.
# Parameter: a wrapper for a tensor that tells a Module it has weights that need updating during backprop. Only tensors with the requires_grad attribute set are updated.
# functional: a module (usually imported into the F namespace by convention) that contains activation functions, loss functions, etc., as well as non-stateful versions of layers such as convolutional and linear layers.
# torch.optim: contains optimizers such as SGD, which update the weights of each Parameter during the optimization step.

In [0]:
# We use Conv2d layers because the image data is two-dimensional.
# The padding argument sets how much padding is added to each edge of the input before the convolution is applied.

class Net(nn.Module):
    """Small all-convolutional classifier for single-channel 28x28 images.

    Layout (every conv is 3x3 and is followed by BatchNorm2d + ReLU):
      * conv1 -> conv2 (padding=1, spatial size kept) -> 2x2 max-pool (28 -> 14)
      * conv3 -> conv4 (padding=1, spatial size kept) -> 2x2 max-pool (14 -> 7)
      * conv5 -> conv6 -> conv7 (no padding, 7 -> 5 -> 3 -> 1)

    The final 17-channel 1x1 map is flattened to ``(batch, 17)`` and returned
    as log-probabilities, so the output pairs with ``F.nll_loss``.

    NOTE(review): the head has 17 outputs; if the labels are the ten MNIST
    digits only the first ten are ever targets. Presumably 10 was intended --
    kept at 17 here so the output shape seen by callers does not change.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Block 1: 1 -> 8 -> 12 channels at full resolution.
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)
        self.conv1_BN = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 12, 3, padding=1)
        self.conv2_BN = nn.BatchNorm2d(12)
        self.pool1 = nn.MaxPool2d(2, 2)
        # Defined once (the original assigned it twice). It is not applied in
        # forward(); the attribute is kept so existing references still work.
        self.drop_out = nn.Dropout(0.25)
        # Block 2: 12 -> 16 -> 18 channels at half resolution.
        self.conv3 = nn.Conv2d(12, 16, 3, padding=1)
        self.conv3_BN = nn.BatchNorm2d(16)
        self.conv4 = nn.Conv2d(16, 18, 3, padding=1)
        self.conv4_BN = nn.BatchNorm2d(18)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Block 3: unpadded convs shrink the map by 2 pixels per layer.
        self.conv5 = nn.Conv2d(18, 24, 3)
        self.conv5_BN = nn.BatchNorm2d(24)
        self.conv6 = nn.Conv2d(24, 28, 3)
        self.conv6_BN = nn.BatchNorm2d(28)
        self.conv7 = nn.Conv2d(28, 17, 3)
        self.conv7_BN = nn.BatchNorm2d(17)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 17)``.

        ``x`` is expected to be ``(batch, 1, 28, 28)``; the flatten below
        relies on the last feature map being 1x1, which holds for 28x28 input.
        """
        x = F.relu(self.conv1_BN(self.conv1(x)))
        x = F.relu(self.conv2_BN(self.conv2(x)))
        x = self.pool1(x)

        x = F.relu(self.conv3_BN(self.conv3(x)))
        x = F.relu(self.conv4_BN(self.conv4(x)))
        x = self.pool2(x)

        x = F.relu(self.conv5_BN(self.conv5(x)))
        x = F.relu(self.conv6_BN(self.conv6(x)))
        x = F.relu(self.conv7_BN(self.conv7(x)))

        x = x.view(-1, 17)  # (batch, 17, 1, 1) -> (batch, 17)
        # Explicit dim: normalise over the class axis. Omitting it relies on
        # the deprecated implicit-dimension behaviour and emits a warning.
        return F.log_softmax(x, dim=1)

In [18]:
!pip install torchsummary #you can get exact Keras representation, using pytorch-summary package
from torchsummary import summary
use_cuda = torch.cuda.is_available() # This package adds support for CUDA tensor types, that implement the same function as CPU tensors, 
#but they utilize GPUs for computation
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device) ## to(device) moves the model weights to GPU.
summary(model, input_size=(1, 28, 28)) # input image size

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
            Conv2d-3           [-1, 12, 28, 28]             876
       BatchNorm2d-4           [-1, 12, 28, 28]              24
         MaxPool2d-5           [-1, 12, 14, 14]               0
            Conv2d-6           [-1, 16, 14, 14]           1,744
       BatchNorm2d-7           [-1, 16, 14, 14]              32
            Conv2d-8           [-1, 18, 14, 14]           2,610
       BatchNorm2d-9           [-1, 18, 14, 14]              36
        MaxPool2d-10             [-1, 18, 7, 7]               0
           Conv2d-11             [-1, 24, 5, 5]           3,912
      BatchNorm2d-12             [-1, 24, 5, 5]              48
           Conv2d-13             [-1, 28, 3, 3]           6,076
      BatchNorm2d-14             [-1, 2



In [0]:
# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(1)
batch_size = 128

# One preprocessing pipeline shared by both splits:
#   ToTensor  - converts the input image to a PyTorch tensor.
#   Normalize - standardises with the given mean and std; (0.1307,) and
#               (0.3081,) are presumably the precomputed MNIST statistics.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Extra DataLoader options that are only used when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# DataLoader wraps a dataset and yields it in batches of `batch_size`.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation metrics do not depend on sample order, so the test split is not
# shuffled. download=True keeps this loader from depending on the train split
# having fetched the files first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [0]:
from tqdm import tqdm # add progress bars to Python code is with tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Args:
        model: network whose output is fed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for the caller's convenience; not used in the body.
    """
    model.train()
    progress = tqdm(train_loader)
    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before computing
        # new ones.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)  # negative log-likelihood loss
        loss.backward()   # compute gradients of the loss
        optimizer.step()  # update the parameters the optimizer was given

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    # since we need to print 99.4 Accuracy we need atleast one values after radix to be printed. Hence changing .0f in accuracy to .2f
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [23]:
# Start from a freshly initialised network on the selected device and
# optimise it with SGD plus momentum.
model = Net()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.02, momentum=0.7)

# 20 epochs: each one trains on the full training set, then reports
# loss and accuracy on the test set.
for epoch in range(20):
    print("epoch", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]

epoch 0


loss=0.1380969136953354 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.88it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1365, Accuracy: 9798/10000 (97.98%)

epoch 1


loss=0.10474128276109695 batch_id=468: 100%|██████████| 469/469 [00:13<00:00, 35.50it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0544, Accuracy: 9895/10000 (98.95%)

epoch 2


loss=0.02398555539548397 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.87it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0426, Accuracy: 9903/10000 (99.03%)

epoch 3


loss=0.0184024628251791 batch_id=468: 100%|██████████| 469/469 [00:13<00:00, 33.84it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0421, Accuracy: 9924/10000 (99.24%)

epoch 4


loss=0.0529310405254364 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.04it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0337, Accuracy: 9926/10000 (99.26%)

epoch 5


loss=0.10895514488220215 batch_id=468: 100%|██████████| 469/469 [00:13<00:00, 35.15it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0305, Accuracy: 9930/10000 (99.30%)

epoch 6


loss=0.027787959203124046 batch_id=468: 100%|██████████| 469/469 [00:13<00:00, 33.81it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0313, Accuracy: 9934/10000 (99.34%)

epoch 7


loss=0.006654143333435059 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 36.31it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0389, Accuracy: 9900/10000 (99.00%)

epoch 8


loss=0.011768211610615253 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.37it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0293, Accuracy: 9925/10000 (99.25%)

epoch 9


loss=0.011778314597904682 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.21it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0287, Accuracy: 9920/10000 (99.20%)

epoch 10


loss=0.019009670242667198 batch_id=468: 100%|██████████| 469/469 [00:13<00:00, 34.05it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0279, Accuracy: 9923/10000 (99.23%)

epoch 11


loss=0.019745951518416405 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.20it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0269, Accuracy: 9934/10000 (99.34%)

epoch 12


loss=0.019543210044503212 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.16it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0256, Accuracy: 9930/10000 (99.30%)

epoch 13


loss=0.02384628914296627 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.16it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0246, Accuracy: 9935/10000 (99.35%)

epoch 14


loss=0.011026074178516865 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.22it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0239, Accuracy: 9937/10000 (99.37%)

epoch 15


loss=0.00301940250210464 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.81it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0253, Accuracy: 9930/10000 (99.30%)

epoch 16


loss=0.008526623249053955 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.79it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0239, Accuracy: 9934/10000 (99.34%)

epoch 17


loss=0.0026768248062580824 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.18it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0249, Accuracy: 9940/10000 (99.40%)

epoch 18


loss=0.005864972714334726 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 33.30it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0268, Accuracy: 9924/10000 (99.24%)

epoch 19


loss=0.012511556036770344 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.06it/s]



Test set: Average loss: 0.0234, Accuracy: 9939/10000 (99.39%)

