* Repository github url : https://github.com/jai-mr/
* Assignment Repository : https://github.com/jai-mr/Assignment-2
* Submitted by : Jaideep Rangnekar 
* Registered email id : jaideepmr@gmail.com



In [1]:
# Import the libraries
from __future__ import print_function # import the print functions
import torch  # import the torch library
import torch.nn as nn  # import torch neural network
import torch.nn.functional as F # import torch functions
import torch.optim as optim # import optimizer 
from torchvision import datasets, transforms # import # datasets and transforms

In [2]:
class Net(nn.Module):
    """Fully convolutional digit classifier for single-channel 28x28 images.

    The network stacks seven 3x3 convolutions and two 2x2 max-pooling layers.
    The last convolution has 10 output channels (one per digit class) and
    reduces the spatial size to 1x1, so no fully connected layer is needed.

    Shapes in the comments below are <channels> x <rows> x <cols> for a
    1x28x28 input.  RF is the receptive field, computed as
    RF_out = RF_in + (kernel - 1) * jump_in, where the jump doubles after
    every stride-2 pooling layer.
    """

    def __init__(self):
        # Call the base class constructor so that sub-modules get registered.
        super(Net, self).__init__()

        # nn.Conv2d(in_channels, out_channels, kernel_size[, padding])
        # nn.MaxPool2d(kernel_size, stride) halves the spatial resolution.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)      # 1x28x28   -> 32x28x28   | RF 3x3
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)     # 32x28x28  -> 64x28x28   | RF 5x5
        self.pool1 = nn.MaxPool2d(2, 2)                  # 64x28x28  -> 64x14x14   | RF 6x6
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)    # 64x14x14  -> 128x14x14  | RF 10x10
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)   # 128x14x14 -> 256x14x14  | RF 14x14
        self.pool2 = nn.MaxPool2d(2, 2)                  # 256x14x14 -> 256x7x7    | RF 16x16
        self.conv5 = nn.Conv2d(256, 512, 3)              # 256x7x7   -> 512x5x5    | RF 24x24
        self.conv6 = nn.Conv2d(512, 1024, 3)             # 512x5x5   -> 1024x3x3   | RF 32x32
        self.conv7 = nn.Conv2d(1024, 10, 3)              # 1024x3x3  -> 10x1x1     | RF 40x40

    def forward(self, x):
        """Run the forward pass and return per-class log-probabilities.

        Args:
            x: input batch; the layer sizes above expect N x 1 x 28 x 28.

        Returns:
            Tensor with 10 columns (one row per sample for 28x28 inputs)
            holding log-probabilities; exponentiating a row gives values
            that sum to 1.
        """
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))   # conv1 -> relu -> conv2 -> relu -> pool1
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))   # conv3 -> relu -> conv4 -> relu -> pool2
        x = F.relu(self.conv6(F.relu(self.conv5(x))))               # conv5 -> relu -> conv6 -> relu

        # Deliberately no ReLU after the last layer: ReLU(x) = max(0, x) would
        # clamp every negative class score to zero before the softmax, which
        # prevents the loss from decreasing further during training.
        x = self.conv7(x)

        # Collapse the trailing 1x1 spatial dimensions: N x 10 x 1 x 1 -> N x 10.
        x = x.view(-1, 10)

        # dim=1 normalises across the 10 class scores of each sample.  Passing
        # it explicitly avoids the deprecated implicit-dimension behaviour.
        return F.log_softmax(x, dim=1)

In [3]:
# Install torchsummary, a package for printing model summary similar to keras model.summary
!pip install torchsummary
from torchsummary import summary

# Check if 'cuda' is present or not
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Create a neural network and assign it to device memory
model = Net().to(device)

# print model summary and pass model input size which is the image size, In this case, (1, 28, 28) --> (channel, height, width)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [4]:
# Seed PyTorch's global random number generator so that runs are reproducible
# (e.g. torch.rand(2) returns the same values after every fresh seeding).
torch.manual_seed(1)
batch_size = 128    # Number of samples processed in each forward/backward pass

# Extra DataLoader options, only used when CUDA is available:
#   num_workers - number of worker processes that load batches in the background
#   pin_memory  - use page-locked host memory for faster host-to-GPU copies
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Both splits share exactly the same preprocessing, so it is defined once:
#   1. convert the image to a tensor
#   2. normalise with the mean / standard deviation passed below
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training data: reshuffled on every pass so that batches differ between epochs.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Test data: the reported loss and accuracy are sums over the whole set, so the
# order is irrelevant.  Not shuffling keeps evaluation deterministic and avoids
# consuming global RNG state between training epochs.  download=True lets the
# loader be built even if the training loader above was not created first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!




In [5]:
# accuracy=[]  # List for capturing the accuracy
from tqdm import tqdm                                                           # tqdm is a utility for displaying progress bar


"""trains the model

Args
    model: the model to be trained
    device: the device on which to be trained, cpu/gpu
    train_loader: the train data loader from torch.utils.data.DataLoader
    optimizer: the optimizer to use for training
    epoch: the index of the current epoch (train performs a single pass per call and does not use this value)

Returns
    None
"""

def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the gradients are reset, the negative log-likelihood loss
    of the model output is back-propagated, and the optimizer takes one step.
    A tqdm progress bar shows the latest batch loss and batch index.

    Args:
        model: the network to train; it is switched to training mode.
        device: the device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: the optimizer that updates the model parameters.
        epoch: index of the current epoch.  It is accepted so callers can
            pass it along, but it is not used inside this function.

    Returns:
        None
    """
    model.train()                                       # Put the model in training mode
    pbar = tqdm(train_loader)                           # Wrap the loader so iterating it drives a progress bar
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)   # Move the batch to the target device
        optimizer.zero_grad()                           # Clear old gradients; backward() accumulates into them
        output = model(data)                            # Forward pass
        # nll_loss compares the model output with the true labels.
        loss = F.nll_loss(output, target)
        loss.backward()                                 # Back-propagation: compute gradients of the loss
        optimizer.step()                                # Update the parameters from the gradients
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')  # Show loss and batch id on the bar

"""tests the model

Args
    model: the model to test
    device: the device to use
    test_loader: the test data loader from torch.utils.data.DataLoader
"""

def test(model, device, test_loader):                                           # Method that does actual training of a model using different things
    model.eval()                                                                # Evaluate a model with a score on how well it performed
    test_loss = 0                                                               # Test loss
    correct = 0                                                                 # Accuracy
    with torch.no_grad():                                                       #Asking not to use gradients, to reduce memory usage as Gradients are not needed when evaluation
        for data, target in test_loader:                                        # Enumerate over test loader
            data, target = data.to(device), target.to(device)                   # Assigning data and target to device memory
            output = model(data)                                                # Pass the data to our model and get the output    
            test_loss += F.nll_loss(output, target, reduction='sum').item()     # sum up batch loss    
            pred = output.argmax(dim=1, keepdim=True)                           # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()               # Compute Accuracy

    test_loss /= len(test_loader.dataset)                                       # Compute Test loss

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(  # Print Average loss and accuracy         
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
#    accuracy.append(100. * correct / len(test_loader.dataset))

In [6]:
# Start from a freshly initialised network placed on the selected device.
model = Net().to(device)
# Stochastic gradient descent with learning rate 0.01 and momentum 0.9.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Number of full passes over the training set; raise this to train for longer.
num_epochs = 1

for epoch in range(1, num_epochs + 1):                    # Epochs are numbered starting from 1
    print("Epoch : ",epoch,"\n")
    train(model, device, train_loader, optimizer, epoch)  # One pass over the training data
    test(model, device, test_loader)                      # Report loss and accuracy on the test data

  0%|          | 0/469 [00:00<?, ?it/s]

Epoch :  1 



loss=0.0322597436606884 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.13it/s]



Test set: Average loss: 0.0646, Accuracy: 9799/10000 (98%)



In [None]:
# print(accuracy)
# import matplotlib.pyplot as plt
# plt.plot(accuracy)