# Attempt 1:
## Target: 
Reduce the parameter count to under 10K while still achieving 90%+ test accuracy.

## Results:
Achieved 8,898 parameters and 99.11% accuracy

## Analysis
Classifying grayscale images into 10 classes needs far fewer layers than the previous network used, so the number of layers was reduced. The batch size was also lowered from 128 to 64, because the batch-size-vs-accuracy graph showed accuracy peaking at a batch size of around 64. A random-rotation transformation was added to the training data as well, since, as we saw in class, it has helped on this dataset in the past.

The next attempt will also try RandomCrop or CenterCrop transforms, alongside other approaches.

# Import Libraries

In [None]:
# importing all the Python Packages & torch Library.
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Define Data Transformations

In [None]:
# Evaluation pipeline: tensor conversion followed by normalisation with
# mean 0.1307 and std 0.3081. No augmentation is applied here.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training pipeline: the same two steps, preceded by a random rotation drawn
# from [-7, +7] degrees.
# NOTE(review): the rotation runs before ToTensor, so fill=(1,) is presumably
# interpreted on the raw 0-255 pixel scale rather than 0-1 - confirm intent.
train_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=(-7.0, 7.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Dataloader Arguments & Train / Test Dataloaders

In [None]:
# Seed PyTorch's global RNG so weight initialisation and batch shuffling are
# repeatable from one run to the next.
torch.manual_seed(1)
# Number of images passed through the network together in one step.
batch_size = 64
use_cuda = torch.cuda.is_available()

# Extra loader options are only enabled when CUDA is available.
kwargs = {'num_workers': 2, 'pin_memory': True} if use_cuda else {}

# Training split: downloaded on first use, augmented/normalised by
# train_transforms, and reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=train_transforms),
    batch_size=batch_size, shuffle=True, **kwargs)

# Test split: download=True lets this loader be built without relying on the
# training dataset having fetched the files first. shuffle=False keeps the
# evaluation order fixed; the aggregate loss/accuracy do not depend on order.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=test_transforms),
    batch_size=batch_size, shuffle=False, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# The Model

In [None]:
class Net(nn.Module):
    """Compact CNN classifier (8,898 trainable parameters) for 1-channel images.

    The network is three ``nn.Sequential`` stages built from repeated
    Conv -> ReLU -> BatchNorm -> Dropout2d(0.1) units and ends in a 1x1
    convolution to 10 channels followed by global average pooling.

    Shapes and receptive fields (RF) in the comments assume a 1x28x28 input.
    Because the head pools adaptively to 1x1, larger inputs also produce one
    row of 10 log-probabilities per sample instead of spilling spatial
    positions into the batch dimension.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding=0):
        """Return the layers of one Conv -> ReLU -> BatchNorm -> Dropout2d unit."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),  # normalises the post-ReLU activations
            nn.Dropout2d(0.1),             # drops 10% of the feature maps
        ]

    def __init__(self):
        super().__init__()

        # Layers are unpacked into each Sequential so the module indices (and
        # therefore the state_dict keys) are the same as a flat layer list.
        self.conv1 = nn.Sequential(
            *self._conv_block(1, 16, 3),    # 28x28x1  -> 26x26x16, RF 3x3
            *self._conv_block(16, 32, 3),   # 26x26x16 -> 24x24x32, RF 5x5
            nn.MaxPool2d(2, 2),             # 24x24x32 -> 12x12x32, RF 6x6
        )
        self.conv2 = nn.Sequential(
            # 1x1 convolution to cut the channel count before the next 3x3.
            *self._conv_block(32, 8, 1),    # 12x12x32 -> 12x12x8,  RF 6x6
            *self._conv_block(8, 16, 3),    # 12x12x8  -> 10x10x16, RF 10x10
            nn.MaxPool2d(2, 2),             # 10x10x16 -> 5x5x16,   RF 12x12
        )
        self.conv3 = nn.Sequential(
            *self._conv_block(16, 16, 3, padding=1),  # 5x5x16 -> 5x5x16, RF 20x20
            nn.Conv2d(16, 10, 1),           # 5x5x16 -> 5x5x10, RF 20x20
            # Global average pooling. On the 5x5 map of a 28x28 input this
            # averages the same 25 values as AvgPool2d(5) would (RF 36x36).
            nn.AdaptiveAvgPool2d(1),        # -> 1x1x10
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Keep the batch axis fixed: (batch, 10, 1, 1) -> (batch, 10).
        x = torch.flatten(x, 1)
        return F.log_softmax(x, dim=-1)





# Print Summary of Model

In [None]:
!pip install torchsummary
from torchsummary import summary
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             160
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
         Dropout2d-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,640
              ReLU-6           [-1, 32, 24, 24]               0
       BatchNorm2d-7           [-1, 32, 24, 24]              64
         Dropout2d-8           [-1, 32, 24, 24]               0
         MaxPool2d-9           [-1, 32, 12, 12]               0
           Conv2d-10            [-1, 8, 12, 12]             264
             ReLU-11            [-1, 8, 12, 12]               0
      BatchNorm2d-12            [-1, 8, 12, 12]              16
        Dropout2d-13            [-1, 8, 12, 12]               0
           Conv2d-14           [-1, 16,

# Define Train and Test functions

In [None]:
from tqdm import tqdm
# Training helper
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over every batch in ``train_loader``.

    Args:
        model: network to optimise; it is put into training mode first.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer used to clear gradients and apply each update.
        epoch: epoch number supplied by the caller; not used in the body.

    Returns:
        None. Progress is reported through the tqdm bar description.
    """
    model.train()
    progress = tqdm(train_loader)
    for batch_idx, (inputs, labels) in enumerate(progress):
        # Move the batch to the device the model is expected to live on.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Negative log-likelihood of the model output against the labels.
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

# Function to test 
'''
Args: 
Model : created model to be used for training
device : GPU or cpu
test_laoded: data on which the testing has to be done
 

'''
def test(model, device, test_loader):
    model.eval() # seting up the model for evalaution.
    test_loss = 0 # setting the test loss to 0
    correct = 0 # countign the no of coorect classfication.
    with torch.no_grad(): # turn off gradients, since we are in test mode
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)   # copy the data to device.
            output = model(data) # predict the output
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset) # calculating hte test loss.

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Run the model

In [None]:
 
# Start from a freshly initialised network placed on the selected device.
model = Net().to(device)
# Plain SGD with momentum over all of the network's parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)

# Epochs 1 through 15: one training pass, then an evaluation on the test set.
for epoch in range(1, 16):
    print(f"epoch = {epoch}")
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    

  0%|          | 0/938 [00:00<?, ?it/s]

epoch = 1


loss=0.3651917576789856 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 31.93it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.1132, Accuracy: 9696/10000 (96.96%)

epoch = 2


loss=0.2775864005088806 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 32.00it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0705, Accuracy: 9800/10000 (98.00%)

epoch = 3


loss=0.11844843626022339 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 32.18it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0586, Accuracy: 9834/10000 (98.34%)

epoch = 4


loss=0.15125104784965515 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 31.85it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0471, Accuracy: 9860/10000 (98.60%)

epoch = 5


loss=0.05502571538090706 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 32.21it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0409, Accuracy: 9873/10000 (98.73%)

epoch = 6


loss=0.20654486119747162 batch_id=937: 100%|██████████| 938/938 [00:29<00:00, 32.14it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0365, Accuracy: 9882/10000 (98.82%)

epoch = 7


loss=0.11412115395069122 batch_id=937: 100%|██████████| 938/938 [00:33<00:00, 27.84it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0360, Accuracy: 9895/10000 (98.95%)

epoch = 8


loss=0.08260751515626907 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.71it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0323, Accuracy: 9905/10000 (99.05%)

epoch = 9


loss=0.03487825393676758 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.70it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0320, Accuracy: 9896/10000 (98.96%)

epoch = 10


loss=0.046137336641550064 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.71it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0282, Accuracy: 9915/10000 (99.15%)

epoch = 11


loss=0.3173339068889618 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.43it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0257, Accuracy: 9907/10000 (99.07%)

epoch = 12


loss=0.2559162676334381 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.63it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0251, Accuracy: 9919/10000 (99.19%)

epoch = 13


loss=0.12365398555994034 batch_id=937: 100%|██████████| 938/938 [00:32<00:00, 28.90it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0267, Accuracy: 9913/10000 (99.13%)

epoch = 14


loss=0.12465160340070724 batch_id=937: 100%|██████████| 938/938 [00:31<00:00, 30.07it/s]
  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0268, Accuracy: 9908/10000 (99.08%)

epoch = 15


loss=0.00965686421841383 batch_id=937: 100%|██████████| 938/938 [00:30<00:00, 30.51it/s]



Test set: Average loss: 0.0253, Accuracy: 9922/10000 (99.22%)

