<a href="https://colab.research.google.com/github/gremlin97/EVA-8/blob/main/S5/Eva3_Step2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Target:**

Added an LR scheduler (StepLR) with gamma=0.1 and step size=6, and reduced the channel count by using fewer kernels in each convolution block, which pushed the parameter count below 10k. Increased the learning rate to speed up learning within fewer than 15 epochs and to offset the regularization. Removed padding=1 to shrink the feature maps faster. Added random rotation of 7 degrees to the images. Added a fully connected layer after the GAP layer to increase model capacity.

**Results**:

* Parameters: 9,866
* Best Train Accuracy: 98.68
* Best Test Accuracy: 99.23

**Analysis:**
I was able to reduce the model parameters below 10k by reducing the number of filters and keeping the number of output channels at 16 after each convolution block. The train accuracy was lower than the test accuracy by around 1%, indicating that the model can still learn more and achieve higher accuracy. Learning has become harder due to the multiple forms of regularization (dropout, random rotations), so I need to find a way to increase the model's learning capacity.

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
class Net(nn.Module):
    """Small CNN for 1x28x28 inputs that outputs log-probabilities over 10 classes.

    Layout: three conv -> ReLU -> BatchNorm -> MaxPool -> Dropout blocks, a final
    3x3 convolution to 32 channels, global average pooling and a linear
    classifier. The trailing comments on each layer track the receptive field
    (RF), the input/output jump (ji/jo) and the feature-map shape.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Block 1
        self.conv1 = nn.Conv2d(1, 16, 3, padding=0) # RF:1+(3-1)1=3; ji=1,jo=1; 28x28x1 -> 26x26x16
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(2, 2) # RF:3+(2-1)1=4; ji=1,jo=2; 26x26x16 -> 13x13x16
        self.drop1 = nn.Dropout(0.1)
        # Block 2
        self.conv3 = nn.Conv2d(16, 16, 3, padding=0) # RF:4+(3-1)2=8; ji=2,jo=2; 13x13x16 -> 11x11x16
        self.bn2 = nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(2, 2) # RF:8+(2-1)2=10; ji=2,jo=4; 11x11x16 -> 5x5x16
        self.drop2 = nn.Dropout(0.1)
        # Block 3
        self.conv5 = nn.Conv2d(16, 16, 3, padding=1) # RF:10+(3-1)4=18; ji=4,jo=4; 5x5x16 -> 5x5x16
        self.bn3 = nn.BatchNorm2d(16)
        self.pool3 = nn.MaxPool2d(2, 2) # RF:18+(2-1)4=22; ji=4,jo=8; 5x5x16 -> 2x2x16
        self.drop3 = nn.Dropout(0.1)
        # Final convolution widens to 32 channels before pooling to 1x1.
        self.conv6 = nn.Conv2d(16, 32, 3, padding=1) # RF:22+(3-1)8=38; ji=8,jo=8; 2x2x16 -> 2x2x32

        self.gap = nn.AdaptiveAvgPool2d((1,1))

        self.lin = nn.Linear(32, 10)

    def forward(self, x):
        """Return log-softmax class scores of shape (batch, 10) for input ``x``."""
        # Block 1: conv -> ReLU -> BN -> pool -> dropout
        x = self.drop1(self.pool1(self.bn1(F.relu(self.conv1(x)))))
        # NOTE(review): this extra ReLU after dropout exists only in block 1;
        # it is kept to preserve the original computation - confirm it is intended.
        x = F.relu(x)

        # Block 2: conv -> ReLU -> BN -> pool -> dropout
        x = self.drop2(self.pool2(self.bn2(F.relu(self.conv3(x)))))

        # Block 3: conv -> ReLU -> BN -> pool -> dropout
        x = self.drop3(self.pool3(self.bn3(F.relu(self.conv5(x)))))

        x = self.conv6(x)

        x = self.gap(x)

        # Collapse the 1x1 spatial dims so the linear layer sees (batch, 32).
        x = x.view(-1, 32)

        x = self.lin(x)

        # Explicit dim: normalise over the class axis and avoid the
        # "implicit dimension choice" deprecation warning.
        return F.log_softmax(x, dim=1)

In [None]:
# Sanity check: push one random 1x28x28 sample through a fresh network and
# confirm the output shape.
model = Net()
out = model(torch.randn((1, 1, 28, 28)))
print(out.size())

torch.Size([1, 10])


  return F.log_softmax(x)


In [None]:
# Display the module tree (layer names and their constructor arguments).
model

Net(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.1, inplace=False)
  (conv3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop2): Dropout(p=0.1, inplace=False)
  (conv5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop3): Dropout(p=0.1, inplace=False)
  (conv6): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (gap): AdaptiveAvgPool2d(output_size=(1, 1))
  (lin): Linear(in_featur

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             160
       BatchNorm2d-2           [-1, 16, 26, 26]              32
         MaxPool2d-3           [-1, 16, 13, 13]               0
           Dropout-4           [-1, 16, 13, 13]               0
            Conv2d-5           [-1, 16, 11, 11]           2,320
       BatchNorm2d-6           [-1, 16, 11, 11]              32
         MaxPool2d-7             [-1, 16, 5, 5]               0
           Dropout-8             [-1, 16, 5, 5]               0
            Conv2d-9             [-1, 16, 5, 5]           2,320
      BatchNorm2d-10             [-1, 16, 5, 5]              32
        MaxPool2d-11             [-1, 16, 2, 2]               0
          Dropout-12             [-1, 16, 2, 2]               0
    

  return F.log_softmax(x)


In [None]:
torch.manual_seed(1)
batch_size = 32

# Worker/pinned-memory options are only passed when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training pipeline: random rotation is the only augmentation.
# NOTE(review): the notebook summary mentions a 7 degree rotation, while the
# training range here is +/-5 degrees - confirm which value is intended.
# The Normalize constants are presumably the MNIST mean/std - TODO confirm.
train_transforms = transforms.Compose([
    transforms.RandomRotation((-5.0, 5.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Evaluation pipeline: no random augmentation, so the reported test metrics
# are deterministic and measured on the unmodified test images.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=train_transforms),
    batch_size=batch_size, shuffle=True, **kwargs)

# download=True so this split does not depend on the train download having
# run first; shuffling is unnecessary for evaluation.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=test_transforms),
    batch_size=batch_size, shuffle=False, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [None]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch and show loss / running accuracy on a progress bar.

    Args:
        model: network to optimise; switched to training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index (not used inside the function).
    """
    model.train()
    correct = 0
    processed = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        # Divide by the samples seen so far rather than the full dataset size,
        # so the running accuracy is meaningful mid-epoch; the value after the
        # last batch is the same as before.
        processed += len(data)
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / processed}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Test Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# Fresh model, SGD with momentum, and a step scheduler that multiplies the
# learning rate by 0.1 every 6 epochs.
model = Net().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.012, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=6, gamma=0.1)

# Train for 15 epochs; the scheduler is advanced once per epoch, after the
# training pass and before evaluation.
for epoch in range(15):
    print("Epoch: ", epoch + 1)
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

Epoch:  1


  return F.log_softmax(x)
loss=0.015074221417307854 batch_id=1874 Train Accuracy=93.71666666666667: 100%|██████████| 1875/1875 [00:29<00:00, 62.60it/s]



Test set: Average loss: 0.0626, Test Accuracy: 9802/10000 (98%)

Epoch:  2


loss=0.14403703808784485 batch_id=1874 Train Accuracy=97.31166666666667: 100%|██████████| 1875/1875 [00:29<00:00, 63.09it/s]



Test set: Average loss: 0.0551, Test Accuracy: 9819/10000 (98%)

Epoch:  3


loss=0.13558490574359894 batch_id=1874 Train Accuracy=97.745: 100%|██████████| 1875/1875 [00:30<00:00, 61.92it/s]



Test set: Average loss: 0.0458, Test Accuracy: 9848/10000 (98%)

Epoch:  4


loss=0.08981756865978241 batch_id=1874 Train Accuracy=97.97833333333334: 100%|██████████| 1875/1875 [00:30<00:00, 62.47it/s]



Test set: Average loss: 0.0446, Test Accuracy: 9856/10000 (99%)

Epoch:  5


loss=0.006964302621781826 batch_id=1874 Train Accuracy=98.14833333333333: 100%|██████████| 1875/1875 [00:29<00:00, 63.56it/s]



Test set: Average loss: 0.0414, Test Accuracy: 9859/10000 (99%)

Epoch:  6


loss=0.08933671563863754 batch_id=1874 Train Accuracy=98.20666666666666: 100%|██████████| 1875/1875 [00:29<00:00, 62.58it/s]



Test set: Average loss: 0.0413, Test Accuracy: 9857/10000 (99%)

Epoch:  7


loss=0.16809818148612976 batch_id=1874 Train Accuracy=98.57333333333334: 100%|██████████| 1875/1875 [00:30<00:00, 61.93it/s]



Test set: Average loss: 0.0333, Test Accuracy: 9877/10000 (99%)

Epoch:  8


loss=0.05306488648056984 batch_id=1874 Train Accuracy=98.63166666666666: 100%|██████████| 1875/1875 [00:30<00:00, 62.49it/s]



Test set: Average loss: 0.0354, Test Accuracy: 9884/10000 (99%)

Epoch:  9


loss=0.013209614902734756 batch_id=1874 Train Accuracy=98.62166666666667: 100%|██████████| 1875/1875 [00:29<00:00, 62.91it/s]



Test set: Average loss: 0.0324, Test Accuracy: 9898/10000 (99%)

Epoch:  10


loss=0.01640196144580841 batch_id=1874 Train Accuracy=98.65833333333333: 100%|██████████| 1875/1875 [00:30<00:00, 61.64it/s]



Test set: Average loss: 0.0319, Test Accuracy: 9891/10000 (99%)

Epoch:  11


loss=0.004300000611692667 batch_id=1874 Train Accuracy=98.65: 100%|██████████| 1875/1875 [00:29<00:00, 63.17it/s]



Test set: Average loss: 0.0315, Test Accuracy: 9903/10000 (99%)

Epoch:  12


loss=0.006753540597856045 batch_id=1874 Train Accuracy=98.67166666666667: 100%|██████████| 1875/1875 [00:29<00:00, 63.41it/s]



Test set: Average loss: 0.0306, Test Accuracy: 9897/10000 (99%)

Epoch:  13


loss=0.2030770182609558 batch_id=1874 Train Accuracy=98.70166666666667: 100%|██████████| 1875/1875 [00:29<00:00, 63.01it/s]



Test set: Average loss: 0.0307, Test Accuracy: 9901/10000 (99%)

Epoch:  14


loss=0.11410541832447052 batch_id=1874 Train Accuracy=98.79: 100%|██████████| 1875/1875 [00:30<00:00, 62.06it/s]



Test set: Average loss: 0.0289, Test Accuracy: 9911/10000 (99%)

Epoch:  15


loss=0.1270199716091156 batch_id=1874 Train Accuracy=98.77166666666666: 100%|██████████| 1875/1875 [00:29<00:00, 62.87it/s]



Test set: Average loss: 0.0310, Test Accuracy: 9889/10000 (99%)



In [None]:
# Placeholder string for pasted training logs; it currently holds only blank lines.
Training_Logs = '''



'''