In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

This step tries to increase training accuracy.

**Target**

1. The second max-pool only reduces the feature map from size 4 to size 2, which is then the input to global average pooling (GAP). This may lose information and is overkill for a max-pool, so drop the second max-pool.
2. After removing that max-pool, the receptive field (RF) at the output is only 16. Add conv layers to increase it to 24, and compensate for the extra parameters by decreasing the maximum number of filters to 16.
3. Add augmentation (a random rotation between -7 and +7 degrees on the training images) so the model learns slight differences.
4. In the observed output, test accuracy is as good as or ahead of train accuracy. Hence dropout is reduced to 0.03 (note: the model cell below actually sets `dropout_value = 0.01`).



**Results**


1. Params 8086
2. Best train accuracy 98.95 (the run logged below reaches 99.27 at epoch 18)
3. Best test accuracy 99.05 (the run logged below reaches 99.28 at epoch 19)








In [36]:
dropout_value = 0.01  # default dropout probability shared by every conv block in Net


class Net(nn.Module):
    """Small convolutional classifier: 1x28x28 input -> 10 log-probabilities.

    Layout (spatial sizes are for a 28x28 input, RF = receptive field):
      * input block and block 1: three unpadded 3x3 conv stages, 28 -> 22, RF 7
      * transition: 2x2 max-pool (22 -> 11, RF 8) followed by a 1x1 conv that
        reduces 16 channels to 12
      * block 2: three unpadded 3x3 conv stages, 11 -> 5, RF 20
      * output block: 3x3 conv to 10 channels (5 -> 3, RF 24) and global
        average pooling to 1x1

    Every conv stage except the last is Conv2d -> BatchNorm2d -> ReLU -> Dropout.

    Args:
        dropout: dropout probability used in every conv stage. ``None`` (the
            default) falls back to the module-level ``dropout_value`` read at
            construction time, which matches the previous behaviour.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, dropout):
        """Build one Conv2d -> BatchNorm2d -> ReLU -> Dropout stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size),
            nn.BatchNorm2d(num_features=out_channels, eps=1e-05, momentum=0.1),
            nn.ReLU(),
            nn.Dropout(dropout),
        )

    def __init__(self, dropout=None):
        super(Net, self).__init__()
        if dropout is None:
            dropout = dropout_value

        # Submodule names and creation order are kept as-is so that state_dict
        # keys and seeded weight initialisation stay the same.

        ## INPUT BLOCK
        self.convblock1 = self._conv_block(1, 8, 3, dropout)    # i 28 o 26 RF 3

        ## BLOCK 1
        self.convblock2 = self._conv_block(8, 12, 3, dropout)   # i 26 o 24 RF 5
        self.convblock3 = self._conv_block(12, 16, 3, dropout)  # i 24 o 22 RF 7

        ## TRANSITION BLOCK
        self.pool1 = nn.MaxPool2d(2, 2)                         # i 22 o 11 RF 8
        self.convblock4 = self._conv_block(16, 12, 1, dropout)  # i 11 o 11 RF 8

        ## BLOCK 2
        self.convblock5 = self._conv_block(12, 12, 3, dropout)  # i 11 o 9 RF 12
        self.convblock6 = self._conv_block(12, 12, 3, dropout)  # i 9 o 7 RF 16
        self.convblock7 = self._conv_block(12, 12, 3, dropout)  # i 7 o 5 RF 20

        ## OUTPUT BLOCK
        # No BatchNorm/ReLU/Dropout here: the 10 channels are the class scores.
        self.convblock8 = nn.Sequential(
            nn.Conv2d(12, 10, 3),  # i 5 o 3 RF 24
            nn.AdaptiveAvgPool2d(1)
        )

    def forward(self, x):
        """Return log-probabilities with one row per sample and 10 columns."""
        ## block 1
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)

        ## transition
        x = self.pool1(x)
        x = self.convblock4(x)

        ## block 2
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.convblock7(x)

        ## output
        x = self.convblock8(x)
        x = x.view(-1, 10)  # drop the trailing 1x1 spatial dims left by the average pool
        # dim is given explicitly: the implicit-dim form is deprecated and warns on every call.
        return F.log_softmax(x, dim=1)

In [37]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
           Dropout-4            [-1, 8, 26, 26]               0
            Conv2d-5           [-1, 12, 24, 24]             876
       BatchNorm2d-6           [-1, 12, 24, 24]              24
              ReLU-7           [-1, 12, 24, 24]               0
           Dropout-8           [-1, 12, 24, 24]               0
            Conv2d-9           [-1, 16, 22, 22]           1,744
      BatchNorm2d-10           [-1, 16, 22, 22]              32
             ReLU-11           [-1, 16, 22, 22]               0
          Dropout-12           [-1, 16, 22, 22]               0
        MaxPool2d-13           [-1, 16, 11, 11]               0
           Conv2d-14           [-1, 12,



In [38]:


# Seed PyTorch's global RNG before the loaders are built.
torch.manual_seed(1)
batch_size = 256

# Worker / pinned-memory options are only passed when CUDA is in use.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Training images get a small random rotation before tensor conversion;
# fill=(1,) is the value written into pixels uncovered by the rotation.
# The Normalize constants are presumably the MNIST mean/std -- TODO confirm.
train_transform = transforms.Compose([
    transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=train_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)

# Test images are only converted and normalized (no augmentation). This split
# is opened without download=True, i.e. it relies on the files already being
# present under '../data'.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
test_set = datasets.MNIST('../data', train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


In [39]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch, print the epoch's train accuracy and return it.

    Accuracy is accumulated from the same forward pass that is used for the
    gradient step, i.e. with the model in training mode and before that
    batch's weight update.

    Args:
        model: network whose output is passed to ``F.nll_loss`` (log-probabilities).
        device: device the batches are moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the printed message.

    Returns:
        Train accuracy in percent over the samples actually processed
        (0.0 if the loader yielded nothing).
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

        ## accumulate correct over each batch
        pred = output.argmax(dim=1)  # index of the max log-probability
        correct += (pred == target).sum().item()
        processed += target.size(0)

    # Divide by the number of samples seen, not len(train_loader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    accuracy = 100 * correct / processed if processed else 0.0
    print(f'Epoch {epoch}, train accuracy {accuracy}')
    return accuracy

def test(model, device, test_loader, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'Epoch {epoch}, test accuracy {100. * correct / len(test_loader.dataset)}')
    # print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    #     test_loss, correct, len(test_loader.dataset),
    #     100. * correct / len(test_loader.dataset)))

In [40]:

# Start from a freshly constructed network rather than the instance that was
# built for the summary printout.
model = Net().to(device)

# SGD with momentum is the active optimizer.
# Alternative left disabled: optim.Adam(model.parameters(), lr=0.005)
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# Epochs are numbered from 1; each training pass is followed by an evaluation
# on the test loader.
NUM_EPOCHS = 19
for epoch in range(1, NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader, epoch)

loss=0.11003419756889343 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.61it/s]

Epoch 1, train accuracy 83.67833333333333





Epoch 1, test accuracy 96.96


loss=0.04593658074736595 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.65it/s]

Epoch 2, train accuracy 97.49666666666667





Epoch 2, test accuracy 98.53


loss=0.035413648933172226 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.66it/s]


Epoch 3, train accuracy 98.11333333333333
Epoch 3, test accuracy 98.78


loss=0.12981769442558289 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.59it/s]

Epoch 4, train accuracy 98.45833333333333





Epoch 4, test accuracy 98.89


loss=0.012764482758939266 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.53it/s]

Epoch 5, train accuracy 98.57666666666667





Epoch 5, test accuracy 98.92


loss=0.05179479345679283 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.57it/s]

Epoch 6, train accuracy 98.715





Epoch 6, test accuracy 99.04


loss=0.05871449038386345 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.53it/s]

Epoch 7, train accuracy 98.81





Epoch 7, test accuracy 98.89


loss=0.039895471185445786 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.52it/s]

Epoch 8, train accuracy 98.85833333333333





Epoch 8, test accuracy 99.05


loss=0.049563679844141006 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.51it/s]

Epoch 9, train accuracy 98.865





Epoch 9, test accuracy 99.02


loss=0.029586082324385643 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.49it/s]

Epoch 10, train accuracy 98.97





Epoch 10, test accuracy 99.21


loss=0.08862755447626114 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.56it/s]

Epoch 11, train accuracy 99.01166666666667





Epoch 11, test accuracy 99.24


loss=0.001968724885955453 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.65it/s]

Epoch 12, train accuracy 99.11833333333334





Epoch 12, test accuracy 99.25


loss=0.005062307696789503 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.65it/s]

Epoch 13, train accuracy 99.07833333333333





Epoch 13, test accuracy 99.14


loss=0.004363211803138256 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.59it/s]

Epoch 14, train accuracy 99.09166666666667





Epoch 14, test accuracy 99.17


loss=0.02164757065474987 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.60it/s]

Epoch 15, train accuracy 99.13





Epoch 15, test accuracy 99.21


loss=0.027007466182112694 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.55it/s]

Epoch 16, train accuracy 99.19666666666667





Epoch 16, test accuracy 99.11


loss=0.00894882995635271 batch_id=234: 100%|██████████| 235/235 [00:26<00:00,  8.76it/s]

Epoch 17, train accuracy 99.25666666666666





Epoch 17, test accuracy 99.22


loss=0.0019000977044925094 batch_id=234: 100%|██████████| 235/235 [00:26<00:00,  8.71it/s]

Epoch 18, train accuracy 99.26666666666667





Epoch 18, test accuracy 99.23


loss=0.0609353668987751 batch_id=234: 100%|██████████| 235/235 [00:27<00:00,  8.65it/s]

Epoch 19, train accuracy 99.24666666666667





Epoch 19, test accuracy 99.28
