In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

This step puts the basic skeleton in place.

**Target**
parameters less than 20k

1.   Put the basic skeleton in place. Avoid a large number of filters: digits do not have that many textures and patterns (circles, corners, angled lines), so use at most 24 filters.
2.   Keep parameters under 20k. Use 1x1 convolutions to keep the number of filters in check and trim filters that are not useful.

**Results**
Shows potential

1. Params 9058
2. best train accuracy 99.48
3. best test accuracy 99.01
4. Shows overfitting: train accuracy keeps climbing to 99.48 while test accuracy plateaus at about 99 (98.80–99.01 over the last nine epochs).
5. Not converging fast enough. 








In [None]:
class Net(nn.Module):
    """Small all-convolutional classifier mapping 1x28x28 images to 10 log-probabilities.

    Layout (no padding anywhere, so every 3x3 conv shrinks the map by 2):

        input block   : conv3x3  1 ->  8          28 -> 26
        block 1       : conv3x3  8 -> 16          26 -> 24
                        conv3x3 16 -> 24          24 -> 22
        transition    : maxpool 2x2               22 -> 11
                        conv1x1 24 -> 16          11 -> 11
        block 2       : conv3x3 16 -> 16          11 ->  9
                        maxpool 2x2                9 ->  4
        output block  : conv3x3 16 -> 10           4 ->  2
                        global average pool        2 ->  1

    Every conv except the last one is followed by BatchNorm2d and ReLU.
    The network has 9,058 trainable parameters.
    """

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, kernel_size):
        """Build an unpadded Conv2d -> BatchNorm2d -> ReLU stack."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size),
            nn.BatchNorm2d(num_features=out_channels, eps=1e-05, momentum=0.1),
            nn.ReLU()
        )

    def __init__(self):
        super(Net, self).__init__()

        # NOTE: attribute names and creation order are kept stable so that
        # state_dict keys and seeded weight initialisation do not change.

        ## INPUT BLOCK
        self.convblock1 = self._conv_bn_relu(1, 8, 3)    #i 28 o 26 RF 3

        ## BLOCK 1
        self.convblock2 = self._conv_bn_relu(8, 16, 3)   #i 26 o 24 RF 5
        self.convblock3 = self._conv_bn_relu(16, 24, 3)  #i 24 o 22

        ## TRANSITION BLOCK
        self.pool1 = nn.MaxPool2d(2, 2)                  #i 22 o 11
        # 1x1 conv squeezes 24 channels back down to 16 without touching the spatial size.
        self.convblock4 = self._conv_bn_relu(24, 16, 1)  #i 11 o 11

        ## BLOCK 2
        self.convblock5 = self._conv_bn_relu(16, 16, 3)  #i 11 o 9

        self.pool2 = nn.MaxPool2d(2, 2)                  #i 9 o 4

        ## OUTPUT BLOCK
        # No BatchNorm/ReLU here: the 10 channels are the raw class scores,
        # averaged over the remaining 2x2 positions.
        self.convblock6 = nn.Sequential(
            nn.Conv2d(16, 10, 3), #i4 o 2
            nn.AdaptiveAvgPool2d(1)
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10) for ``x``.

        Args:
            x: tensor of shape (batch, 1, 28, 28).
        """
        ## block 1
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        ## transition
        x = self.pool1(x)
        x = self.convblock4(x)
        ## block 2
        x = self.convblock5(x)
        x = self.pool2(x)
        ## output
        x = self.convblock6(x)
        x = x.view(-1, 10)  # (batch, 10, 1, 1) -> (batch, 10)
        # Explicit class dimension: the implicit-dim form is deprecated and warns on every call.
        return F.log_softmax(x, dim=1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
            Conv2d-4           [-1, 16, 24, 24]           1,168
       BatchNorm2d-5           [-1, 16, 24, 24]              32
              ReLU-6           [-1, 16, 24, 24]               0
            Conv2d-7           [-1, 24, 22, 22]           3,480
       BatchNorm2d-8           [-1, 24, 22, 22]              48
              ReLU-9           [-1, 24, 22, 22]               0
        MaxPool2d-10           [-1, 24, 11, 11]               0
           Conv2d-11           [-1, 16, 11, 11]             400
      BatchNorm2d-12           [-1, 16, 11, 11]              32
             ReLU-13           [-1, 16, 11, 11]               0
           Conv2d-14             [-1, 1



In [None]:


# Seed PyTorch's global RNG so shuffling and the weight initialisation in the
# training cell are repeatable.
torch.manual_seed(1)
batch_size = 256

# A worker process and pinned host memory are only requested when CUDA is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Single preprocessing pipeline shared by both splits.
# (0.1307, 0.3081) are presumably the MNIST training-set mean and std -- TODO confirm.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation only sums losses and correct counts, so sample order is irrelevant.
# Not shuffling keeps evaluation deterministic and stops it from drawing from
# the global RNG between training epochs.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [None]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader`` and print the epoch's train accuracy.

    Args:
        model: network whose output is fed to ``F.nll_loss`` (i.e. log-probabilities).
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches exposing ``.dataset``.
        optimizer: optimizer stepping ``model``'s parameters.
        epoch: epoch number, used only in the printed message.
    """
    model.train()
    progress = tqdm(train_loader)
    n_correct = 0

    for step, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        log_probs = model(inputs)
        batch_loss = F.nll_loss(log_probs, labels)
        batch_loss.backward()
        optimizer.step()
        progress.set_description(desc= f'loss={batch_loss.item()} batch_id={step}')

        # Running accuracy uses the predictions made before this step's weight update.
        predicted = log_probs.argmax(dim=1)  # index of the max log-probability
        n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    train_accuracy = 100*n_correct / len(train_loader.dataset)
    print(f'Epoch {epoch}, train accuracy {train_accuracy}')

def test(model, device, test_loader, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'Epoch {epoch}, test accuracy {100. * correct / len(test_loader.dataset)}')
    # print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    #     test_loss, correct, len(test_loader.dataset),
    #     100. * correct / len(test_loader.dataset)))

In [None]:

# Build a new network for training and optimise it with SGD + momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Alternative (disabled): optim.Adam(model.parameters(), lr=0.0005)

# Epochs are numbered from 1 in the progress messages; evaluate after every epoch.
NUM_EPOCHS = 15
for epoch in range(1, NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader, epoch)

loss=0.1534167379140854 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.91it/s]

Epoch 1, train accuracy 89.45





Epoch 1, test accuracy 96.09


loss=0.1428501456975937 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.89it/s]

Epoch 2, train accuracy 97.61166666666666





Epoch 2, test accuracy 98.12


loss=0.051603201776742935 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.99it/s]

Epoch 3, train accuracy 98.18166666666667





Epoch 3, test accuracy 98.34


loss=0.03382066264748573 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.93it/s]

Epoch 4, train accuracy 98.48333333333333





Epoch 4, test accuracy 98.45


loss=0.025210464373230934 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.85it/s]

Epoch 5, train accuracy 98.68333333333334





Epoch 5, test accuracy 98.27


loss=0.044026169925928116 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.89it/s]

Epoch 6, train accuracy 98.805





Epoch 6, test accuracy 98.51


loss=0.04324883595108986 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.90it/s]

Epoch 7, train accuracy 98.93833333333333





Epoch 7, test accuracy 98.85


loss=0.0786801278591156 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.91it/s]

Epoch 8, train accuracy 99.04





Epoch 8, test accuracy 98.83


loss=0.01645025424659252 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.89it/s]

Epoch 9, train accuracy 99.10166666666667





Epoch 9, test accuracy 98.8


loss=0.022150347009301186 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.79it/s]

Epoch 10, train accuracy 99.15333333333334





Epoch 10, test accuracy 98.96


loss=0.017302006483078003 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.78it/s]

Epoch 11, train accuracy 99.24166666666666





Epoch 11, test accuracy 98.86


loss=0.01737177185714245 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.80it/s]

Epoch 12, train accuracy 99.3





Epoch 12, test accuracy 98.85


loss=0.004603361710906029 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.86it/s]

Epoch 13, train accuracy 99.36





Epoch 13, test accuracy 99.01


loss=0.002201046561822295 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.94it/s]

Epoch 14, train accuracy 99.41666666666667





Epoch 14, test accuracy 98.91


loss=0.025155993178486824 batch_id=234: 100%|██████████| 235/235 [00:19<00:00, 11.93it/s]

Epoch 15, train accuracy 99.48





Epoch 15, test accuracy 98.96
