In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [81]:
class BaseBlock(nn.Module):
    def __init__(self, cin, cout, skip_conn):
        super(BaseBlock, self).__init__()
        # boolean on whether to use skip connections.
        self.skip_conn = skip_conn

        # convolution layers.
        self.conv1 = self.conv_block(c_in=cin, c_out=cout, kernel_size=3, stride=1, padding=1)
        self.conv2 = self.conv_block(c_in=cout, c_out=cout, kernel_size=3, stride=1, padding=1)

        # dropout and relu layers.
        self.drop = nn.Dropout2d(0.1)
        self.relu = nn.ReLU()

    def forward(self, x):
        # store initial input for later use
        og = x

        # 2 convolution operations
        x = self.conv1(x)
        x = self.relu(x)
        x = self.drop(x)
        x = self.conv2(x)

        # if boolean is true, add skip conection.
        if self.skip_conn:
            x += og
        
        x = self.relu(x)
        
        return x


    def conv_block(self, c_in, c_out, **kwargs):
        seq_block = nn.Sequential(
            nn.Conv2d(in_channels=c_in, out_channels=c_out, **kwargs),
            nn.BatchNorm2d(num_features=c_out)
        )
        
        return seq_block

In [88]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = BaseBlock(cin=1, cout=8, skip_conn=False)
        self.conv2 = BaseBlock(cin=8, cout=8, skip_conn=True)

        self.conv3 = BaseBlock(cin=8, cout=16, skip_conn=False)
        self.conv4 = BaseBlock(cin=16, cout=16, skip_conn=True)

        self.conv5 = BaseBlock(cin=16, cout=16, skip_conn=True)
        self.conv6 = BaseBlock(cin=16, cout=16, skip_conn=True)

        self.fc = nn.Linear(16, 10)
        
        self.gap = nn.AvgPool2d(7)
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout2d(0.1)

    def forward(self, x):
        x = self.conv1(x) # out: 8 x 28 x 28, rf: 5
        x = self.conv2(x) # out: 16 x 28 x 28, rf: 9
        
        x = self.pool(x) # out: 8 x 14 x 14, rf: 18
        
        x = self.conv3(x) # out: 16 x 28 x 28, rf: 22
        x = self.conv4(x) # out: 16 x 28 x 28, rf: 26

        x = self.pool(x) # out: 16 x 14 x 14, rf: 52
        
        x = self.conv5(x) # out: 16 x 28 x 28, rf: 56
        x = self.conv6(x) # out: 16 x 28 x 28, rf: 60

        x = self.gap(x) # out: 16 x 1 x 1
        x = x.squeeze() # out: 16
        x = self.fc(x) # out: 10
        
        return F.log_softmax(x)


    def conv_block(self, c_in, c_out, **kwargs):
        seq_block = nn.Sequential(
            nn.Conv2d(in_channels=c_in, out_channels=c_out, **kwargs),
            nn.BatchNorm2d(num_features=c_out),
            nn.ReLU()
        )
        
        return seq_block

In [90]:
# !pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
              ReLU-3            [-1, 8, 28, 28]               0
         Dropout2d-4            [-1, 8, 28, 28]               0
            Conv2d-5            [-1, 8, 28, 28]             584
       BatchNorm2d-6            [-1, 8, 28, 28]              16
              ReLU-7            [-1, 8, 28, 28]               0
         BaseBlock-8            [-1, 8, 28, 28]               0
            Conv2d-9            [-1, 8, 28, 28]             584
      BatchNorm2d-10            [-1, 8, 28, 28]              16
             ReLU-11            [-1, 8, 28, 28]               0
        Dropout2d-12            [-1, 8, 28, 28]               0
           Conv2d-13            [-1, 8, 28, 28]             584
      BatchNorm2d-14            [-1, 8,



In [91]:
torch.manual_seed(1)
batch_size = 64

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation(10),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,)),
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

In [92]:
from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = train_loader
    train_correct = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        train_correct += pred.eq(target.view_as(pred)).sum().item()
        # pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    print(f"Train Set => Average Loss = {loss.item()/len(train_loader.dataset)} | Average Acc = {train_correct/len(train_loader.dataset)}")


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set => Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [93]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.99)

for epoch in range(1, 20):
    print("=" * 50)
    print("EPOCH = ", epoch)
    print("=" * 50)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH =  1




Train Set => Average Loss = 3.4933462738990786e-06 | Average Acc = 0.8203

Test set => Average loss: 0.0866, Accuracy: 9731/10000 (97%)

EPOCH =  2
Train Set => Average Loss = 4.377861817677816e-06 | Average Acc = 0.9590333333333333

Test set => Average loss: 0.0443, Accuracy: 9857/10000 (99%)

EPOCH =  3
Train Set => Average Loss = 1.0052536924680074e-06 | Average Acc = 0.9691333333333333

Test set => Average loss: 0.0393, Accuracy: 9885/10000 (99%)

EPOCH =  4
Train Set => Average Loss = 1.6802065074443817e-06 | Average Acc = 0.9735333333333334

Test set => Average loss: 0.0371, Accuracy: 9891/10000 (99%)

EPOCH =  5
Train Set => Average Loss = 3.0200543502966563e-07 | Average Acc = 0.97705

Test set => Average loss: 0.0266, Accuracy: 9912/10000 (99%)

EPOCH =  6
Train Set => Average Loss = 1.593261957168579e-06 | Average Acc = 0.9792333333333333

Test set => Average loss: 0.0273, Accuracy: 9914/10000 (99%)

EPOCH =  7
Train Set => Average Loss = 4.988945399721464e-07 | Average Acc =

The test accuracy is = 9944/10000 (99.44%).