In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys

# Make the parent directory of the notebook's working directory importable so
# that project-local modules resolve. The path is only appended when it is not
# already registered, so re-running this cell does not add duplicates.
project_module_path = os.path.abspath(os.pardir)
if not any(entry == project_module_path for entry in sys.path):
    print(f'Adding {project_module_path} to system paths')
    sys.path.append(project_module_path)

# %env AWS_PROFILE=development

Adding /Users/bensoorajmohan/Development/ai-playground/pytorch-mnist-play/src to system paths


# Base Model v1

## Target
- Basic setup: Loading data, train and test loops
- Basic model with regularization to prevent overfitting and improve generalization:
    - Batch normalization to reduce the risk of memorizing noise in the training data
    - Dropout to perform better on new and unseen data
- Global Average Pooling instead of a fully connected layer at the end
## Result
- Parameters: 24,986
- Accuracy:

| Accuracy | Best | Final |
| --- | --- | --- |
| Train | 99.77 | 99.77 |
| Test | 99.81 | 99.40 |
## Analysis
- Model is moderately heavy (24,986 parameters)
- Overfitting: final train accuracy (99.77%) is higher than final test accuracy (99.40%)


## Import all the libraries

In [2]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from tqdm import tqdm
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter

!pip install torchsummary
from torchsummary import summary



In [3]:
import logging

# Grab the root logger and lower its threshold to DEBUG so that records of
# every severity emitted through it are processed.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.DEBUG)

# Emit through the module-level helper (not `logger.info`): when the root
# logger has no handlers yet, `logging.info` installs the default one first.
logging.info("logging configured")  # calling the root logger

INFO:root:logging configured


## Configure device

In [4]:
# Pick the compute device, in order of preference: CUDA GPU, Apple MPS, CPU.
use_cuda = torch.cuda.is_available()

# `torch.backends.mps` does not exist on older PyTorch releases; treat a missing
# backend as "MPS unavailable" instead of failing with AttributeError.
_mps_backend = getattr(torch.backends, "mps", None)
use_mps = _mps_backend is not None and bool(_mps_backend.is_available())

# Always bound to a concrete torch.device (never None), so the annotation holds
# and no post-hoc assertions are required.
device: torch.device = torch.device(
    "cuda" if use_cuda else "mps" if use_mps else "cpu"
)

## Download the dataset and configure loaders

In [5]:
# --- Reproducibility and tunables -------------------------------------------
SEED = 1
torch.manual_seed(SEED)

BATCH_SIZE = 128      # batch size used when an accelerator (CUDA/MPS) is available
CPU_BATCH_SIZE = 64   # NOTE(review): CPU-only runs use a smaller batch than BATCH_SIZE — confirm this is intended
NUM_WORKERS = 2       # loader worker processes on the accelerator path
DATA_DIR = '../../data'

# Per-channel normalization constants (the values commonly quoted as the MNIST
# training-set mean and standard deviation).
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# --- Transforms ---------------------------------------------------------------
# Train and test currently share the same pipeline; they are kept as separate
# objects so augmentation can later be added to the training side only.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# --- Datasets (downloaded into DATA_DIR on first use) -------------------------
train = datasets.MNIST(DATA_DIR, train=True, download=True, transform=train_transforms)
test = datasets.MNIST(DATA_DIR, train=False, download=True, transform=test_transforms)

# --- Loader arguments ---------------------------------------------------------
if use_cuda or use_mps:
    dataloader_args = dict(
        shuffle=True,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        # Pinned host memory only benefits CUDA transfers; on MPS the DataLoader
        # ignores it and emits a warning, so request it for CUDA only.
        pin_memory=use_cuda,
    )
else:
    dataloader_args = dict(shuffle=True, batch_size=CPU_BATCH_SIZE)

# train dataloader: reshuffled every epoch
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader: same settings, but evaluation does not need shuffling and a
# fixed order keeps per-batch results comparable between runs
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, "shuffle": False})


## Model summary

In [6]:
from models import ModelV1

# Print a layer-by-layer summary for a single-channel 28x28 input.
# torchsummary's `summary` defaults to device="cuda": when CUDA is available it
# feeds CUDA tensors to the model, which fails for this freshly constructed
# (CPU-resident) instance. Pinning the summary to the CPU works on every machine.
summary(ModelV1(), input_size=(1, 28, 28), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
              ReLU-2           [-1, 16, 28, 28]               0
            Conv2d-3           [-1, 16, 28, 28]           2,320
              ReLU-4           [-1, 16, 28, 28]               0
       BatchNorm2d-5           [-1, 16, 28, 28]              32
         MaxPool2d-6           [-1, 16, 14, 14]               0
            Conv2d-7           [-1, 32, 14, 14]           4,640
              ReLU-8           [-1, 32, 14, 14]               0
       BatchNorm2d-9           [-1, 32, 14, 14]              64
          Dropout-10           [-1, 32, 14, 14]               0
           Conv2d-11           [-1, 32, 14, 14]           9,248
             ReLU-12           [-1, 32, 14, 14]               0
      BatchNorm2d-13           [-1, 32, 14, 14]              64
        MaxPool2d-14             [-1, 3

## Train and evaluate the model

### Configuration

In [7]:
from pytorch_mnist_play import Config, Trainer

# TensorBoard writer that is handed to the training configuration.
summaryWriter = SummaryWriter("../../logs/mnist")

# Training configuration: only the writer and the epoch count are set here;
# every other field keeps the default defined by Config.
tConfig = Config(summaryWriter=summaryWriter, epochs=15)
print(f"tConfig: {tConfig}")

# Instantiate the model and place it on the device chosen earlier.
modelV1 = ModelV1().to(device)

tConfig: batch_size=128 epochs=15 lr=0.01 momentum=0.9 seed=1 log_dir='logs/mnist' summaryWriter=<torch.utils.tensorboard.writer.SummaryWriter object at 0x1045e49d0>


### Start the loop

In [8]:
# Build the trainer from the configuration above and run the train/test loop.
trainer = Trainer(tConfig)
trainer.fit(
    modelV1,
    device,
    train_loader,
    test_loader,
    modelV1.name(),  # presumably the label shown in the log lines — confirm against Trainer.fit
)

Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 469/469 [00:10<00:00, 43.12it/s, batch_id=468, loss=0.0614]
INFO:pytorch_mnist_play.trainer:[TRAIN ModelV1] Epoch 01 - Accuracy: 57166/60000 (95.28%)
INFO:pytorch_mnist_play.trainer:[TEST ModelV1] Epoch 01 - Loss: 0.0441, Accuracy: 9874/10000 (98.74%)
Epoch 2: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 469/469 [00:06<00:00, 73.73it/s, batch_id=468, loss=0.0507]
INFO:pytorch_mnist_play.trainer:[TRAIN ModelV1] Epoch 02 - Accuracy: 59168/60000 (98.61%)
INFO:pytorch_mnist_play.trainer:[TEST ModelV1] Epoch 02 - Loss: 0.0380, Accuracy: 9881/10000 (98.81%)
Epoch 3: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████