# Import Libraries

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Data Transformations

We start by defining our data transformations. We need to think about what our data is and how we can augment it so that it correctly represents images the model might not otherwise see.


In [None]:
# Per-channel mean and std handed to Normalize. Both must be sequences with one
# entry per image channel (three here, for RGB).
CHANNEL_MEAN = (0.5, 0.5, 0.5)
CHANNEL_STD = (0.5, 0.5, 0.5)

# Train Phase transformations
# No augmentation is applied yet: images are only converted to tensors and normalized.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
])

# Test Phase transformations
# Same pipeline as training, so both splits are preprocessed identically.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
])


# Dataset and Creating Train/Test Split

In [None]:
import torchvision

# Both splits are stored under the same local folder; the archive is downloaded
# on first use if it is not already present there.
DATA_ROOT = './data'

# Training split, preprocessed with the train-phase transforms.
train = torchvision.datasets.CIFAR10(
    root=DATA_ROOT,
    train=True,
    download=True,
    transform=train_transforms,
)

# Held-out split, preprocessed with the test-phase transforms.
test = torchvision.datasets.CIFAR10(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=test_transforms,
)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 92683388.31it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [None]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Dataloader arguments - in a script you would typically read these from the command line.
# With a GPU we use larger batches, worker processes and pinned host memory;
# on CPU we fall back to a smaller batch size and the DataLoader defaults.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader (shuffled every epoch)
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader: identical settings except that shuffling is switched off.
# Evaluation metrics do not depend on sample order, and not shuffling avoids
# drawing from the random number generator on every evaluation pass.
test_loader = torch.utils.data.DataLoader(test, **dict(dataloader_args, shuffle=False))

CUDA Available? True




# Data Statistics

It is important to know your data very well. Let's check some statistics of our data and see what it actually looks like.

In [None]:
# We'd need to convert it into Numpy! Remember above we have converted it into tensors already
train_data = train.train_data
train_data = train.transform(train_data.numpy())

print('[Train]')
print(' - Numpy Shape:', train.train_data.cpu().numpy().shape)
print(' - Tensor Shape:', train.train_data.size())
print(' - min:', torch.min(train_data))
print(' - max:', torch.max(train_data))
print(' - mean:', torch.mean(train_data))
print(' - std:', torch.std(train_data))
print(' - var:', torch.var(train_data))

dataiter = iter(train_loader)
images, labels = dataiter.next()

print(images.shape)
print(labels.shape)

# Let's visualize some of the images
%matplotlib inline
import matplotlib.pyplot as plt

plt.imshow(images[0].numpy().squeeze(), cmap='gray_r')


AttributeError: ignored

## MORE

It is important that we view as many images as possible. This helps us get an idea of which image augmentations to apply later on.

In [None]:
# Show a 6 x 10 grid of samples from the batch fetched above.
# Subplot positions are 1-based, and the same index is used to pick the image,
# so the grid shows images[1] .. images[60] (images[0] was displayed already).
figure = plt.figure()
num_of_images = 60
for index in range(1, num_of_images + 1):
    plt.subplot(6, 10, index)
    plt.axis('off')
    # Undo Normalize(mean=0.5, std=0.5) and convert CxHxW -> HxWxC, which is
    # the layout imshow requires for colour images.
    picture = (images[index] * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0)
    plt.imshow(picture.numpy())

# The model
Let's start with the model we first saw

In [None]:
class Net(nn.Module):
    """Small fully-convolutional classifier producing 10 log-probabilities.

    Layer order: C1 C2 c3 P1 C4 C5 C6 c7 P2 C8 C9 C10 GAP c11, where upper-case
    C is a 3x3 convolution (padding 1, so spatial size is kept), lower-case c
    is a 1x1 convolution, P is 2x2 max-pooling and GAP is an 8x8 average pool.
    Dropout (p=0.15) is applied right before each max-pool.

    Spatial sizes quoted in the comments below are for a 3x32x32 input, which
    is the input size the 8x8 average pool is dimensioned for.
    """

    @staticmethod
    def _conv(in_ch, out_ch, kernel, pad):
        """Bias-free square convolution."""
        return nn.Conv2d(
            in_channels=in_ch,
            out_channels=out_ch,
            kernel_size=(kernel, kernel),
            padding=pad,
            bias=False,
        )

    @classmethod
    def _conv_bn_relu(cls, in_ch, out_ch, kernel=3):
        """Convolution -> BatchNorm -> ReLU; padding keeps the spatial size."""
        pad = kernel // 2  # 3x3 -> padding 1, 1x1 -> padding 0
        return nn.Sequential(
            cls._conv(in_ch, out_ch, kernel, pad),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        )

    def __init__(self):
        super().__init__()

        # ---- Stage 1: 32x32 feature maps ----
        self.convblock1 = self._conv_bn_relu(3, 32)             # C1, 32x32
        self.convblock2 = self._conv_bn_relu(32, 64)            # C2, 32x32
        self.convblock3 = self._conv_bn_relu(64, 24, kernel=1)  # c3 (1x1 transition), 32x32
        self.pool1 = nn.MaxPool2d(2, 2)                         # P1, 32x32 -> 16x16

        # ---- Stage 2: 16x16 feature maps ----
        self.convblock4 = self._conv_bn_relu(24, 24)            # C4, 16x16
        self.convblock5 = self._conv_bn_relu(24, 24)            # C5, 16x16
        self.convblock6 = self._conv_bn_relu(24, 24)            # C6, 16x16
        self.convblock7 = self._conv_bn_relu(24, 16, kernel=1)  # c7 (1x1 transition), 16x16
        self.pool2 = nn.MaxPool2d(2, 2)                         # P2, 16x16 -> 8x8

        # ---- Stage 3: 8x8 feature maps ----
        self.convblock8 = self._conv_bn_relu(16, 16)            # C8, 8x8
        self.convblock9 = self._conv_bn_relu(16, 16)            # C9, 8x8
        # C10 is a bare convolution: no BatchNorm or ReLU before the pooling head.
        self.convblock10 = nn.Sequential(self._conv(16, 16, 3, 1))  # 8x8

        # ---- Output head ----
        self.gap = nn.Sequential(nn.AvgPool2d(kernel_size=8))       # GAP, 8x8 -> 1x1
        # 1x1 convolution mapping the 16 pooled channels to the 10 class scores.
        self.convblock11 = nn.Sequential(self._conv(16, 10, 1, 0))  # c11, 1x1

        self.dropout = nn.Dropout(0.15)

    def forward(self, x):
        """Run the layer pipeline and return log-softmax scores of shape (-1, 10)."""
        pipeline = (
            self.convblock1, self.convblock2, self.convblock3,
            self.dropout, self.pool1,
            self.convblock4, self.convblock5, self.convblock6, self.convblock7,
            self.dropout, self.pool2,
            self.convblock8, self.convblock9, self.convblock10,
            self.gap, self.convblock11,
        )
        features = x
        for stage in pipeline:
            features = stage(features)

        # Collapse the 1x1 spatial dimensions: one row of 10 scores per sample.
        scores = features.view(-1, 10)
        return F.log_softmax(scores, dim=-1)


# Model Params
We can't emphasize enough how important it is to view the model summary.
Unfortunately, there is no built-in model visualizer, so we have to rely on an external package.

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
              ReLU-3           [-1, 32, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          18,432
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
            Conv2d-7           [-1, 24, 32, 32]           1,536
       BatchNorm2d-8           [-1, 24, 32, 32]              48
              ReLU-9           [-1, 24, 32, 32]               0
          Dropout-10           [-1, 24, 32, 32]               0
        MaxPool2d-11           [-1, 24, 16, 16]               0
           Conv2d-12           [-1, 24, 16, 16]           5,184

# Training and Testing

Looking at logs can be boring, so we'll introduce **tqdm** progressbar to get cooler logs.

Let's write train and test functions

In [None]:
from tqdm import tqdm

# Training history, filled in by train() (one entry per batch) and test()
# (one entry per call). Kept at notebook level so the plotting cell can read them.
train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    For every batch the negative log-likelihood loss is computed on the model
    output (expected to be log-probabilities), back-propagated, and the
    optimizer is stepped. The batch loss and the running accuracy (in percent)
    are appended to the notebook-level `train_losses` and `train_acc` lists.

    Args:
        model: network to optimise; switched to training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding `(data, target)` batches.
        optimizer: optimizer holding the parameters of `model`.
        epoch: index of the current epoch (currently unused; kept for callers).

    Note:
        Defining this function rebinds the notebook-level name `train`, which
        the dataset cell also assigns. The data loaders must therefore be
        created before this cell is executed.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so they must be
        # reset before each batch for the parameter update to be correct.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.nll_loss(y_pred, target)
        # Record a plain Python float. Appending the tensor itself would keep
        # an autograd-tracked (possibly GPU-resident) tensor alive for every
        # batch and cannot be plotted directly afterwards.
        loss_value = loss.item()
        train_losses.append(loss_value)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Running accuracy over the samples seen so far in this pass
        pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        # Update pbar-tqdm
        pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate `model` on every batch of `test_loader` without tracking gradients.

    The summed negative log-likelihood over all samples is divided by the
    dataset size to get the average loss. That value and the accuracy (in
    percent) are printed and appended to the notebook-level `test_losses` and
    `test_acc` lists.

    Args:
        model: network to evaluate; switched to evaluation mode here.
        device: device the batches are moved to before the forward pass.
        test_loader: loader yielding `(data, target)` batches; its `.dataset`
            length is used as the sample count.
    """
    model.eval()
    summed_loss = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # accumulate the un-averaged loss of this batch
            summed_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
            # predicted class = index of the largest log-probability
            guesses = log_probs.argmax(dim=1, keepdim=True)
            hits += guesses.eq(labels.view_as(guesses)).sum().item()

    sample_count = len(test_loader.dataset)
    mean_loss = summed_loss / sample_count
    test_losses.append(mean_loss)

    accuracy = 100. * hits / sample_count
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, hits, sample_count, accuracy))

    test_acc.append(accuracy)

# Let's Train and test our model

In [None]:
# Number of full passes over the training set.
EPOCHS = 20

# Start from a freshly initialised network, so weights left over from the
# summary cell are not reused, and optimise it with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

# One training pass followed by one evaluation pass per epoch.
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)

EPOCH: 0


Loss=1.3065600395202637 Batch_id=390 Accuracy=37.28: 100%|██████████| 391/391 [00:20<00:00, 18.73it/s]



Test set: Average loss: 1.4695, Accuracy: 4270/10000 (42.70%)

EPOCH: 1


Loss=1.2001335620880127 Batch_id=390 Accuracy=57.29: 100%|██████████| 391/391 [00:19<00:00, 20.53it/s]



Test set: Average loss: 1.1070, Accuracy: 6001/10000 (60.01%)

EPOCH: 2


Loss=0.8119429349899292 Batch_id=390 Accuracy=64.43: 100%|██████████| 391/391 [00:19<00:00, 20.33it/s]



Test set: Average loss: 1.2041, Accuracy: 5767/10000 (57.67%)

EPOCH: 3


Loss=0.8721604347229004 Batch_id=390 Accuracy=68.31: 100%|██████████| 391/391 [00:18<00:00, 21.70it/s]



Test set: Average loss: 1.0126, Accuracy: 6349/10000 (63.49%)

EPOCH: 4


Loss=0.6732581257820129 Batch_id=390 Accuracy=70.97: 100%|██████████| 391/391 [00:18<00:00, 21.26it/s]



Test set: Average loss: 0.8903, Accuracy: 6798/10000 (67.98%)

EPOCH: 5


Loss=0.8857657313346863 Batch_id=390 Accuracy=72.89: 100%|██████████| 391/391 [00:18<00:00, 21.26it/s]



Test set: Average loss: 1.1121, Accuracy: 6010/10000 (60.10%)

EPOCH: 6


Loss=0.7710773944854736 Batch_id=390 Accuracy=74.18: 100%|██████████| 391/391 [00:18<00:00, 21.01it/s]



Test set: Average loss: 1.0080, Accuracy: 6501/10000 (65.01%)

EPOCH: 7


Loss=0.6363919973373413 Batch_id=390 Accuracy=75.62: 100%|██████████| 391/391 [00:19<00:00, 20.36it/s]



Test set: Average loss: 0.9265, Accuracy: 6792/10000 (67.92%)

EPOCH: 8


Loss=0.5927739143371582 Batch_id=390 Accuracy=76.70: 100%|██████████| 391/391 [00:17<00:00, 21.79it/s]



Test set: Average loss: 0.8156, Accuracy: 7046/10000 (70.46%)

EPOCH: 9


Loss=0.7299472093582153 Batch_id=390 Accuracy=78.06: 100%|██████████| 391/391 [00:17<00:00, 21.87it/s]



Test set: Average loss: 0.8386, Accuracy: 7047/10000 (70.47%)

EPOCH: 10


Loss=0.5722709894180298 Batch_id=390 Accuracy=78.63: 100%|██████████| 391/391 [00:18<00:00, 21.11it/s]



Test set: Average loss: 0.7909, Accuracy: 7199/10000 (71.99%)

EPOCH: 11


Loss=0.8313525319099426 Batch_id=390 Accuracy=79.49: 100%|██████████| 391/391 [00:17<00:00, 22.40it/s]



Test set: Average loss: 0.8156, Accuracy: 7138/10000 (71.38%)

EPOCH: 12


Loss=0.6613046526908875 Batch_id=390 Accuracy=80.19: 100%|██████████| 391/391 [00:19<00:00, 20.58it/s]



Test set: Average loss: 0.9032, Accuracy: 7002/10000 (70.02%)

EPOCH: 13


Loss=0.5608867406845093 Batch_id=390 Accuracy=80.87: 100%|██████████| 391/391 [00:18<00:00, 21.53it/s]



Test set: Average loss: 0.8512, Accuracy: 7012/10000 (70.12%)

EPOCH: 14


Loss=0.43281450867652893 Batch_id=390 Accuracy=81.47: 100%|██████████| 391/391 [00:17<00:00, 21.92it/s]



Test set: Average loss: 0.8284, Accuracy: 7156/10000 (71.56%)

EPOCH: 15


Loss=0.7298819422721863 Batch_id=390 Accuracy=81.66: 100%|██████████| 391/391 [00:17<00:00, 21.73it/s]



Test set: Average loss: 0.7687, Accuracy: 7321/10000 (73.21%)

EPOCH: 16


Loss=0.5559092164039612 Batch_id=390 Accuracy=82.08: 100%|██████████| 391/391 [00:17<00:00, 22.16it/s]



Test set: Average loss: 0.7818, Accuracy: 7322/10000 (73.22%)

EPOCH: 17


Loss=0.5277857780456543 Batch_id=390 Accuracy=82.47: 100%|██████████| 391/391 [00:18<00:00, 20.81it/s]



Test set: Average loss: 0.7731, Accuracy: 7352/10000 (73.52%)

EPOCH: 18


Loss=0.5372351408004761 Batch_id=390 Accuracy=82.81: 100%|██████████| 391/391 [00:20<00:00, 19.54it/s]



Test set: Average loss: 0.8508, Accuracy: 7104/10000 (71.04%)

EPOCH: 19


Loss=0.48913437128067017 Batch_id=390 Accuracy=83.08: 100%|██████████| 391/391 [00:23<00:00, 16.85it/s]



Test set: Average loss: 0.7024, Accuracy: 7550/10000 (75.50%)



In [None]:
# The recorded training losses may be tensors (autograd-tracked and possibly on
# the GPU), which matplotlib cannot convert to arrays. float() works for both
# plain numbers and single-element tensors, so normalise every entry first.
train_loss_values = [float(step_loss) for step_loss in train_losses]

# 2 x 2 overview: losses on the top row, accuracies on the bottom row;
# training curves on the left, test curves on the right.
fig, axs = plt.subplots(2,2,figsize=(15,10))
axs[0, 0].plot(train_loss_values)
axs[0, 0].set_title("Training Loss")
# Only the values recorded after the first 4000 entries are shown here.
axs[1, 0].plot(train_acc[4000:])
axs[1, 0].set_title("Training Accuracy")
axs[0, 1].plot(test_losses)
axs[0, 1].set_title("Test Loss")
axs[1, 1].plot(test_acc)
axs[1, 1].set_title("Test Accuracy")