# Import Libraries

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

  from .autonotebook import tqdm as notebook_tqdm


## Data Transformations

We start by defining our data transformations. We need to think about what our data is and how we can augment it so that it correctly represents images the model might not otherwise see.


In [2]:
# Normalisation arguments (per-channel mean, per-channel std) shared by both
# pipelines. Each must be a sequence with one entry per image channel.
_normalize_args = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Train phase: random rotation within +/-15 degrees as augmentation, then
# conversion to a tensor and per-channel normalisation.
# NOTE(review): `fill` is expressed in the pixel units of the image being
# rotated; if the dataset yields 8-bit PIL images, (1, 1, 1) is near-black
# rather than white -- confirm this is the intended border colour.
train_transforms = transforms.Compose([
    transforms.RandomRotation((-15.0, 15.0), fill=(1, 1, 1)),
    transforms.ToTensor(),
    transforms.Normalize(*_normalize_args),
])

# Test phase: no augmentation, only tensor conversion and the same normalisation.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(*_normalize_args),
])


# Dataset and Creating Train/Test Split

In [3]:
# CIFAR-10 train and test splits, downloaded into ./data when not already
# present, each wired to its own transform pipeline.
# NOTE: the names `train` and `test` are rebound to functions by the later
# `def train` / `def test` cell, so the dataloaders must be built from these
# datasets before that cell is executed.
train = datasets.CIFAR10(
    './data',
    train=True,
    download=True,
    transform=train_transforms,
)
test = datasets.CIFAR10(
    './data',
    train=False,
    download=True,
    transform=test_transforms,
)

Files already downloaded and verified
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [4]:
SEED = 1

# Is a CUDA device usable in this session?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# Seed the CPU generator (and the CUDA one when present) for reproducibility.
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Dataloader arguments: larger batches, worker processes and pinned memory when
# a GPU is available; a smaller single-process configuration otherwise.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# Train dataloader: reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# Test dataloader: same settings but WITHOUT shuffling. The evaluation metrics
# do not depend on sample order, and a shuffling sampler would draw from the
# global RNG on every evaluation pass and make per-sample results non-repeatable.
test_loader = torch.utils.data.DataLoader(test, **dict(dataloader_args, shuffle=False))

CUDA Available? True


# The model
Let's start with the model we first saw

In [5]:
import torch.nn.functional as F
dropout_value = 0.05
# from model import Model_4
class Model_4(nn.Module):
    """Small residual CNN mapping 3-channel images to log-probabilities over 10 classes.

    Layout: three stages of three 3x3 conv blocks each (8, 16 and 32 channels).
    Inside a stage the output of the first block is added back to the output of
    the third block (a skip connection). Stages are joined by a 1x1 "transition"
    convolution that changes the channel count; the first two transitions are
    followed by a 2x2 max-pool. A global average pool and a final 1x1
    convolution produce the 10 class scores.

    Every 3x3 convolution uses ``padding=1``, so it preserves spatial size. For a
    32x32 input the feature maps are 32x32 -> (pool) 16x16 -> (pool) 8x8 ->
    (global average pool) 1x1.

    Args:
        normal: normalisation layer used inside every conv block.
            ``"bn"`` (default) -> ``nn.BatchNorm2d``;
            ``"gn"`` -> ``nn.GroupNorm`` with ``_GN_GROUPS`` groups;
            ``"ln"`` -> layer normalisation, implemented as ``nn.GroupNorm``
            with a single group.

    Raises:
        ValueError: if ``normal`` is not one of ``"bn"``, ``"gn"``, ``"ln"``.
    """

    # Group count for normal="gn"; it divides every channel width used (8, 16, 32).
    _GN_GROUPS = 4

    def __init__(self, normal="bn"):
        super(Model_4, self).__init__()

        if normal not in ("bn", "gn", "ln"):
            raise ValueError(
                "normal must be one of 'bn', 'gn' or 'ln', got {!r}".format(normal)
            )
        self.normal = normal

        # NOTE: sub-modules are created in the same order as before so that
        # parameter initialisation consumes the RNG identically and the
        # state_dict keys/order are unchanged.

        # Stage 1: 3 -> 8 channels.
        self.convblock1 = self._conv_block(3, 8)
        self.convblock2 = self._conv_block(8, 8)
        self.convblock3 = self._conv_block(8, 8)

        # Transition 1: 8 -> 16 channels, then halve the spatial size.
        self.tranblock1 = self._pointwise(8, 16)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 16 channels.
        self.convblock4 = self._conv_block(16, 16)
        self.convblock5 = self._conv_block(16, 16)
        self.convblock6 = self._conv_block(16, 16)

        # Transition 2: 16 -> 32 channels, then halve the spatial size.
        self.tranblock2 = self._pointwise(16, 32)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 32 channels.
        self.convblock7 = self._conv_block(32, 32)
        self.convblock8 = self._conv_block(32, 32)
        self.convblock9 = self._conv_block(32, 32)

        # Transition 3: 32 -> 16 channels (no pooling).
        self.tranblock3 = self._pointwise(32, 16)

        # Global average pooling down to 1x1. Adaptive pooling equals the former
        # AvgPool2d(kernel_size=8) on the 8x8 maps produced by 32x32 inputs, but
        # also collapses other input sizes to 1x1, so the view(-1, 10) in
        # forward() can no longer silently change the batch dimension.
        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d(1)
        )

        # Classifier head: 1x1 convolution from 16 channels to the 10 class scores.
        self.convblock10 = self._pointwise(16, 10)

        # Kept for backward compatibility of the attribute; forward() does not use it.
        self.dropout = nn.Dropout(dropout_value)

    def _norm(self, channels):
        """Return the normalisation layer selected by ``self.normal`` for ``channels`` feature maps."""
        if self.normal == "bn":
            return nn.BatchNorm2d(channels)
        if self.normal == "gn":
            return nn.GroupNorm(self._GN_GROUPS, channels)
        # "ln": one group spanning all channels normalises over (C, H, W) per sample.
        return nn.GroupNorm(1, channels)

    def _conv_block(self, in_channels, out_channels):
        """Build a size-preserving block: 3x3 conv (no bias) -> ReLU -> norm -> dropout."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=(3, 3), padding=1, bias=False),
            nn.ReLU(),
            self._norm(out_channels),
            nn.Dropout(dropout_value),
        )

    @staticmethod
    def _pointwise(in_channels, out_channels):
        """Build a bias-free 1x1 convolution wrapped in ``nn.Sequential``."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=(1, 1), padding=0, bias=False),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)`` for image batch ``x``."""
        # Stage 1 with skip connection around blocks 2-3.
        skip = self.convblock1(x)
        x = self.convblock2(skip)
        x = self.convblock3(x) + skip
        x = self.tranblock1(x)
        x = self.pool1(x)

        # Stage 2 with skip connection around blocks 5-6.
        skip = self.convblock4(x)
        x = self.convblock5(skip)
        x = self.convblock6(x) + skip
        x = self.tranblock2(x)
        x = self.pool2(x)

        # Stage 3 with skip connection around blocks 8-9.
        skip = self.convblock7(x)
        x = self.convblock8(skip)
        x = self.convblock9(x) + skip
        x = self.tranblock3(x)

        # Collapse spatial dimensions, score the classes, flatten to (batch, 10).
        x = self.gap(x)
        x = self.convblock10(x)

        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)



# Model Params
We can't emphasize enough how important it is to view the model summary.
Unfortunately, PyTorch has no built-in model visualizer, so we have to rely on an external package (`torchsummary`).

In [6]:
# !pip install torchsummary
from torchsummary import summary
# Pick the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(device)

# Instantiate the network on the chosen device and print a per-layer summary
# for a single 3x32x32 input.
model = Model_4()
model = model.to(device)
summary(model, input_size=(3, 32, 32))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 32, 32]             216
              ReLU-2            [-1, 8, 32, 32]               0
       BatchNorm2d-3            [-1, 8, 32, 32]              16
           Dropout-4            [-1, 8, 32, 32]               0
            Conv2d-5            [-1, 8, 32, 32]             576
              ReLU-6            [-1, 8, 32, 32]               0
       BatchNorm2d-7            [-1, 8, 32, 32]              16
           Dropout-8            [-1, 8, 32, 32]               0
            Conv2d-9            [-1, 8, 32, 32]             576
             ReLU-10            [-1, 8, 32, 32]               0
      BatchNorm2d-11            [-1, 8, 32, 32]              16
          Dropout-12            [-1, 8, 32, 32]               0
           Conv2d-13           [-1, 16, 32, 32]             128
        MaxPool2d-14           [-1

# Training and Testing

All right, so we have about 37.6K params (see the model summary above). The purpose of this notebook is to set things right for our future experiments.

Looking at logs can be boring, so we'll introduce **tqdm** progressbar to get cooler logs.

Let's write train and test functions

In [7]:
# from utils import train, test



from tqdm import tqdm

# Metric histories filled in by train() and test():
# losses/accuracies are appended per batch (train) and per evaluation pass (test).
train_losses, test_losses = [], []
train_acc, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
    """Run one training pass over ``train_loader`` and record per-batch metrics.

    For every batch the NLL loss is appended to the module-level
    ``train_losses`` list and the running accuracy (percent, over the samples
    seen so far in this pass) is appended to ``train_acc``. Progress is shown
    with a tqdm bar.

    Args:
        model: network whose output is per-class log-probabilities.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepping ``model``'s parameters.
        epoch: current epoch index; accepted for interface compatibility but
            not used inside this function.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # Move the batch to the target device.
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so they must be
        # cleared before each new batch for the update to be correct.
        optimizer.zero_grad()

        # Forward pass.
        y_pred = model(data)

        # Negative log-likelihood on the log-probabilities.
        loss = F.nll_loss(y_pred, target)

        # Backpropagation and parameter update.
        loss.backward()
        optimizer.step()

        # Record the loss detached from the autograd graph: keeping the attached
        # tensor would hold a reference to every batch's graph for the whole run.
        # The entry stays a 0-dim tensor, as before.
        train_losses.append(loss.detach())
        loss_value = loss.item()

        # Running accuracy over the samples processed so far.
        pred = y_pred.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and record the results.

    Computes the average NLL loss per sample and the accuracy in percent over
    the whole dataset behind ``test_loader``, prints a one-line report, and
    appends the two values to the module-level ``test_losses`` and ``test_acc``
    lists. Gradients are disabled during evaluation.

    Args:
        model: network whose output is per-class log-probabilities.
        device: device the batches are moved to before the forward pass.
        test_loader: loader yielding ``(data, target)`` batches; its
            ``dataset`` length is used as the sample count.
    """
    model.eval()
    summed_loss = 0
    n_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            log_probs = model(images)
            # Sum (not mean) per batch so the final division yields a per-sample average.
            summed_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Predicted class = index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(test_loader.dataset)
    mean_loss = summed_loss / n_samples
    test_losses.append(mean_loss)

    accuracy = 100. * n_correct / n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, n_correct, n_samples, accuracy))

    test_acc.append(accuracy)

In [8]:
from torch.optim.lr_scheduler import StepLR

# Fresh model, SGD with momentum, and a step schedule that multiplies the
# learning rate by 0.1 every 5 epochs.
model = Model_4()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.08, momentum=0.9)
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

EPOCHS = 20
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    # One pass over the training data, then advance the LR schedule,
    # then evaluate on the test set.
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

EPOCH: 0


Loss=1.3798052072525024 Batch_id=390 Accuracy=40.59: 100%|███████████████████████████| 391/391 [00:15<00:00, 25.92it/s]



Test set: Average loss: 1.3765, Accuracy: 4907/10000 (49.07%)

EPOCH: 1


Loss=1.308971643447876 Batch_id=390 Accuracy=52.82: 100%|████████████████████████████| 391/391 [00:14<00:00, 26.24it/s]



Test set: Average loss: 1.4083, Accuracy: 5134/10000 (51.34%)

EPOCH: 2


Loss=1.3459490537643433 Batch_id=390 Accuracy=58.26: 100%|███████████████████████████| 391/391 [00:14<00:00, 27.79it/s]



Test set: Average loss: 1.1167, Accuracy: 5958/10000 (59.58%)

EPOCH: 3


Loss=1.0483582019805908 Batch_id=390 Accuracy=61.93: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.81it/s]



Test set: Average loss: 1.0918, Accuracy: 6107/10000 (61.07%)

EPOCH: 4


Loss=0.961347222328186 Batch_id=390 Accuracy=64.60: 100%|████████████████████████████| 391/391 [00:14<00:00, 27.68it/s]



Test set: Average loss: 0.9299, Accuracy: 6659/10000 (66.59%)

EPOCH: 5


Loss=0.7553361058235168 Batch_id=390 Accuracy=69.33: 100%|███████████████████████████| 391/391 [00:14<00:00, 27.40it/s]



Test set: Average loss: 0.8487, Accuracy: 6939/10000 (69.39%)

EPOCH: 6


Loss=1.0297825336456299 Batch_id=390 Accuracy=70.15: 100%|███████████████████████████| 391/391 [00:14<00:00, 27.28it/s]



Test set: Average loss: 0.8257, Accuracy: 7068/10000 (70.68%)

EPOCH: 7


Loss=0.9469733238220215 Batch_id=390 Accuracy=70.80: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.84it/s]



Test set: Average loss: 0.8041, Accuracy: 7159/10000 (71.59%)

EPOCH: 8


Loss=0.7960101366043091 Batch_id=390 Accuracy=71.41: 100%|███████████████████████████| 391/391 [00:14<00:00, 27.50it/s]



Test set: Average loss: 0.7973, Accuracy: 7187/10000 (71.87%)

EPOCH: 9


Loss=0.835424542427063 Batch_id=390 Accuracy=71.54: 100%|████████████████████████████| 391/391 [00:15<00:00, 25.99it/s]



Test set: Average loss: 0.7905, Accuracy: 7196/10000 (71.96%)

EPOCH: 10


Loss=0.9278632998466492 Batch_id=390 Accuracy=72.25: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.20it/s]



Test set: Average loss: 0.7836, Accuracy: 7243/10000 (72.43%)

EPOCH: 11


Loss=0.6024354696273804 Batch_id=390 Accuracy=72.37: 100%|███████████████████████████| 391/391 [00:14<00:00, 27.22it/s]



Test set: Average loss: 0.7834, Accuracy: 7217/10000 (72.17%)

EPOCH: 12


Loss=0.8467873334884644 Batch_id=390 Accuracy=72.20: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.20it/s]



Test set: Average loss: 0.7793, Accuracy: 7249/10000 (72.49%)

EPOCH: 13


Loss=0.7034211158752441 Batch_id=390 Accuracy=72.44: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.47it/s]



Test set: Average loss: 0.7746, Accuracy: 7261/10000 (72.61%)

EPOCH: 14


Loss=0.6986692547798157 Batch_id=390 Accuracy=72.68: 100%|███████████████████████████| 391/391 [00:15<00:00, 25.73it/s]



Test set: Average loss: 0.7804, Accuracy: 7248/10000 (72.48%)

EPOCH: 15


Loss=0.8723853826522827 Batch_id=390 Accuracy=72.57: 100%|███████████████████████████| 391/391 [00:15<00:00, 24.80it/s]



Test set: Average loss: 0.7782, Accuracy: 7263/10000 (72.63%)

EPOCH: 16


Loss=0.7118234038352966 Batch_id=390 Accuracy=72.44: 100%|███████████████████████████| 391/391 [00:15<00:00, 25.94it/s]



Test set: Average loss: 0.7767, Accuracy: 7249/10000 (72.49%)

EPOCH: 17


Loss=0.6645475625991821 Batch_id=390 Accuracy=72.30: 100%|███████████████████████████| 391/391 [00:15<00:00, 25.84it/s]



Test set: Average loss: 0.7749, Accuracy: 7260/10000 (72.60%)

EPOCH: 18


Loss=0.5948140621185303 Batch_id=390 Accuracy=72.55: 100%|███████████████████████████| 391/391 [00:14<00:00, 26.69it/s]



Test set: Average loss: 0.7800, Accuracy: 7238/10000 (72.38%)

EPOCH: 19


Loss=0.8665997385978699 Batch_id=390 Accuracy=72.78: 100%|███████████████████████████| 391/391 [00:15<00:00, 24.65it/s]



Test set: Average loss: 0.7793, Accuracy: 7248/10000 (72.48%)

