In [1]:
import torch.nn as nn
import torch


class ConvBlock(nn.Module):
    """Conv2d -> (optional) BatchNorm2d -> ReLU building block.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: forwarded to ``nn.Conv2d`` (int or ``(h, w)`` tuple).
        stride: forwarded to ``nn.Conv2d``.
        padding: forwarded to ``nn.Conv2d`` (int or ``(h, w)`` tuple).
        batch_norm: when true, a ``nn.BatchNorm2d(out_channels)`` layer is
            inserted between the convolution and the ReLU.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, batch_norm=True):
        # nn.Module has to be initialised before any attribute is assigned on
        # the instance, so the parent constructor runs first.
        super(ConvBlock, self).__init__()
        self.batch_norm = batch_norm
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        if self.batch_norm:
            self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply convolution, optional batch normalisation and ReLU to ``x``."""
        x = self.conv(x)
        if self.batch_norm:
            x = self.bn(x)
        return self.act(x)


class InceptionF5(nn.Module):
    """Inception module of Figure 5 (Table 1 of https://arxiv.org/pdf/1512.00567.pdf).

    Four parallel branches receive the same input; their outputs are
    concatenated along dim 1 (96 + 64 + 64 + 64 channels).
    """

    def __init__(self, in_channels, batch_norm):
        super().__init__()

        def conv(c_in, c_out, k):
            # Stride-1 ConvBlock; k // 2 gives padding 0 for 1x1 and 1 for 3x3.
            return ConvBlock(c_in, c_out, kernel_size=k, stride=1, padding=k // 2, batch_norm=batch_norm)

        # Branch 1: 1x1 conv -> 3x3 conv -> 3x3 conv
        double_3x3 = [conv(in_channels, 64, 1), conv(64, 96, 3), conv(96, 96, 3)]
        self.inceptionf5_1 = nn.Sequential(*double_3x3)

        # Branch 2: 1x1 conv -> 3x3 conv
        single_3x3 = [conv(in_channels, 48, 1), conv(48, 64, 3)]
        self.inceptionf5_2 = nn.Sequential(*single_3x3)

        # Branch 3: 3x3 max-pool (stride 1) -> 1x1 conv
        pooled = [nn.MaxPool2d(3, stride=1, padding=1), conv(in_channels, 64, 1)]
        self.inceptionf5_3 = nn.Sequential(*pooled)

        # Branch 4: 1x1 conv only
        self.inceptionf5_4 = nn.Sequential(conv(in_channels, 64, 1))

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results on dim 1."""
        branches = (
            self.inceptionf5_1,
            self.inceptionf5_2,
            self.inceptionf5_3,
            self.inceptionf5_4,
        )
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionF6(nn.Module):
    """Inception module of Figure 6 (Table 1 of https://arxiv.org/pdf/1512.00567.pdf).

    Uses factorised 1x7 / 7x1 convolutions with ``f_7x7`` intermediate
    channels. Each of the four branches ends with 192 channels and the branch
    outputs are concatenated along dim 1.
    """

    def __init__(self, in_channels, f_7x7, batch_norm):
        super().__init__()

        def conv(c_in, c_out, kernel_size, padding):
            return ConvBlock(c_in, c_out, kernel_size=kernel_size, stride=1,
                             padding=padding, batch_norm=batch_norm)

        def conv_1x1(c_in, c_out):
            return conv(c_in, c_out, 1, 0)

        def conv_1x7(c_in, c_out):
            return conv(c_in, c_out, (1, 7), (0, 3))

        def conv_7x1(c_in, c_out):
            return conv(c_in, c_out, (7, 1), (3, 0))

        # Branch 1: 1x1 -> 1x7 -> 7x1 -> 1x7 -> 7x1
        self.inceptionf6_1 = nn.Sequential(
            conv_1x1(in_channels, f_7x7),
            conv_1x7(f_7x7, f_7x7),
            conv_7x1(f_7x7, f_7x7),
            conv_1x7(f_7x7, f_7x7),
            conv_7x1(f_7x7, 192),
        )

        # Branch 2: 1x1 -> 1x7 -> 7x1
        self.inceptionf6_2 = nn.Sequential(
            conv_1x1(in_channels, f_7x7),
            conv_1x7(f_7x7, f_7x7),
            conv_7x1(f_7x7, 192),
        )

        # Branch 3: 3x3 max-pool (stride 1) -> 1x1
        self.inceptionf6_3 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            conv_1x1(in_channels, 192),
        )

        # Branch 4: 1x1 only
        self.inceptionf6_4 = nn.Sequential(conv_1x1(in_channels, 192))

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results on dim 1."""
        branches = (
            self.inceptionf6_1,
            self.inceptionf6_2,
            self.inceptionf6_3,
            self.inceptionf6_4,
        )
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionF7(nn.Module):
    """Inception module of Figure 7 (Table 1 of https://arxiv.org/pdf/1512.00567.pdf).

    Branches 1 and 2 each end in a 1x3 ("left") and a 3x1 ("right")
    convolution applied to the same tensor, whose outputs are concatenated.
    The final output concatenates 768 + 768 + 192 + 320 channels along dim 1.
    """

    def __init__(self, in_channels, batch_norm):
        super().__init__()

        def conv(c_in, c_out, kernel_size, padding):
            return ConvBlock(c_in, c_out, kernel_size=kernel_size, stride=1,
                             padding=padding, batch_norm=batch_norm)

        def conv_1x3():
            return conv(384, 384, (1, 3), (0, 1))

        def conv_3x1():
            return conv(384, 384, (3, 1), (1, 0))

        # Branch 1: 1x1 -> 3x3, then split into 1x3 (left) and 3x1 (right)
        self.inceptionf7_1 = nn.Sequential(
            conv(in_channels, 448, 1, 0),
            conv(448, 384, (3, 3), 1),
        )
        self.inceptionf7_1_left = conv_1x3()
        self.inceptionf7_1_right = conv_3x1()

        # Branch 2: 1x1, then split into 1x3 (left) and 3x1 (right)
        self.inceptionf7_2 = conv(in_channels, 384, 1, 0)
        self.inceptionf7_2_left = conv_1x3()
        self.inceptionf7_2_right = conv_3x1()

        # Branch 3: 3x3 max-pool (stride 1) -> 1x1
        self.inceptionf7_3 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            conv(in_channels, 192, 1, 0),
        )

        # Branch 4: 1x1 only
        self.inceptionf7_4 = nn.Sequential(conv(in_channels, 320, 1, 0))

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate the results on dim 1."""
        stem_1 = self.inceptionf7_1(x)
        branch_1 = torch.cat(
            [self.inceptionf7_1_left(stem_1), self.inceptionf7_1_right(stem_1)], 1)

        stem_2 = self.inceptionf7_2(x)
        branch_2 = torch.cat(
            [self.inceptionf7_2_left(stem_2), self.inceptionf7_2_right(stem_2)], 1)

        branch_3 = self.inceptionf7_3(x)
        branch_4 = self.inceptionf7_4(x)

        return torch.cat([branch_1, branch_2, branch_3, branch_4], 1)


class InceptionRed(nn.Module):
    """Grid-size reduction block (Figure 10 of https://arxiv.org/pdf/1512.00567.pdf).

    Placed between groups of Inception blocks. Every branch ends with a
    stride-2, unpadded 3x3 operation, and the branch outputs are concatenated
    along dim 1: ``(178 + add_ch) + (302 + add_ch) + in_channels`` channels.

    Args:
        in_channels: channels of the incoming feature map.
        f_3x3_r: channels produced by the 1x1 convolutions that open the two
            convolutional branches.
        add_ch: extra channels added to both convolutional branch widths.
        batch_norm: forwarded to every ``ConvBlock``.
    """

    def __init__(self, in_channels, f_3x3_r, add_ch=0, batch_norm = True):
        super().__init__()
        double_3x3_out = 178 + add_ch
        single_3x3_out = 302 + add_ch

        def conv(c_in, c_out, k, stride, pad):
            return ConvBlock(c_in, c_out, kernel_size=k, stride=stride,
                             padding=pad, batch_norm=batch_norm)

        # Branch 1: 1x1 -> 3x3 -> 3x3 (stride 2)
        self.inceptionred_1 = nn.Sequential(
            conv(in_channels, f_3x3_r, 1, 1, 0),
            conv(f_3x3_r, double_3x3_out, 3, 1, 1),
            conv(double_3x3_out, double_3x3_out, 3, 2, 0),
        )

        # Branch 2: 1x1 -> 3x3 (stride 2)
        self.inceptionred_2 = nn.Sequential(
            conv(in_channels, f_3x3_r, 1, 1, 0),
            conv(f_3x3_r, single_3x3_out, 3, 2, 0),
        )

        # Branch 3: 3x3 max-pool (stride 2), channel count unchanged
        self.inceptionred_3 = nn.Sequential(nn.MaxPool2d(3, stride=2, padding=0))

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate the results on dim 1."""
        branches = (self.inceptionred_1, self.inceptionred_2, self.inceptionred_3)
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head (paper: https://arxiv.org/pdf/1512.00567.pdf).

    Pipeline: adaptive average pool to 4x4 -> 1x1 conv to 128 channels -> ReLU
    -> flatten -> Linear(2048, 1024) -> ReLU -> Dropout(0.7)
    -> Linear(1024, num_classes).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.conv = nn.Conv2d(in_channels, 128, kernel_size=1, stride=1, padding=0)
        self.relu = nn.ReLU()
        # 128 channels * 4 * 4 pooled positions = 2048 flattened features.
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.dropout = nn.Dropout(0.7)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return the auxiliary class scores for feature map ``x``."""
        features = self.relu(self.conv(self.pool(x)))
        hidden = self.relu(self.fc1(torch.flatten(features, 1)))
        return self.fc2(self.dropout(hidden))



class InceptionV2(nn.Module):
    """Inception network assembled from the blocks defined in this file.

    Layout: six stem ``ConvBlock``s with one max-pool, 3 x ``InceptionF5``
    (288 channels), a reduction block, 5 x ``InceptionF6`` (768 channels), a
    second reduction block, 2 x ``InceptionF7`` (2048 channels), global
    average pooling and a linear classifier.

    Args:
        num_classes: size of the classifier output.
        batch_norm: forwarded to every ``ConvBlock``.
        dropout: when true, ``Dropout(0.4)`` is applied before the classifier.
        init_weights: when true, conv/linear weights are drawn from a normal
            distribution (std 0.1) truncated at two standard deviations, and
            batch-norm layers are reset to weight 1 / bias 0.
        aux_logits: when true (default) the auxiliary classifier is built and
            evaluated. When false no auxiliary head is created, the second
            return value of ``forward`` is ``None`` and the state dict has no
            ``aux.*`` entries, so callers must handle both.

    ``forward`` always returns a ``(logits, aux_logits)`` tuple.
    """

    def __init__(self, num_classes=100, batch_norm=True, dropout=True, init_weights=True, aux_logits=True):
        super(InceptionV2, self).__init__()
        self.aux_logits = aux_logits
        # Initial conv layers and max-pool before the Inception layers.
        self.conv1 = ConvBlock(3, 32, kernel_size=3, stride=2, padding=0, batch_norm=batch_norm)
        self.conv2 = ConvBlock(32, 32, kernel_size=3, stride=1, padding=0, batch_norm=batch_norm)
        self.conv3 = ConvBlock(32, 64, kernel_size=3, stride=1, padding=1, batch_norm=batch_norm)
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=0)
        self.conv4 = ConvBlock(64, 80, kernel_size=3, stride=1, padding=0, batch_norm=batch_norm)
        self.conv5 = ConvBlock(80, 192, kernel_size=3, stride=2, padding=0, batch_norm=batch_norm)
        self.conv6 = ConvBlock(192, 288, kernel_size=3, stride=1, padding=1, batch_norm=batch_norm)
        # Inception layers (F5)
        self.inception3a = InceptionF5(288, batch_norm=batch_norm)
        self.inception3b = InceptionF5(288, batch_norm=batch_norm)
        self.inception3c = InceptionF5(288, batch_norm=batch_norm)
        # Reduction layer: 288 -> 768 channels
        self.inceptionRed1 = InceptionRed(288, f_3x3_r=64, add_ch=0, batch_norm=batch_norm)
        # Inception layers (F6)
        self.inception4a = InceptionF6(768, f_7x7=128, batch_norm=batch_norm)
        self.inception4b = InceptionF6(768, f_7x7=160, batch_norm=batch_norm)
        self.inception4c = InceptionF6(768, f_7x7=160, batch_norm=batch_norm)
        self.inception4d = InceptionF6(768, f_7x7=160, batch_norm=batch_norm)
        self.inception4e = InceptionF6(768, f_7x7=192, batch_norm=batch_norm)
        # Reduction layer: 768 -> 1280 channels
        self.inceptionRed2 = InceptionRed(768, f_3x3_r=192, add_ch=16, batch_norm=batch_norm)
        # Auxiliary classifier fed by the output of inception4e; only built
        # when requested so that ``aux_logits`` is actually honoured.
        self.aux = InceptionAux(768, num_classes) if aux_logits else None
        # Inception layers (F7)
        self.inception5a = InceptionF7(1280, batch_norm=batch_norm)
        self.inception5b = InceptionF7(2048, batch_norm=batch_norm)
        # Adaptive pooling and fully connected classifier
        self.pool6 = nn.AdaptiveAvgPool2d((1, 1))
        # ``None`` (instead of a bool flag sharing the attribute name) marks
        # "no dropout"; the module is registered only when enabled.
        self.dropout = nn.Dropout(0.4) if dropout else None
        self.fc = nn.Linear(2048, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv/linear weights (truncated normal) and batch-norm layers."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                stddev = float(getattr(m, 'stddev', 0.1))
                # Zero-mean normal with std ``stddev`` truncated at +/- 2 std.
                # trunc_normal_ takes absolute bounds, hence the scaling.
                nn.init.trunc_normal_(m.weight, mean=0.0, std=stddev,
                                      a=-2 * stddev, b=2 * stddev)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return ``(logits, aux_logits)`` for image batch ``x``.

        ``aux_logits`` is ``None`` when the model was built with
        ``aux_logits=False``.
        """
        # Initial conv layers and max-pool before the Inception layers.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool1(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        # Inception layers (F5)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.inception3c(x)
        # Reduction layer
        x = self.inceptionRed1(x)
        # Inception layers (F6)
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        # The auxiliary head branches off before the second reduction.
        aux = self.aux(x) if self.aux is not None else None
        x = self.inceptionRed2(x)
        # Inception layers (F7)
        x = self.inception5a(x)
        x = self.inception5b(x)
        # Adaptive pooling and fully connected classifier
        x = self.pool6(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x, aux



In [3]:
# Build the network without the custom weight initialisation; the bare
# expression at the end makes the notebook display the module tree.
model = InceptionV2(batch_norm=True, dropout=True, init_weights=False)

model

InceptionV2(
  (conv1): ConvBlock(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU()
  )
  (conv2): ConvBlock(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU()
  )
  (conv3): ConvBlock(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU()
  )
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): ConvBlock(
    (conv): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1))
    (bn): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU()
  )
  (conv5): ConvBlock(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(2, 2))
    (bn): BatchNo

In [None]:
# Notebook cell: (re)load the TensorBoard IPython extension and open the
# dashboard on the shared log directory.
%reload_ext tensorboard

# Directory TensorBoard reads event files from.
LOG_DIR = "/content/drive/My Drive/Deep Learning Assignment/logs"

import os   
import datetime
import tensorflow as tf  
# NOTE(review): Keras TensorBoard callback; nothing in the code visible in
# this notebook references it afterwards - confirm whether it is still needed.
tensorboard_callback = tf.keras.callbacks.TensorBoard(LOG_DIR, histogram_freq=1)
# Start the TensorBoard UI; the path is the same literal as LOG_DIR above.
%tensorboard --logdir='/content/drive/My Drive/Deep Learning Assignment/logs'

In [None]:
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
import time
from datetime import datetime
import os
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score
import torch.nn as nn
from torchvision import models


def train(epoch):
    """Train the global ``model`` for one pass over ``training_loader``.

    Relies on the module-level ``model``, ``training_loader``, ``optimizer``,
    ``loss_function``, ``writer``, ``batch_size`` and ``cuda_available``.
    The optimised loss is the main loss plus 0.3 times the auxiliary loss.
    Per-batch loss and per-epoch accuracy are written to TensorBoard.

    Args:
        epoch: 1-based epoch number, used for logging and the global step.
    """
    epoch_start = time.time()
    predictions, targets = [], []

    # Run model in training mode
    model.train()

    batches_per_epoch = len(training_loader)
    total_samples = len(training_loader.dataset)

    for batch_index, (images, labels) in enumerate(training_loader):
        # Global step across epochs, starting at 1.
        iter_num = (epoch - 1) * batches_per_epoch + batch_index + 1
        # Move the inputs to the GPU when one is in use.
        if cuda_available:
            images, labels = images.cuda(), labels.cuda()

        optimizer.zero_grad()
        output0, output1 = model(images)

        # Main-head loss plus down-weighted auxiliary-head loss.
        loss = loss_function(output0, labels) + 0.3 * loss_function(output1, labels)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        # Predicted class = index of the largest main-head score.
        _, predicted = output0.max(1)
        predictions.extend(predicted.cpu().tolist())
        targets.extend(labels.cpu().tolist())

        # Print the training progress and store the loss in TensorBoard.
        loss_value = loss.item()
        seen_so_far = batch_index * batch_size + len(images)
        print('Training Epoch: {epoch} [{num_trained}/{num_samples}]\tLoss: {:0.2f}'.format(
            loss_value,
            epoch=epoch,
            num_trained=seen_so_far,
            num_samples=total_samples))
        writer.add_scalar('Train: Loss', loss_value, iter_num)

    # Epoch-level accuracy over everything seen in this pass.
    accuracy = accuracy_score(targets, predictions)

    # Wall-clock time spent on this epoch.
    elapsed = time.time() - epoch_start
    print('Time taken to train epoch {epoch}: {:.2f}s'.format(elapsed, epoch=epoch))
    writer.add_scalar('Train Set: Accuracy', accuracy, epoch)



@torch.no_grad()
def test(epoch):
    """Evaluate the global ``model`` on ``test_loader`` and log the metrics.

    Relies on the module-level ``model``, ``test_loader``, ``loss_function``,
    ``writer`` and ``cuda_available``. The loss is the main loss plus 0.3
    times the auxiliary loss, as in training.

    Args:
        epoch: epoch number used as the TensorBoard step.

    Returns:
        ``(accuracy, test_loss)`` where ``test_loss`` is the sum of the
        per-batch mean losses (this is the value callers compare between
        epochs).
    """
    test_loss = 0.0        # sum of per-batch mean losses (returned to the caller)
    weighted_loss = 0.0    # sum of per-sample losses, for the reported average
    y_pred = []
    y_true = []
    num_samples = len(test_loader.dataset)
    start = time.time()

    # Run model in evaluation mode
    model.eval()

    for (images, labels) in test_loader:
        # Move the inputs to the GPU when one is in use.
        if cuda_available:
            images = images.cuda()
            labels = labels.cuda()

        # Predict labels for the batch.
        output0, output1 = model(images)

        # Compute the loss.
        loss0 = loss_function(output0, labels)
        loss1 = loss_function(output1, labels)
        batch_loss = (loss0 + 0.3 * loss1).item()

        test_loss += batch_loss
        # The loss function returns a batch mean; weight it by the batch size
        # so the average below is a true per-sample mean even when the last
        # batch is smaller.
        weighted_loss += batch_loss * labels.size(0)

        output_values, predicted = output0.max(1)
        y_pred.extend(predicted.cpu().tolist())
        y_true.extend(labels.cpu().tolist())

    # Calculating metrics
    accuracy = accuracy_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=1)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=1)
    # Previously the sum of batch means was divided by the sample count, which
    # under-reported the average by roughly a factor of the batch size.
    loss = weighted_loss / num_samples
    time_consumed = time.time() - start

    # Print the evaluation results and store them in TensorBoard.
    print('Testing Network for epoch: ', epoch)
    print('Evaluation: Evaluation Time: {:.2f}s, Average loss: {:.4f}, Accuracy: {:.4f}, Recall: {:.4f}, Precision: {:.4f}'.format(time_consumed, loss, accuracy, recall, precision))
    writer.add_scalar('Test Set: Average loss', loss, epoch)
    writer.add_scalar('Test Set: Accuracy', accuracy, epoch)
    return accuracy, test_loss


# ---------------------------------------------------------------------------
# Training configuration. train() and test() read batch_size, cuda_available,
# writer, model, optimizer, loss_function and the data loaders as globals.
# ---------------------------------------------------------------------------
DATA_ROOT = './data'
batch_size = 256
epochs = 200
min_early_stopping = 150   # early stopping is not considered before this epoch
patience = 20              # epochs without improvement tolerated before stopping
milestones = [50, 100, 150]
LOG_DIR = '/content/drive/My Drive/Deep Learning Assignment/logs'
checkpoints_path = '/content/drive/My Drive/Deep Learning Assignment/checkpoints'
batch_norm = True
dropout = False


# Label used in log and checkpoint names.
setting = ''
if dropout:
    setting = 'Dropout'
elif batch_norm:
    setting = 'BatchNormalization'
else:
    setting = 'NoRegularization'

# Method to compute mean and std. 
# def compute_mean_std(cifar100_dataset):
#     data_r = numpy.dstack([cifar100_dataset[i][1][:, :, 0] for i in range(len(cifar100_dataset))])
#     data_g = numpy.dstack([cifar100_dataset[i][1][:, :, 1] for i in range(len(cifar100_dataset))])
#     data_b = numpy.dstack([cifar100_dataset[i][1][:, :, 2] for i in range(len(cifar100_dataset))])
#     mean = numpy.mean(data_r), numpy.mean(data_g), numpy.mean(data_b)
#     std = numpy.std(data_r), numpy.std(data_g), numpy.std(data_b)
#     return mean, std

# mean and std are computed on the training set. 
mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)


transform_train = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.RandomCrop(96, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


transform_test = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Load training data from CIFAR100
train_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=True, download=True, transform=transform_train)
training_loader = DataLoader(train_data, shuffle=True, num_workers=4, batch_size=batch_size)

# Load testing data from CIFAR100. Evaluation metrics do not depend on the
# sample order, so the loader is not shuffled.
test_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=False, download=True, transform=transform_test)
test_loader = DataLoader(test_data, shuffle=False, num_workers=4, batch_size=batch_size)

# Model
model = InceptionV2(batch_norm=batch_norm, dropout=dropout, init_weights=True)
print(model)

# Use the GPU when one is available.
cuda_available = torch.cuda.is_available()
if cuda_available:
    model.cuda()

# Define Loss Function, Optimizer
loss_function = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.2)

# Name shared by the TensorBoard run directory and the checkpoint file.
run_name = '{model}_{setting}_{optimizer}'.format(
    model=str(model.__class__.__name__),
    setting=setting,
    optimizer=str(optimizer.__class__.__name__))

# Tensorboard. makedirs also creates LOG_DIR itself when it does not exist yet.
log_model_path = os.path.join(LOG_DIR, run_name)
os.makedirs(log_model_path, exist_ok=True)
writer = SummaryWriter(log_dir=os.path.join(log_model_path, datetime.now().strftime('%d_%B_%Y_%Hh_%Mm_%Ss')))
# Zero-filled dummy batch placed on the same device as the model, so graph
# export also works on CPU-only hosts and never reads uninitialised memory.
dummy_input_tensor = torch.zeros(1, 3, 96, 96)
if cuda_available:
    dummy_input_tensor = dummy_input_tensor.cuda()
writer.add_graph(model, dummy_input_tensor)

# Checkpoints folder to save model
os.makedirs(checkpoints_path, exist_ok=True)
checkpoint_file = os.path.join(checkpoints_path, run_name)

# Initialize the early_stopping_counter.
early_stopping_counter = 0

# Train and evaluate the model
min_validation_loss = float('inf')
try:
    # range end is inclusive of `epochs` so that all configured epochs run.
    for epoch in range(1, epochs + 1):
        # Checking for early stopping condition. 
        if early_stopping_counter > patience and epoch >= min_early_stopping:
            print("Early Stopping the model training as there no significant improvment in the eval loss.")
            break
        train(epoch)
        accuracy, validation_loss = test(epoch)
        if validation_loss < min_validation_loss:
            # Keep only the weights with the lowest evaluation loss so far.
            torch.save(model.state_dict(), checkpoint_file)
            min_validation_loss = validation_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
        # Step the schedule once per completed epoch, after the optimizer
        # updates; the milestones therefore count completed epochs.
        scheduler.step()
finally:
    # Flush pending TensorBoard events even if training is interrupted.
    writer.close()


In [None]:
# Run this code block to load the model and compute different metrics on the validation dataset.
from sklearn.metrics import precision_score, recall_score, accuracy_score
import torchvision.transforms as transforms
import os
import torch.optim as optim
import torchvision
import torch
from torch.utils.data import DataLoader
import sys


# Normalisation statistics; same values as used for training.
mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

batch_norm = True
dropout = False
# Label used in the checkpoint file name.
setting = ''
if dropout:
    setting = 'Dropout'
elif batch_norm:
    setting = 'BatchNormalization'
else:
    setting = 'NoRegularization'

DATA_ROOT = './data'
checkpoints_path = '/content/drive/My Drive/Deep Learning Assignment/checkpoints'
batch_size = 256
cuda_available = torch.cuda.is_available()
device = 'cuda' if cuda_available else 'cpu'

try:
    model = InceptionV2(batch_norm=batch_norm, dropout=dropout, init_weights=False)
except NameError:
    # Only a missing class definition means "run the first cell"; any other
    # construction error is a real bug and is allowed to propagate.
    sys.exit("Please load the model by running the first block.")
model.to(device)

# The checkpoint name embeds the optimizer class name used for training;
# only the name is needed here, so no optimizer is instantiated.
run_name = '{model}_{setting}_{optimizer}'.format(
    model=str(model.__class__.__name__),
    setting=setting,
    optimizer=str(optim.Adam.__name__))
weights_path = os.path.join(checkpoints_path, run_name)

transform_test = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)])

# Load testing data from CIFAR100. Metrics do not depend on the sample order,
# so the loader is not shuffled.
test_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=False, download=True, transform=transform_test)
test_loader = DataLoader(test_data, shuffle=False, num_workers=4, batch_size=batch_size)

print("Model Weights at path: ", weights_path)
# Load model weights.
model.load_state_dict(torch.load(weights_path, map_location=torch.device(device)))

model.eval()

y_pred = []
y_true = []
with torch.no_grad():
    for images, labels in test_loader:
        # Only the images are needed on the GPU; the labels are just
        # collected for the CPU-side metrics.
        if cuda_available:
            images = images.cuda()
        # Predicting: the auxiliary output is not used for evaluation.
        output0, _ = model(images)
        _, predicted = output0.max(1)

        y_pred.extend(predicted.cpu().tolist())
        y_true.extend(labels.tolist())

accuracy = accuracy_score(y_true, y_pred)
# Precision and recall use the same averaging and zero-division policy.
recall = recall_score(y_true, y_pred, average='macro', zero_division=1)
precision = precision_score(y_true, y_pred, average='macro', zero_division=1)

print('\n')
print("Model Setting: ", run_name)


print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-100-python.tar.gz to ./data
Model Weights at path:  /content/drive/My Drive/Deep Learning Assignment/checkpoints/InceptionV2_BatchNormalization_Adam



Model Setting:  InceptionV2_BatchNormalization_Adam
Accuracy:  0.636
Precision:  0.6380864105632366
Recall:  0.6359999999999999
