In [2]:
import torch
import torch.nn as nn


config = [64, 64, 'Max', 128, 128, 'Max', 256, 256, 256, 'Max', 512, 512, 512, 'Max', 512, 512, 512, 'Max']


class VGG16(nn.Module):
    def __init__(self, features, num_classes=100, dropout = False, init_weights=True):
        super(VGG16, self).__init__()
        self.features = features
        # self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Add Dropout if specified. 
        if dropout:
            self.classifier = nn.Sequential(
                nn.Linear(512, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, num_classes),)
        else: 
            self.classifier = nn.Sequential(
                nn.Linear(512, 4096),
                nn.ReLU(True),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Linear(4096, num_classes),)
            
        if init_weights:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)
            


    def forward(self, x):
        x = self.features(x)
        # x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


def make_layers(config, batch_norm=False, in_channels = 3):
    layers = []
    for layer in config:
        if layer == 'Max':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, layer, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(layer), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(0.1)]
            in_channels = layer
    return nn.Sequential(*layers)


In [None]:
%reload_ext tensorboard

LOG_DIR = "/content/drive/My Drive/Deep Learning Assignment/logs"

import os   
import datetime
import tensorflow as tf

model = VGG16(make_layers(config, batch_norm = False), dropout = False, init_weights=False)
logdir = os.path.join(LOG_DIR, str(model.__class__.__name__), datetime.datetime.now().strftime('%d_%B_%Y_%Hh_%Mm_%Ss'))

tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
%tensorboard --logdir='/content/drive/My Drive/Deep Learning Assignment/logs'


In [None]:
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
import time
from datetime import datetime
import os
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score
import torch.nn as nn
from torchvision import models


def train(epoch):
    start = time.time() 
    y_pred = []
    y_true = []   

    # Run model in training mode
    model.train()

    for batch_index, (images, labels) in enumerate(training_loader):
        iter_num = (epoch - 1) * len(training_loader) + batch_index + 1
        # Convert the inputs to GPU compatible tensors. 
        if cuda_available:
            images = images.cuda()
            labels = labels.cuda()

        # Learning on the training data
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        output_values, predicted = outputs.max(1)
        y_pred.extend(predicted.cpu().tolist())
        y_true.extend(labels.cpu().tolist())

        # Printing the training results and storing them to tensorboard
        print('Training Epoch: {epoch} [{num_trained}/{num_samples}]\tLoss: {:0.2f}'.format(
            loss.item(),
            epoch = epoch,
            num_trained = batch_index * batch_size + len(images),
            num_samples = len(training_loader.dataset)))
        writer.add_scalar('Train: Loss', loss.item(), iter_num)
        
    # Calculating Metrics
    accuracy = accuracy_score(y_true, y_pred)

    # Time consumed for a epoch
    time_consumed = time.time() - start
    print('Time taken to train epoch {epoch}: {:.2f}s'.format(time_consumed, epoch = epoch))
    writer.add_scalar('Train Set: Accuracy', accuracy, epoch)



@torch.no_grad()
def test(epoch):
    test_loss = 0.0
    y_pred = []
    y_true = []
    len_test_loader = len(test_loader.dataset)
    start = time.time()
    
    # Run model in evaluation mode
    model.eval()

    for (images, labels) in test_loader:
        # Convert the inputs to GPU compatible tensors. 
        if cuda_available:
            images = images.cuda()
            labels = labels.cuda()

        # Predicting labels of test image set
        model_outputs = model(images)
        model_loss = loss_function(model_outputs, labels)
        test_loss += model_loss.item()
        output_values, predicted = model_outputs.max(1)
        y_pred.extend(predicted.cpu().tolist())
        y_true.extend(labels.cpu().tolist())
        
    # Calculating Metrics
    accuracy = accuracy_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=1)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=1)
    loss = test_loss / len_test_loader
    time_consumed = time.time() - start

    # GPU stats
    # if cuda_available:
    #     print(torch.cuda.memory_summary())

    # Printing the testing results and storing them to tensorboard
    print('Testing Network for epoch: ', epoch)
    print('Evaluation: Evaluation Time: {:.2f}s, Average loss: {:.4f}, Accuracy: {:.4f}, Recall: {:.4f}, Precision: {:.4f}'.format(time_consumed, loss, accuracy, recall, precision))
    writer.add_scalar('Test Set: Average loss', loss, epoch)
    writer.add_scalar('Test Set: Accuracy', accuracy, epoch)
    return accuracy, test_loss


global cuda_available
global writer
global batch_size
DATA_ROOT = './data'
batch_size = 256
epochs = 200
min_early_stopping = 150
patience = 20
milestones = [50, 100, 150]
LOG_DIR = '/content/drive/My Drive/Deep Learning Assignment/logs'
checkpoints_path = '/content/drive/My Drive/Deep Learning Assignment/checkpoints'
batch_norm = True
dropout = False


setting = ''
if dropout:
    setting = 'Dropout'
elif batch_norm:
    setting = 'BatchNormalization'
else:
    setting = 'NoRegularization'

# Method to compute mean and std. 
# def compute_mean_std(cifar100_dataset):
#     data_r = numpy.dstack([cifar100_dataset[i][1][:, :, 0] for i in range(len(cifar100_dataset))])
#     data_g = numpy.dstack([cifar100_dataset[i][1][:, :, 1] for i in range(len(cifar100_dataset))])
#     data_b = numpy.dstack([cifar100_dataset[i][1][:, :, 2] for i in range(len(cifar100_dataset))])
#     mean = numpy.mean(data_r), numpy.mean(data_g), numpy.mean(data_b)
#     std = numpy.std(data_r), numpy.std(data_g), numpy.std(data_b)
#     return mean, std

# mean and std are computed on the training set. 
mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)


transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Load training data from CIFAR100
train_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=True, download=True, transform=transform_train)
training_loader = DataLoader(train_data, shuffle=True, num_workers=4, batch_size=batch_size)

# Load testing data from CIFAR100
test_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=False, download=True, transform=transform_test)
test_loader = DataLoader(test_data, shuffle=True, num_workers=4, batch_size=batch_size)

# Model
model = VGG16(make_layers(config, batch_norm = batch_norm), dropout = dropout, init_weights=True)
print(model)

# Check if any GPU is available
cuda_available = torch.cuda.is_available()
if torch.cuda.is_available():
    model.cuda()             # Convert the model to GPU compatible. 

# Define Loss Function, Optimizer
loss_function = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.2) 

# Tensorboard
log_model_path = os.path.join(LOG_DIR, 
                              '{model}_{setting}_{optimizer}'.format(model=str(model.__class__.__name__), 
                               setting=setting, 
                               optimizer=optimizer.__class__.__name__))
if not os.path.exists(log_model_path):
    os.mkdir(log_model_path)
writer = SummaryWriter(log_dir=os.path.join(log_model_path, datetime.now().strftime('%d_%B_%Y_%Hh_%Mm_%Ss')))
dummy_input_tensor = torch.Tensor(1, 3, 32, 32).cuda()
writer.add_graph(model, dummy_input_tensor)

# Checkpoints folder to save model
if not os.path.exists(checkpoints_path):
    os.makedirs(checkpoints_path)
checkpoints_path = os.path.join(checkpoints_path, '{model}_{setting}_{optimizer}')

# Initialize the early_stopping_counter.
early_stopping_counter = 0

# Train and Evaluting the model
min_validation_loss = float('inf')
for epoch in range(1, epochs):
    # Adaptive Learning Rate
    scheduler.step(epoch)
    # Checking for early stopping condition. 
    if early_stopping_counter > patience and epoch >= min_early_stopping:
        print("Early Stopping the model training as there no significant improvment in the eval loss.")
        break
    train(epoch)
    accuracy, validation_loss = test(epoch)
    if validation_loss < min_validation_loss:
        torch.save(model.state_dict(), checkpoints_path.format(
            model=str(model.__class__.__name__), 
            setting=setting, 
            optimizer=str(optimizer.__class__.__name__)))
        min_validation_loss = validation_loss
        early_stopping_counter = 0
    else: 
        early_stopping_counter += 1 




Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
VGG16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2

With rtol=1e-05 and atol=1e-05, found 10 element(s) (out of 100) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 9.260265525002195e+21 (3.3285522678181585e+26 vs. 3.3284596651629084e+26), which occurred at index (0, 84).
  check_tolerance, strict, _force_outplace, True, _module_class)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Epoch: 124 [45312/50000]	Loss: 0.75
Training Epoch: 124 [45568/50000]	Loss: 0.67
Training Epoch: 124 [45824/50000]	Loss: 0.66
Training Epoch: 124 [46080/50000]	Loss: 0.77
Training Epoch: 124 [46336/50000]	Loss: 0.70
Training Epoch: 124 [46592/50000]	Loss: 0.83
Training Epoch: 124 [46848/50000]	Loss: 0.92
Training Epoch: 124 [47104/50000]	Loss: 0.72
Training Epoch: 124 [47360/50000]	Loss: 0.82
Training Epoch: 124 [47616/50000]	Loss: 0.81
Training Epoch: 124 [47872/50000]	Loss: 0.80
Training Epoch: 124 [48128/50000]	Loss: 0.75
Training Epoch: 124 [48384/50000]	Loss: 0.78
Training Epoch: 124 [48640/50000]	Loss: 0.86
Training Epoch: 124 [48896/50000]	Loss: 0.85
Training Epoch: 124 [49152/50000]	Loss: 0.87
Training Epoch: 124 [49408/50000]	Loss: 1.00
Training Epoch: 124 [49664/50000]	Loss: 0.84
Training Epoch: 124 [49920/50000]	Loss: 0.69
Training Epoch: 124 [50000/50000]	Loss: 0.77
Time taken to train epoch 124: 27.1

In [4]:
# Run this code bloack to load the model and compute different metrics on validation dataset.  
from sklearn.metrics import precision_score, recall_score, accuracy_score
import torchvision.transforms as transforms
import os
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
import sys

# mean and std are computed on the training set. 
mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

batch_norm = True
dropout = False
setting = ''
if dropout:
    setting = 'Dropout'
elif batch_norm:
    setting = 'BatchNormalization'
else:
    setting = 'NoRegularization'

DATA_ROOT = './data'
checkpoints_path = '/content/drive/My Drive/Deep Learning Assignment/checkpoints'
checkpoints_path = os.path.join(checkpoints_path, '{model}_{setting}_{optimizer}')
batch_size = 256
device = 'cpu'
try:
    model = VGG16(make_layers(config, batch_norm = batch_norm), dropout = dropout, init_weights=False)
except Exception as e:
    sys.exit("Please load the model by running the first block.")

if torch.cuda.is_available():
    device = 'cuda'
    model.cuda()

# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)


transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)])

# Load testing data from CIFAR100
test_data = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=False, download=True, transform=transform_test)
test_loader = DataLoader(test_data, shuffle=True, num_workers=4, batch_size=batch_size)
cuda_available = torch.cuda.is_available()

print("Model Weights at path: ", checkpoints_path.format(
            model=str(model.__class__.__name__), 
            setting=setting, 
            optimizer=str(optimizer.__class__.__name__)))
# Load model weights.
model.load_state_dict(torch.load(checkpoints_path.format(
            model=str(model.__class__.__name__), 
            setting=setting, 
            optimizer=str(optimizer.__class__.__name__)), map_location=torch.device(device)))


model.eval()
accuracy = 0.0
precision = 0.0
recall = 0

with torch.no_grad():
    y_pred = []
    y_true = []
    for iter, (image, labels) in enumerate(test_loader):
        # print("iteration: {}\ttotal {} iterations".format(iter + 1, len(test_loader)))
        # Convert the inputs to GPU compatible tensors. 
        if cuda_available:
            image = image.cuda()
            label = labels.cuda()
        # Predicting
        model_outputs = model(image)
        output_values, predicted = model_outputs.max(1)
        y_pred.extend(predicted.cpu().tolist())
        y_true.extend(labels.cpu().tolist())

accuracy = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred, labels=range(100), average = 'macro')#average='macro', zero_division=1)
precision = precision_score(y_true, y_pred, average='macro', zero_division=1)

print('\n')
print("Model Setting: ", '{model}_{setting}_{optimizer}'.format(
            model=str(model.__class__.__name__), 
            setting=setting, 
            optimizer=str(optimizer.__class__.__name__)))


print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)

Files already downloaded and verified
Model Weights at path:  /content/drive/My Drive/Deep Learning Assignment/checkpoints/VGG16_BatchNormalization_Adam


Model Setting:  VGG16_BatchNormalization_Adam
Accuracy:  0.6233
Precision:  0.6246941412015978
Recall:  0.6233000000000001
