In [17]:
import numpy as np 
import pandas as pd 
import torchsummary
from torchsummary import summary
import os
# Print the full path of every file found (recursively) under /kaggle/input,
# so the dataset files available to this notebook show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import random
import torch
from sklearn.preprocessing import LabelBinarizer
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
import time
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import PIL
from PIL import Image
import torch.utils.data
import matplotlib
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# Data Pipeline

In [3]:
## Dataset workflow
## -> read numpy file in -> transform to PIL image -> do PIL image preprocessing 
## -> convert to tensor (requires inputs in form H*W*C) -> Normalize 

class Cifar10Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing images from a ``.npy`` file with CSV labels.

    Args:
        root_dir: Directory that contains both data files.
        images_dir: File name (relative to ``root_dir``) of the ``.npy`` image
            array. Each image is transposed with ``(1, 2, 0)`` on access, so
            it is expected to be stored channel-first (C*H*W).
        labels_dir: File name (relative to ``root_dir``) of a CSV file with a
            ``Category`` column holding one label per image.
        transforms: Optional callable applied to each H*W*C image.

    Attributes:
        images: The loaded image array.
        allLabels: NumPy array of the raw labels, in file order.
        labelToIdx: Mapping from raw label to integer class index.
    """

    def __init__(self, root_dir, images_dir, labels_dir, transforms = None):
        # Resolve and remember the locations of the two data files.
        self.images_path = os.path.join(root_dir, images_dir)
        self.labels_path = os.path.join(root_dir, labels_dir)
        self.transforms = transforms
        self.images = np.load(self.images_path)
        self.allLabels = np.array(pd.read_csv(self.labels_path)['Category'])
        # Sort the unique labels before numbering them. Iterating a raw set of
        # strings follows hash order, which changes between interpreter runs,
        # so class indices would not be stable across kernel restarts.
        uniqueLabels = sorted(set(self.allLabels))
        self.labelToIdx = {label: idx for idx, label in enumerate(uniqueLabels)}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, key):
        """Return ``(image, class_index)`` for the sample at position ``key``."""
        x = self.images[key]
        # Channel-first -> H*W*C, the layout the transform pipeline expects.
        x = x.transpose(1,2,0)
        if self.transforms:
            x = self.transforms(x)
        # PyTorch classification losses take an integer class index rather
        # than a one-hot vector, so translate the raw label to its index.
        stringLabel = self.allLabels[key]
        y = self.labelToIdx[stringLabel]
        return x,y
        

In [4]:
## Define Transformations
# Preprocessing pipeline applied by Cifar10Dataset to every H*W*C image array.
data_transform = transforms.Compose([
    # Convert the NumPy array to a PIL image so the PIL-based augmentations work.
    transforms.ToPILImage(), 
    # Augmentation: random horizontal flip (torchvision's default probability).
    transforms.RandomHorizontalFlip(),
    # Augmentation: random grayscale conversion (torchvision's default probability).
    transforms.RandomGrayscale(),
    # Back to a torch tensor.
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5, intended to map values into [-1, 1].
    transforms.Normalize(mean = [0.5,0.5,0.5], std=[0.5, 0.5, 0.5])
])

In [15]:
# Inspect the label -> class-index mapping of the training data.
x = Cifar10Dataset(root_dir='../input/cifar10-comp/', images_dir='train_images.npy',
                   labels_dir='train_labels.csv', transforms=data_transform)
labels = x.allLabels
z = set(labels)
# Read the mapping from the dataset itself rather than rebuilding it here, so
# what is displayed is exactly the mapping __getitem__ uses for the targets.
labels_to_idx = dict(x.labelToIdx)
assert set(labels_to_idx) == z, "label mapping does not cover every label"
labels_to_idx

In [16]:
# Define the dataset: training images and labels read from the competition
# input directory, with the data_transform pipeline applied on every access.
transformed_trainData = Cifar10Dataset(root_dir='../input/cifar10-comp/', images_dir='train_images.npy',
                                      labels_dir='train_labels.csv', transforms=data_transform)

In [None]:
# Create Validation Set: 80% of the samples for training, the rest for validation.
train_size = int(0.8*len(transformed_trainData))
valid_size = len(transformed_trainData)-train_size
# A dedicated, seeded generator makes the split identical on every run, so
# validation metrics are comparable across kernel restarts.
SPLIT_SEED = 42
train_data, valid_data = torch.utils.data.random_split(
    transformed_trainData, [train_size, valid_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))
# NOTE(review): both subsets wrap the same Cifar10Dataset, so the random
# augmentations in its transform pipeline are applied to validation samples too.

In [None]:
# Define batch generators (DataLoaders) for the training and validation subsets.
params = {'batch_size':128, 'shuffle':True, 'num_workers':6}
training_generator = DataLoader(train_data, **params)

# Validation only aggregates metrics, so shuffling brings no benefit; a fixed
# order keeps the batch composition (and per-batch statistics) deterministic.
validation_generator = DataLoader(valid_data, **{**params, 'shuffle': False})

In [None]:
# Visualize Images
def image_visualizer(dataset, mean=0.5, std=0.5, seed=90):
    """Show the same nine random samples before and after preprocessing.

    Args:
        dataset: Object exposing ``images`` (channel-first image array),
            ``allLabels`` and ``__getitem__`` returning ``(image, label)``.
        mean: Mean used by the dataset's normalisation step (scalar or one
            value per channel); used to undo it for display.
        std: Standard deviation used by the normalisation step.
        seed: Seed for the sample selection, so the grid is reproducible.

    Raises:
        ValueError: If ``dataset`` contains no images.
    """
    num_images = len(dataset)
    if num_images == 0:
        raise ValueError("dataset is empty, nothing to visualize")

    # A private RNG keeps the selection reproducible without reseeding the
    # global ``random`` module as a side effect.
    rng = random.Random(seed)
    # Bound the indices by the real dataset size rather than a fixed 49999.
    savedIdxs = [rng.randint(0, num_images - 1) for _ in range(9)]
    labels = [dataset.allLabels[idx] for idx in savedIdxs]

    # Before preprocessing: raw arrays, transposed to H*W*C for imshow.
    raw_pictures = [dataset.images[idx].transpose(1, 2, 0) for idx in savedIdxs]
    _show_image_grid(raw_pictures, labels, 'Before Preprocessing')

    # After preprocessing: the samples as produced by the dataset.
    processed_pictures = []
    for idx in savedIdxs:
        picture = dataset[idx][0]
        if torch.is_tensor(picture):
            picture = picture.numpy().transpose(1, 2, 0)
            # Undo the normalisation; imshow clips float RGB data outside
            # [0, 1], which would distort normalised images.
            picture = np.clip(picture * np.asarray(std) + np.asarray(mean), 0.0, 1.0)
        processed_pictures.append(picture)
    _show_image_grid(processed_pictures, labels, 'After Preprocessing')


def _show_image_grid(pictures, labels, title):
    """Draw up to nine pictures on a 3x3 grid, each titled with its label."""
    fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(9, 9))
    for axis, picture, label in zip(axes.flatten(), pictures, labels):
        axis.set_title(label)
        axis.imshow(picture)
    plt.tight_layout()
    fig.suptitle(title)
    # Leave room above the grid for the figure title.
    fig.subplots_adjust(top=0.93)
    plt.show()

# Show the before/after preprocessing grids for the full training dataset.
image_visualizer(transformed_trainData)

# ResNet

In [7]:
class ResBlock(nn.Module):
    """Residual block: conv-bn-relu, conv-bn, add the shortcut, then relu.

    Args:
        in_channels: Number of channels of the block input.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the shortcut
            projection when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride =1):
        super().__init__()

        # Main path, first 3x3 convolution; it carries the block's stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # Main path, second 3x3 convolution; always stride 1.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut: identity unless the main path changes the tensor shape,
        # in which case a strided 1x1 convolution + batch norm matches it.
        changes_shape = stride != 1 or in_channels != out_channels
        if changes_shape:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden = hidden + self.shortcut(x)
        return torch.relu(hidden)
            
                

In [8]:
class ResNet(nn.Module):
    """ResNet-style classifier: a stem convolution, four residual stages, a linear head.

    Each stage consists of two ``ResBlock`` modules; stages 2-4 use stride 2
    in their first block while widening the channels (64 -> 128 -> 256 -> 512).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes = 10):
        super(ResNet, self).__init__()

        # Input (stem) convolution.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3,3), stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Residual stages.
        self.block1 = self._create_block(64, 64, stride=1)
        self.block2 = self._create_block(64, 128, stride=2)
        self.block3 = self._create_block(128, 256, stride=2)
        self.block4 = self._create_block(256, 512, stride =2)
        self.linear = nn.Linear(512, num_classes)

    def _create_block(self, in_channels, out_channels, stride):
        """Build one stage: a (possibly strided) block followed by a stride-1 block."""
        return nn.Sequential(ResBlock(in_channels, out_channels, stride),
                             ResBlock(out_channels, out_channels, 1))

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = nn.ReLU()(self.bn1(self.conv1(x)))
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.block4(out)
        # Global average pooling down to 1x1. For 32x32 inputs this equals the
        # former fixed AvgPool2d(4) over the 4x4 map, but it also yields the
        # 512 features the linear head expects for other input resolutions.
        out = nn.functional.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Training

In [9]:
# Initialize network to train on GPU (falling back to CPU), using Adam optimization strategy
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
clf = ResNet()
clf.to(device)
cost_function = nn.CrossEntropyLoss()
# NOTE(review): lr=0.1 is far above Adam's default of 1e-3 -- confirm it is intended.
optimizer = optim.Adam(clf.parameters(), lr=0.1, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.00001, amsgrad=False)
# trainResNet steps this scheduler with the validation *loss*, a quantity to be
# minimised, so the plateau detection must run in 'min' mode. With 'max' a
# falling loss is treated as "no improvement" and the learning rate is cut
# while the model is still getting better.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)

In [None]:
def trainResNet(model = clf, device = device, cost_function = cost_function, optimizer = optimizer,
                scheduler = scheduler, epochs = 10, training_data = training_generator,
                validation_data = validation_generator):
    """Train ``model`` and record per-epoch training/validation metrics.

    The defaults are bound to the notebook-level objects that exist when this
    cell is executed.

    Args:
        model: Network to train.
        device: Device every batch is moved to.
        cost_function: Loss called as ``cost_function(predictions, targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Scheduler stepped once per epoch with the validation loss.
        epochs: Number of passes over ``training_data``.
        training_data: Iterable of ``(inputs, targets)`` training batches.
        validation_data: Iterable of ``(inputs, targets)`` validation batches.

    Returns:
        Tuple ``(training_loss, validation_loss, training_accuracy,
        validation_accuracy, epochNum)`` of per-epoch lists. Accuracies are
        percentages averaged over the batches of the epoch.
    """
    training_loss = []
    validation_loss = []
    training_accuracy = []
    validation_accuracy = []
    epochNum = []
    for epoch in range(1, epochs+1):
        print('Epoch Num: %s'%(epoch))
        epochNum.append(epoch)
        epoch_loss = []
        epoch_accuracy = []
        start = time.time()
        # Make sure layers such as batch norm are in training mode, whatever
        # state the model was handed over in.
        model.train()
        for batch_idx, (inputs, targets) in enumerate(training_data):
            # Put the batch on the target device.
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()

            # Forward pass through the model being trained (the ``model``
            # argument, not the notebook-global network).
            predictions = model(inputs)

            # Calculate cost
            loss = cost_function(predictions, targets)
            epoch_loss.append(loss.item())

            # Training accuracy of this batch, in percent.
            _, predicted = torch.max(predictions.detach(), 1)
            total = len(targets)
            correct = int(torch.sum(predicted == targets))
            epoch_accuracy.append((correct/total)*100)

            # Backpropagation to calculate gradients
            loss.backward()
            intermediate_end = time.time()

            # Every 300 mini-batches: plot the gradient flow and snapshot the
            # parameters so the update below can be verified.
            run_diagnostics = batch_idx % 300 == 0
            if run_diagnostics:
                plot_grad_flow(model.named_parameters())
                plt.show()
                params_before = {name: param.detach().clone()
                                 for name, param in model.named_parameters()}

            # Perform parameter update
            optimizer.step()

            # Compare against the snapshot: a trainable parameter that is
            # bit-identical after the step was not updated.
            if run_diagnostics:
                stale = [name for name, param in model.named_parameters()
                         if param.requires_grad and torch.equal(params_before[name], param.detach())]
                if stale:
                    print("Parameters didn't update this mini-batch: %s" % ', '.join(stale))
                else:
                    print('Test passed, all parameters updated!')

            # Updates during training
            if batch_idx % 200 == 0:
                print('Batch Index : %s Loss : %.3f Accuracy: %.2f Time : %.3f seconds ' % (batch_idx, np.mean(epoch_loss), np.mean(epoch_accuracy), intermediate_end - start))

        # Put model into eval mode to get validation loss and accuracy
        model.eval()
        avgLossEpoch = np.mean(epoch_loss)
        avgAccuracyEpoch = np.mean(epoch_accuracy)
        training_loss.append(avgLossEpoch)
        training_accuracy.append(avgAccuracyEpoch)

        val_loss, val_accuracy = validation(model, validation_data, cost_function, device)
        validation_loss.append(val_loss)
        validation_accuracy.append(val_accuracy)

        model.train()
        # NOTE(review): the scheduler is driven by the validation loss, so a
        # plateau scheduler passed in here should be configured with mode='min'.
        scheduler.step(val_loss)
        end = time.time()
        print('Time for epoch %s is %.2f s, Training Loss: %.3f, Training Accuracy: %.3f, Validation Loss: %.3f, Validation Accuracy: %.3f'%(epoch, end-start, avgLossEpoch, avgAccuracyEpoch, val_loss, val_accuracy))
    return training_loss, validation_loss, training_accuracy, validation_accuracy, epochNum

def validation(model, validation_data, cost_function, device):
    """Evaluate ``model`` on ``validation_data`` without tracking gradients.

    Args:
        model: Network to evaluate. It is switched to eval mode for the
            duration of the call and restored to its previous mode afterwards.
        validation_data: Iterable of ``(inputs, targets)`` batches.
        cost_function: Loss called as ``cost_function(outputs, targets)``.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the mean of the per-batch losses, and the
        accuracy in percent over all samples. Both are NaN when no samples
        were seen.
    """
    was_training = model.training
    model.eval()
    batch_losses = []
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, targets in validation_data:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                batch_losses.append(cost_function(outputs, targets).item())
                _, predicted = torch.max(outputs, 1)
                # Count samples rather than averaging per-batch percentages,
                # so a smaller final batch is not over-weighted.
                correct += int(torch.sum(predicted == targets))
                total += len(targets)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    if total == 0:
        return float('nan'), float('nan')
    return np.mean(batch_losses), 100.0 * correct / total

In [None]:
def plot_grad_flow(named_parameters):
    """Draw a bar chart of the max and mean absolute gradient per layer.

    The statistics come from ``getGradientsOfNetwork``. The chart is drawn on
    a new pyplot figure; the caller decides when to show it.

    Args:
        named_parameters: Iterable of ``(name, parameter)`` pairs.
    """
    mean_vals, peak_vals, layer_names = getGradientsOfNetwork(named_parameters)
    n_layers = len(mean_vals)
    bar_positions = np.arange(n_layers)

    plt.figure(figsize=(20, 6))
    # Max bars first, mean bars on top of them; both are translucent.
    for heights, colour in ((peak_vals, "c"), (mean_vals, "b")):
        plt.bar(bar_positions, heights, alpha=0.1, lw=1, color=colour)
    # Black baseline marking a gradient of zero.
    plt.hlines(0, 0, n_layers + 1, lw=2, color="k")
    plt.xticks(range(n_layers), layer_names, rotation="vertical")
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)

    # Proxy lines give the legend solid colour swatches.
    legend_entries = (("c", 'max-gradient'), ("b", 'mean-gradient'), ("k", 'zero-gradient'))
    plt.legend([Line2D([0], [0], color=colour, lw=4) for colour, _ in legend_entries],
               [text for _, text in legend_entries])
    
def getGradientsOfNetwork(named_parameters):
    """Collect mean and max absolute gradients of the non-bias parameters.

    Parameters that do not require gradients, whose name contains ``"bias"``,
    or that have no gradient yet (``grad is None``) are skipped.

    Args:
        named_parameters: Iterable of ``(name, parameter)`` pairs.

    Returns:
        Tuple ``(avg_grad, max_grad, layers)`` of equally long lists: mean
        absolute gradient, max absolute gradient (both Python floats) and the
        parameter name, in iteration order.
    """
    layers = []
    avg_grad = []
    max_grad = []
    for n, p in named_parameters:
        if not p.requires_grad or "bias" in n:
            continue
        if p.grad is None:
            # No gradient has been computed for this parameter; nothing to report.
            continue
        grad_magnitude = p.grad.detach().abs()
        layers.append(n)
        # .item() yields plain floats, which plotting code can consume even
        # when the gradients live on a GPU.
        avg_grad.append(grad_magnitude.mean().item())
        max_grad.append(grad_magnitude.max().item())
    return avg_grad, max_grad, layers

# Evaluation

In [None]:
# Run the training loop for 135 epochs with trainResNet's default model, optimizer,
# scheduler and data loaders; keep the per-epoch metric histories for plotting.
training_loss, validation_loss, training_accuracy, validation_accuracy, epochNum = trainResNet(epochs = 135)

In [2]:
def visualizeTrainValidationLoss(training_loss, validation_loss, trainAcc, validAcc, epochNum):
    """Plot training vs. validation loss, then training vs. validation accuracy.

    Args:
        training_loss: Per-epoch training loss values.
        validation_loss: Per-epoch validation loss values.
        trainAcc: Per-epoch training accuracy values.
        validAcc: Per-epoch validation accuracy values.
        epochNum: Epoch numbers used as the x axis of both figures.
    """
    _plot_train_validation_curves(epochNum, training_loss, validation_loss, 'Loss')
    _plot_train_validation_curves(epochNum, trainAcc, validAcc, 'Accuracy')


def _plot_train_validation_curves(epochs, train_values, valid_values, metric_name):
    """Show one figure with a labelled training and validation curve."""
    plt.figure(figsize = (10,6))
    # Label both curves so they can be told apart in the legend.
    plt.plot(epochs, train_values, label='Training')
    plt.plot(epochs, valid_values, label='Validation')
    plt.xlabel('Number of Epochs')
    plt.ylabel(metric_name)
    plt.title('Training vs. Validation %s' % metric_name)
    plt.legend()
    plt.show()

In [14]:
# Plot the loss and accuracy histories returned by the training run.
visualizeTrainValidationLoss(training_loss, validation_loss, training_accuracy, validation_accuracy, epochNum)

In [13]:
# Print torchsummary's layer-by-layer table (output shapes and parameter counts)
# of the network for a single 3x32x32 input.
summary(clf, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
          ResBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
         ResBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13          [-1, 128, 16, 16]          73,728
      BatchNorm2d-14          [-1, 128,