In [15]:
%%sh
pip3 install torch torchvision torchaudio
pip3 install torchsummary
pip3 install tqdm
pip3 install matplotlib

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


You should consider upgrading via the '/share/apps/python/3.8.6/intel/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/share/apps/python/3.8.6/intel/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/share/apps/python/3.8.6/intel/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/share/apps/python/3.8.6/intel/bin/python -m pip install --upgrade pip' command.


In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import multiprocessing
import torchvision
import numpy as np
from torch.utils.data import DataLoader
import torch
torch.manual_seed(17)

import numpy as np
from torchsummary import summary
from tqdm import tqdm
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [17]:
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, kernel, skip_kernel, stride=1, bias=True):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=kernel[0], stride=stride, padding=kernel[1], bias=bias)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=kernel[0],
                               stride=1, padding=kernel[1], bias=bias)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes,
                          kernel_size=skip_kernel[0], padding=skip_kernel[1], stride=stride, bias=bias),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, in_planes, num_layers, num_blocks, kernel, skip_kernel, num_classes=10, bias=True):
        if not isinstance(num_blocks, list):
            raise Exception("num_blocks parameter should be a list of integer values")
        if num_layers != len(num_blocks):
            raise Exception("Residual layers should be equal to the length of num_blocks list")
        super(ResNet, self).__init__()
        self.kernel = kernel
        self.skip_kernel = skip_kernel
        self.in_planes = in_planes
        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=kernel[0],
                               stride=1, padding=kernel[1], bias=bias)
        self.bn1 = nn.BatchNorm2d(self.in_planes)
        self.num_layers = num_layers
        self.layer1 = self._make_layer(block, self.in_planes, num_blocks[0], stride=1, bias=bias)
        for i in range(2, num_layers+1):
            setattr(self, "layer"+str(i), self._make_layer(block, 2*self.in_planes, num_blocks[i-1], stride=2, bias=bias))
        finalshape = list(getattr(self, "layer"+str(num_layers))[-1].modules())[-2].num_features
        self.multiplier = 4 if num_layers == 2 else (2 if num_layers == 3 else 1)
        self.linear = nn.Linear(finalshape, num_classes)
        self.path = "./project1_model.pt"

    def _make_layer(self, block, planes, num_blocks, stride, bias=True):
        strides = [stride] + [1]*(num_blocks-1)
        custom_layers = []
        for stride in strides:
            custom_layers.append(block(self.in_planes, planes,self.kernel,self.skip_kernel, stride, bias))
            self.in_planes = planes
        return nn.Sequential(*custom_layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for i in range(1, self.num_layers+1):
            out = eval("self.layer" + str(i) + "(out)")
        out = F.avg_pool2d(out, 4*self.multiplier)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

    def saveToDisk(self):
        torch.save(self.state_dict(), self.path)

    def loadFromDisk(self):
        self.load_state_dict(torch.load(self.path))

def project1_model():
    return ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True)

    model = ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True)
    trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(trainable_parameters)
    x = torch.rand(1, 3, 32, 32)
    model(x)

In [18]:
class DatasetFetcher:
    def __init__(self, dataset="CIFAR10", batch_size=64):
        print("Initializing fetching %s dataset using torchvision"%(dataset))
        self.datasetObject = torchvision.datasets.__dict__.get(dataset, None)
        if self.datasetObject == None:
            raise Exception("Dataset %s not available in torchvision."%(dataset))
        self.batch_size = batch_size
        self.train_transformers = []
        self.test_transformers = []
        self.workersAvailable = min(multiprocessing.cpu_count(), 14)

    def addHorizontalFlipping(self):
        self.train_transformers.append(torchvision.transforms.RandomHorizontalFlip())

    def addVerticalFlipping(self):
        self.train_transformers.append(torchvision.transforms.RandomVerticalFlip())
        
    def addRandomCrop(self, size=32, padding=3):
        self.train_transformers.append(torchvision.transforms.RandomCrop(size=size, padding=padding))
        
    def addHistogramEqualization(self):
        self.train_transformers.append(torchvision.transforms.functional.equalize)
        self.test_transformers.append(torchvision.transforms.functional.equalize)

    def __addToTensor(self):
        self.train_transformers.append(torchvision.transforms.ToTensor())
        self.test_transformers.append(torchvision.transforms.ToTensor())
        
    def __loadTrainNormalizers(self):
        params = np.load("./trainNormalizedParameters.npz")
        return params['mean'], params['std']

    def addNormalizer(self):
        self.__addToTensor()
        trainingDataset = self.datasetObject(root="./data", train=True, download=True)
        trainData = trainingDataset.data/255.0
        mean = trainData.mean(axis=(0, 1, 2))
        std = trainData.std(axis=(0, 1, 2))
        np.savez("./trainNormalizedParameters", mean=mean, std=std)
        self.train_transformers.append(torchvision.transforms.Normalize(mean=mean, std=std))
        self.test_transformers.append(torchvision.transforms.Normalize(mean=mean, std=std))
        
    def addAutoAugmentation(self):
        self.train_transformers.append(torchvision.transforms.AutoAugment(torchvision.transforms.AutoAugmentPolicy.CIFAR10))
    
    def addTrivialAugmentation(self):
        self.train_transformers.append(torchvision.transforms.TrivialAugmentWide())
        self.__addToTensor()

    def getLoaders(self):
        if len(self.train_transformers) == 0:
            self.__addToTensor()
        trainingDataset = self.datasetObject(root="./data", train=True, download=True, transform=torchvision.transforms.Compose(self.train_transformers))
        testingDataset = self.datasetObject(root="./data", train=False, download=True, transform=torchvision.transforms.Compose(self.test_transformers))
        trainLoader = DataLoader(trainingDataset, batch_size=self.batch_size, shuffle=True, num_workers=self.workersAvailable)
        testLoader = DataLoader(testingDataset, batch_size=self.batch_size, shuffle=False, num_workers=self.workersAvailable)
        return trainLoader, testLoader
    
    def getTestLoader(self):
        mean, std = self.__loadTrainNormalizers()
        self.test_transformers.append(torchvision.transforms.ToTensor())
        self.test_transformers.append(torchvision.transforms.Normalize(mean=mean, std=std))
        testingDataset = self.datasetObject(root="./data", train=False, download=True, transform=torchvision.transforms.Compose(self.test_transformers))
        testLoader = DataLoader(testingDataset, batch_size=self.batch_size, shuffle=False, num_workers=self.workersAvailable)
        return testLoader

# df = DatasetFetcher(dataset="CIFAR10", batch_size=128)
# # df.addHorizontalFlipping()
# # df.addRandomCrop(size=32, padding=3)
# # df.addHistogramEqualization()
# # df.addNormalizer()
# df.addTrivialAugmentation()
# trainLoader, testLoader = df.getLoaders()

In [19]:
model = ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True).to(device)
print(summary(model, input_size=(3, 32, 32)))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]           9,248
       BatchNorm2d-4           [-1, 32, 32, 32]              64
            Conv2d-5           [-1, 32, 32, 32]           9,248
       BatchNorm2d-6           [-1, 32, 32, 32]              64
        BasicBlock-7           [-1, 32, 32, 32]               0
            Conv2d-8           [-1, 32, 32, 32]           9,248
       BatchNorm2d-9           [-1, 32, 32, 32]              64
           Conv2d-10           [-1, 32, 32, 32]           9,248
      BatchNorm2d-11           [-1, 32, 32, 32]              64
       BasicBlock-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 32, 32, 32]           9,248
      BatchNorm2d-14           [-1, 32,

In [None]:
# EPOCHS=20
# globalBestAccuracy = 0.0
# trainingLoss = []
# testingLoss = []
# trainingAccuracy = []
# testingAccuracy = []

In [None]:
# # Defining Loss Function, Learning Rate, Weight Decay, Optimizer) 
# lossFunction = torch.nn.CrossEntropyLoss(reduction='sum')
# learningRate = 0.1
# weightDecay = 0.0001
# #optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=weightDecay)
# #optimizer = torch.optim.Adagrad(model.parameters(), lr=learningRate, weight_decay=weightDecay)


# optimizer = torch.optim.Adadelta(model.parameters(), lr=learningRate, weight_decay=weightDecay)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, EPOCHS, eta_min=learningRate/10.0)
# print(model.eval())
# trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print("Total Trainable Parameters : %s"%(trainable_parameters))
# if trainable_parameters > 5*(10**6):
#     raise Exception("Model not under budget!")

In [None]:
# for i in tqdm(range(EPOCHS)):
#     for phase in ['train', 'test']:
#         if phase == "train":
#             loader = trainLoader
#             model.train()
#             optimizer.zero_grad()
#         else:
#             loader = testLoader
#             model.eval()
#         runningLoss = 0.0
#         runningCorrects = 0
#         for images, labels in loader:
#             images = images.to(device)
#             labels = labels.to(device)
#             output = model(images)
#             loss = lossFunction(output, labels)
#             predicted_labels = torch.argmax(output, dim=1)
#             #runningLoss += loss.item()*images.size(0)
#             runningLoss += loss.item()
#             runningCorrects += torch.sum(predicted_labels == labels).float().item()
#             if phase == "train":
#                 loss.backward()
#                 optimizer.step()
#         epochLoss = runningLoss/len(loader.dataset)
#         epochAccuracy = runningCorrects/len(loader.dataset)
#         if phase == "train":
#             scheduler.step()
#             trainingLoss.append(epochLoss)
#             trainingAccuracy.append(epochAccuracy)
#         else:
#             testingLoss.append(epochLoss)
#             testingAccuracy.append(epochAccuracy)
#             if epochAccuracy > globalBestAccuracy:
#                 globalBestAccuracy = epochAccuracy
#                 model.saveToDisk()
#     print("Training Loss : %s, Testing Loss : %s, Training Accuracy : %s, Testing Accuracy : %s"\
#           %(trainingLoss[-1], testingLoss[-1], trainingAccuracy[-1], testingAccuracy[-1]))

In [20]:
optimizers_dict = {
    "adam": torch.optim.Adam,
    "adagrad": torch.optim.Adagrad,
    "adadelta": torch.optim.Adadelta,
    "sgd": torch.optim.SGD
}

In [21]:
def main(model, data_augmentation=['trivial_aug'], epochs=100, optim="adadelta", batch_size=512, print_every=10):
    df = DatasetFetcher(dataset="CIFAR10", batch_size=batch_size)

    for aug in data_augmentation:
        if aug == 'trivial_aug':
            df.addTrivialAugmentation()
        elif aug == 'horizontal_flip':
            df.addHorizontalFlipping()
        elif aug == 'random_crop':
            df.addRandomCrop(size=32, padding=3)
        elif aug == 'histogram_equalization':
            df.addHistogramEqualization()
        elif aug == 'normalizer':
            df.addNormalizer()
        elif aug == 'vertical_flip':
            df.addVerticalFlipping()
        elif aug == 'auto_aug':
            df.addAutoAugmentation()
    
    trainLoader, testLoader = df.getLoaders()

    EPOCHS=epochs
    globalBestAccuracy = 0.0
    trainingLoss = []
    testingLoss = []
    trainingAccuracy = []
    testingAccuracy = []

    # Defining Loss Function, Learning Rate, Weight Decay, Optimizer) 
    lossFunction = torch.nn.CrossEntropyLoss(reduction='sum')
    learningRate = 0.1
    weightDecay = 0.0001

    # optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=weightDecay)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=learningRate, weight_decay=weightDecay)
    # optimizer = torch.optim.Adadelta(model.parameters(), lr=learningRate, weight_decay=weightDecay)

    # optimizers_dict = {
    #     "adam": torch.optim.Adam,
    #     "adagrad": torch.optim.Adagrad,
    #     "adadelta": torch.optim.Adadelta,
    #     "sgd": torch.optim.SGD
    # }

    optimizer_fn = optimizers_dict[optim]
    optimizer = optimizer_fn(model.parameters(), lr=learningRate, weight_decay=weightDecay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, EPOCHS, eta_min=learningRate/10.0)
    # print(model.eval())

    trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Total Trainable Parameters : %s"%(trainable_parameters))
    
    if trainable_parameters > 5*(10**6):
        raise Exception("Model not under budget!")

    print(f"Total Epochs : {EPOCHS} | Optimizer : {optim} | Learning Rate : {learningRate} | Batch Size : {batch_size}")
    print(f"Data Augmentation : {data_augmentation}")

    # for i in tqdm(range(EPOCHS)):
    for i in range(EPOCHS+1):
        for phase in ['train', 'test']:
            if phase == "train":
                loader = trainLoader
                model.train()
                optimizer.zero_grad()
            else:
                loader = testLoader
                model.eval()
            runningLoss = 0.0
            runningCorrects = 0
            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)
                output = model(images)
                loss = lossFunction(output, labels)
                predicted_labels = torch.argmax(output, dim=1)
                #runningLoss += loss.item()*images.size(0)
                runningLoss += loss.item()
                runningCorrects += torch.sum(predicted_labels == labels).float().item()
                if phase == "train":
                    loss.backward()
                    optimizer.step()
            epochLoss = runningLoss/len(loader.dataset)
            epochAccuracy = runningCorrects/len(loader.dataset)
            if phase == "train":
                scheduler.step()
                trainingLoss.append(epochLoss)
                trainingAccuracy.append(epochAccuracy)
            else:
                testingLoss.append(epochLoss)
                testingAccuracy.append(epochAccuracy)
                if epochAccuracy > globalBestAccuracy:
                    globalBestAccuracy = epochAccuracy
                    model.saveToDisk()

        if i % print_every == 0:
            print("Epoch : %s, Training Loss : %s, Testing Loss : %s, Training Accuracy : %s, Testing Accuracy : %s"\
              %(i, trainingLoss[-1], testingLoss[-1], trainingAccuracy[-1], testingAccuracy[-1]))

In [11]:
# Data Augmentation: df.addTrivialAugmentation()
model = ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True).to(device)
main(model, data_augmentation=['trivial_aug'], epochs=200, optim='adadelta', batch_size=512)

Initializing fetching CIFAR10 dataset using torchvision
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:09<00:00, 17628304.78it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Total Trainable Parameters : 3576842
Total Epochs : 200 | Optimizer : adadelta | Learning Rate : 0.1 | Batch Size : 512
Data Augmentation : ['trivial_aug']
Epoch : 0, Training Loss : 1.9580760949707032, Testing Loss : 1.9975725219726563, Training Accuracy : 0.28, Testing Accuracy : 0.2999
Epoch : 10, Training Loss : 0.9269402026367187, Testing Loss : 0.8281432662963867, Training Accuracy : 0.67318, Testing Accuracy : 0.7183
Epoch : 20, Training Loss : 0.6449001486206055, Testing Loss : 0.5719282989501953, Training Accuracy : 0.77022, Testing Accuracy : 0.8119
Epoch : 30, Training Loss : 0.4877888565063477, Testing Loss : 0.6073644805908203, Training Accuracy : 0.83042, Testing Accuracy : 0.8254
Epoch : 40, Training Loss : 0.42329267547607424, Testing Loss : 0.590873991394043, Training Accuracy : 0.85334, Testing Accuracy : 0.839
Epoch : 50, Training Loss : 0.38254834869384763, Testing Loss : 0.5321

In [12]:
model = ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True).to(device)
main(model, data_augmentation=['trivial_aug', 'horizontal_flip', 'random_crop'], epochs=200, optim='adadelta', batch_size=512)

Initializing fetching CIFAR10 dataset using torchvision
Files already downloaded and verified
Files already downloaded and verified
Total Trainable Parameters : 3576842
Total Epochs : 200 | Optimizer : adadelta | Learning Rate : 0.1 | Batch Size : 512
Data Augmentation : ['trivial_aug', 'horizontal_flip', 'random_crop']
Epoch : 0, Training Loss : 1.9790977270507812, Testing Loss : 4.803305517578125, Training Accuracy : 0.26556, Testing Accuracy : 0.1689
Epoch : 10, Training Loss : 1.0910157543945314, Testing Loss : 0.9117696990966797, Training Accuracy : 0.61382, Testing Accuracy : 0.6817
Epoch : 20, Training Loss : 0.8085469342041015, Testing Loss : 0.6477414871215821, Training Accuracy : 0.71572, Testing Accuracy : 0.7758
Epoch : 30, Training Loss : 0.6758965310668945, Testing Loss : 0.4842376159667969, Training Accuracy : 0.76178, Testing Accuracy : 0.8418
Epoch : 40, Training Loss : 0.5987245880126953, Testing Loss : 0.44454466400146486, Training Accuracy : 0.7893, Testing Accuracy

In [13]:
# # Data Augmentation: df.addHorizontalFlipping(), df.addRandomCrop(size=32, padding=3), df.addHistogramEqualization(), df.addNormalizer()
# model = ResNet(BasicBlock, 32, 4, [4, 4, 4, 2],kernel=(3,1),skip_kernel=(1,0), num_classes=10, bias=True).to(device)
# main(model, epochs=100, optim='adadelta')