# Deep Learning - CNN Classifier

Member 1: Anish Batra, ab8166

Member 2: Prashant Mahajan, prm349

In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.transforms import transforms
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

In [0]:
import torch
import torchvision
from torchvision.transforms import transforms
from torch.utils.data import sampler
import torch.utils.data
import torch.backends.cudnn as cudnn

In [54]:
class getTrainAndValidation(sampler.Sampler):
    def __init__(self, num_samples, start=0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples

    num_train = 49000
    num_val = 1000

    # Perform data augmentation and normalization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1991, 0.2010))
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    # Splitting the dataset into 3 parts - Train, Validate and test
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=64,
        sampler=getTrainAndValidation(num_train, 0),
        num_workers=2)

    valset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train)
    valloader = torch.utils.data.DataLoader(
        valset,
        batch_size=64,
        sampler=getTrainAndValidation(num_val, num_train),
        num_workers=2)

    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=64, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [0]:
# Use CUDA
device = torch.cuda.FloatTensor

In [32]:
device

torch.cuda.FloatTensor

In [0]:
print_every = 100

# Utility to reset the model if we want to re-intialize all our parameters
def reset(m):
    if hasattr(m, 'reset_parameters'):
        m.reset_parameters()


# Flatten is used to convert channels, rows and columns of the data into a single
# long vector in Affine layers.
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size()
        return x.view(
            N, -1)  # Flatten the C* H* W values into a single vector per image

In [0]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #network architecture
        self.model = nn.Sequential(
            # Conv Layer Block 1: conv-bn-relu-conv-bn-relu-pool
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=1,
                padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),

            # Conv Layer Block 2: conv-bn-relu-conv-bn-relu-pool
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
            #     nn.Dropout2d(p=0.05),

            # Conv Layer Block 3: conv-bn-relu-conv-bn-relu-pool
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
            Flatten(),
            nn.Linear(4 * 4 * 128, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            #     nn.Dropout(p=0.1),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        return self.model(x)

In [0]:
def train(model, loss_fn, optimizer, num_epochs=1):
    for epoch in range(num_epochs):
        print("Starting epoch: %d / %d" % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(trainloader):
            # Get the inputs
            inputs = Variable(x.type(device))
            labels = Variable(y.type(device).long())

            scores = model(inputs)
            loss = loss_fn(scores, labels)

            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            # Optimize the weights
            optimizer.zero_grad()

            loss.backward()
            optimizer.step()


def accuracyCheck(model, loader):
    if loader.dataset.train:
        print('Checking validation set accuracy')
    else:
        print('Checking Test set accuracy')

    num_correct = 0
    num_samples = 0
    model.eval()
    ans = []
    with torch.no_grad():
        for x, y in loader:
            inputs = Variable(x.type(device))
            scores = model(inputs)
            _, preds = scores.data.cpu().max(1)

            # To export answers
            values = preds.cpu().numpy()
            ans.extend(values.tolist())

            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        accuracy = float(num_correct) / num_samples
        print('Accuracy: (%.2f)' % (100 * accuracy))
    return ans

In [41]:
# convnet = ShuffleNetV2()
convnet = CNN()
convnet.cuda()

# loss function
loss_fn = nn.CrossEntropyLoss().type(device)
optimizer = optim.Adam(
    list(convnet.parameters()), lr=0.001, weight_decay=5e-4, betas=(0.9, 0.99))

train(convnet, loss_fn, optimizer, num_epochs=50)

Starting epoch: 1 / 50
t = 100, loss = 1.4839
t = 200, loss = 1.2872
t = 300, loss = 1.3412
t = 400, loss = 0.8611
t = 500, loss = 0.9829
t = 600, loss = 0.9628
t = 700, loss = 1.0430
Starting epoch: 2 / 50
t = 100, loss = 0.7547
t = 200, loss = 0.9673
t = 300, loss = 0.9506
t = 400, loss = 0.7634
t = 500, loss = 0.6585
t = 600, loss = 0.8277
t = 700, loss = 0.9160
Starting epoch: 3 / 50
t = 100, loss = 0.5334
t = 200, loss = 0.8791
t = 300, loss = 0.6889
t = 400, loss = 0.6099
t = 500, loss = 0.7906
t = 600, loss = 0.7397
t = 700, loss = 0.7349
Starting epoch: 4 / 50
t = 100, loss = 0.5976
t = 200, loss = 0.7948
t = 300, loss = 0.6707
t = 400, loss = 0.5771
t = 500, loss = 0.5224
t = 600, loss = 0.6420
t = 700, loss = 0.7429
Starting epoch: 5 / 50
t = 100, loss = 0.3827
t = 200, loss = 0.6106
t = 300, loss = 0.6311
t = 400, loss = 0.4884
t = 500, loss = 0.5797
t = 600, loss = 0.5618
t = 700, loss = 0.7788
Starting epoch: 6 / 50
t = 100, loss = 0.4329
t = 200, loss = 0.5717
t = 300, lo

In [48]:
ans = accuracyCheck(convnet, valloader)
np.save('ans2-uni.npy', np.array(ans))

Checking validation set accuracy
Accuracy: (86.90)



# Model Description - 

**Architecture Selection:**

- First implemented a simple convolutional model with one convolutional layer. 

- Initial architecture : [conv-relu-pool] x 2 - [affine] - [softmax]

- After the initial architecture various different architectures were tested, concluding the following final architure -  [conv-batchnorm-relu-pool] x 3 - [affine] - [softmax]

- Implementing Batch normalization resulted in improvement of training speed. 
- Tried adding dropout to the model. Since model didn't overfit, the accuracy was unaffected. 

**Activation Functions**

- Altrhought `Relu` is considered most popular in training CNNs, I used `Adam` optimizer here as it is considered fairly robust to the choice of hyper parameters. (Credits: [Standford Lecture Series](https://youtu.be/LxfUGhug-iQ))
- Adding parameters - `weight_decay` and  `betas` resulted in 5% improvement in the performance 
- Experimentation with learning rate didn't produce any substantial results.

For loss function standard `CrossEntropy` function is used.

**Experimentation**
- Kernels: Tried kernels of size 5 x 5 and 7 x 7, still 3 x 3 kernel gives out the best results
- Data Augmentation: Augmentated the training dataset horizontally to get variation in the dataset 
