<a href="https://colab.research.google.com/github/abhishekv23/EE954-DL/blob/main/Q3_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implement a CNN backbone model using pytorch. (total 40 marks)

### a. Build a small CNN model consisting of
- 5 convolution blocks, each made up of:
1. Convolution layer
2. ReLU activation
3. Max pooling layer

(10 Marks )

In [None]:
#import libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
from torchvision import datasets
from torchvision.transforms import ToTensor, transforms

from torch.utils.data import DataLoader



In [None]:
def FMNIST_DataLoader(batch_size=32, root='data'):
    """Build train and test DataLoaders for the FashionMNIST dataset.

    Args:
        batch_size: Number of samples per batch for both loaders
            (defaults to 32, the value previously hard-coded).
        root: Directory the dataset is downloaded to / read from.

    Returns:
        A ``(train_dataloader, test_dataloader)`` tuple. Only the training
        loader shuffles its samples.
    """
    # ToTensor is the only transform applied; no Normalize step is used.
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    train_data = datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform)
    test_data = datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform)
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    return train_dataloader, test_dataloader

In [None]:
# FMNIST_DataLoader keeps its datasets local, so load the raw FashionMNIST
# splits at notebook scope for the exploration cells that follow.
train_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())

# `targets` / `data` are used instead of the deprecated
# `train_labels` / `train_data` aliases.
print(train_data.targets)
print(train_data.data[0])
train_data.data

In [None]:
# Divide the raw pixel tensors by 255 and pull out the label tensors.
x_train, x_test = train_data.data / 255, test_data.data / 255
y_train, y_test = train_data.targets, test_data.targets

In [None]:
# Hold out the first 10000 samples for validation; train on the remainder.
x_valid, x_train = x_train[:10000], x_train[10000:]
y_valid, y_train = y_train[:10000], y_train[10000:]
y_train

In [None]:
# One-hot encode the training labels: row k of the identity matrix is the
# one-hot vector for class k, so indexing it with the labels encodes them all.
a = np.array(y_train)
b = np.eye(a.max() + 1)[a]
b

In [None]:
# One-hot encode the validation labels the same way, then turn both one-hot
# arrays into torch tensors.
a1 = np.array(y_valid)
b1 = np.eye(a1.max() + 1)[a1]
y_train, y_valid = torch.tensor(b), torch.tensor(b1)
y_valid

In [None]:
# Shape of a one-sample slice of the training inputs.
x_train[:1].shape

In [None]:
# Show the first training label entry.
y_train[0]

In [None]:
class CNN_Model(nn.Module):
    """Five-stage CNN backbone followed by two linear layers.

    Each stage is Conv2d -> ReLU -> MaxPool2d. Every convolution uses a 2x2
    kernel with stride 1 and padding 1, which grows each spatial dimension
    by one pixel. The shape comments below are for a 1x28x28 input, the size
    the 2048-feature dense layer is built for.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels; 2x2 pool with stride 1 -> 16x28x28
        self.layer1_conv = nn.Conv2d(1, 16, 2, padding=1)
        self.layer1_activ = nn.ReLU()
        self.layer1_maxpool = nn.MaxPool2d(2, stride=1)

        # Stage 2: 16 -> 32 channels; 2x2 pool with stride 1 -> 32x28x28
        self.layer2_conv = nn.Conv2d(16, 32, 2, padding=1)
        self.layer2_activ = nn.ReLU()
        self.layer2_maxpool = nn.MaxPool2d(2, stride=1)

        # Stage 3: 32 -> 64 channels; 2x2 pool with stride 2 -> 64x14x14
        self.layer3_conv = nn.Conv2d(32, 64, 2, padding=1)
        self.layer3_activ = nn.ReLU()
        self.layer3_maxpool = nn.MaxPool2d(2, stride=2)

        # Stage 4: 64 -> 128 channels; 2x2 pool with stride 2 -> 128x7x7
        self.layer4_conv = nn.Conv2d(64, 128, 2, padding=1)
        self.layer4_activ = nn.ReLU()
        self.layer4_maxpool = nn.MaxPool2d(2, stride=2)

        # Stage 5: 128 -> 128 channels; 2x2 pool with stride 2 -> 128x4x4
        self.layer5_conv = nn.Conv2d(128, 128, 2, padding=1)
        self.layer5_activ = nn.ReLU()
        self.layer5_maxpool = nn.MaxPool2d(2, stride=2)

        # Dense layer over the flattened 128*4*4 = 2048 features.
        self.dense1 = nn.Linear(128 * 4 * 4, 20)
        # Output layer: one score per class.
        self.out = nn.Linear(20, 10)

    def forward(self, x):
        """Run ``x`` through the five conv stages and the two linear layers.

        Args:
            x: Input batch; the dense layer expects the conv stack to yield
                128x4x4 features per sample (true for 1x28x28 inputs).

        Returns:
            Tensor with one row of 10 scores per input sample.

        Raises:
            RuntimeError: If the flattened feature count per sample does not
                match the 2048 inputs ``dense1`` expects.
        """
        x = self.layer1_maxpool(self.layer1_activ(self.layer1_conv(x)))
        x = self.layer2_maxpool(self.layer2_activ(self.layer2_conv(x)))
        x = self.layer3_maxpool(self.layer3_activ(self.layer3_conv(x)))
        x = self.layer4_maxpool(self.layer4_activ(self.layer4_conv(x)))
        x = self.layer5_maxpool(self.layer5_activ(self.layer5_conv(x)))

        # Flatten everything except the batch axis. A fixed view(-1, 2048)
        # would silently fold surplus features into extra batch rows when the
        # input size is not the expected one; this keeps one row per sample
        # and lets dense1 reject a mismatched feature count.
        x = torch.flatten(x, start_dim=1)

        # Author's note: the flattened features are meant to feed the dense
        # layers from Q2; the two linear layers below were added for testing.
        # No activation is applied between dense1 and out.
        x = self.dense1(x)
        x = self.out(x)
        return x

In [None]:
# Smoke test: push one batch of 32 uniform-random [0, 1) single-channel
# 28x28 images through a freshly built model and print the raw output.
random_data = torch.rand(32, 1, 28, 28)
cnn_model = CNN_Model()
output_data = cnn_model(random_data)
print(output_data)


In [None]:
# Print the element count of every model parameter and their running total.
# The accumulator must carry the same name the loop adds to.
trainable_param = 0

for name, param in cnn_model.named_parameters():
    print(name, param.numel())
    trainable_param += param.numel()

print(f"Total trainable parameters {trainable_param}")

In [None]:
# Print each parameter's name, size and first two entries along dim 0.
for name, param in cnn_model.named_parameters():
    summary = f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n"
    print(summary)

### b. Experiment with different kernel sizes and numbers of kernels in each layer (10 Marks)
(e.g. keep the number of filters the same in every layer, or double it at each layer) and settle on the combination that performs best for the given problem.

In [None]:
#Train the model
def cnn_model_train(train_dataloader, cnn_model, loss_func, optimizer):
    """Run one training pass over ``train_dataloader``.

    Args:
        train_dataloader: Iterable yielding ``(inputs, targets)`` batches.
        cnn_model: Model to train; it is switched to training mode.
        loss_func: Callable taking ``(predictions, targets)`` and returning
            a scalar loss tensor.
        optimizer: Optimizer that updates ``cnn_model``'s parameters.

    Prints the loss of every 100th batch.
    """
    # Set the model to training mode.
    cnn_model.train()

    for batch, (x_train, y_train) in enumerate(train_dataloader, start=1):
        # Gradients accumulate by default. Clearing them before the backward
        # pass ensures nothing left over from earlier work (e.g. a backward
        # call made before this function ran) leaks into this update.
        optimizer.zero_grad()

        y_predict = cnn_model(x_train)
        loss = loss_func(y_predict, y_train)

        # Backpropagate the prediction loss, then adjust the parameters.
        loss.backward()
        optimizer.step()

        # Print training metrics after every 100 batches.
        if batch % 100 == 0:
            print(f"Train loss: {loss.item()}")


In [None]:
#Test the model
def cnn_model_test(test_dataloader, cnn_model, loss_func):
    """Evaluate ``cnn_model`` on ``test_dataloader`` without computing gradients.

    Args:
        test_dataloader: Iterable yielding ``(inputs, targets)`` batches.
        cnn_model: Model to evaluate; it is switched to evaluation mode.
        loss_func: Callable taking ``(predictions, targets)`` and returning
            a scalar loss tensor.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples. Both are 0.0 when the
        loader yields no batches.
    """
    # Set the model to evaluation mode (important for BN and Dropout layers).
    cnn_model.eval()

    total_loss = 0.0
    correct = 0
    num_samples = 0
    num_batches = 0

    # Ensure that no gradients are computed while testing.
    with torch.no_grad():
        for x_valid, y_valid in test_dataloader:
            predict = cnn_model(x_valid)
            # Accumulate rather than overwrite, so the reported loss covers
            # every batch and not just the most recent one.
            total_loss += loss_func(predict, y_valid).item()

            # Assumes predictions are (batch, classes) scores. Targets with
            # more than one dimension are treated as one-hot/probability rows.
            labels = y_valid.argmax(dim=1) if y_valid.dim() > 1 else y_valid
            correct += (predict.argmax(dim=1) == labels).sum().item()
            num_samples += labels.size(0)
            num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / num_samples if num_samples else 0.0
    print(f"Test loss: {avg_loss}")
    print(f"Test accuracy: {accuracy}")
    return avg_loss, accuracy

In [None]:
#
# Training setup: cross-entropy loss, a fresh model, an Adam optimizer over
# the model's parameters, and the FashionMNIST train/test loaders.
loss_func = nn.CrossEntropyLoss()
cnn_model = CNN_Model()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=1e-3)
train_dataloader, test_dataloader = FMNIST_DataLoader()


### c. Try different weight initialization methods (random, Xavier, He) (5 Marks)

In [None]:
# Unit-testing aid only -- NOT to be executed in the actual run.
# Pulls the first batch from train_dataloader, prints its index, then stops.
for i, (x,y) in enumerate(train_dataloader):
    print(i)
    #print(x[0])
    #print(y[0])
    break


In [None]:
# Run one training pass followed by one evaluation pass per epoch.
epochs = 2
for i in range(epochs):
  # Report epochs 1-based.
  print(f"epoch {i+1}")
  cnn_model_train(train_dataloader, cnn_model, loss_func, optimizer)
  cnn_model_test(test_dataloader, cnn_model, loss_func)

epoch 1
Test loss: 0.0014774209121242166
Test loss: 0.0008170984801836312
Test loss: 0.0017778313485905528
epoch 2
Test loss: 0.0014774209121242166
Test loss: 0.0008170984801836312
Test loss: 0.0017778313485905528


### d. After extracting features from the CNN model, use an MLP for classification (15 Marks)

In [None]:
#(use code from question 2)