In [None]:
# Import necessary libraries
import torch
from PIL import Image
import torchvision.transforms as transforms
import math
import numpy as np
import pickle
from decimal import Decimal
import matplotlib.pyplot as plt
torch.set_printoptions(precision=8)

***Loading Images Data***\
Change the `file` variable to the path where the CIFAR-10 batch file `data_batch_1` is stored.

In [None]:
def unpickle(file):
    """Load and return the object pickled in the file at path ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as handle:
        loaded = pickle.load(handle, encoding='bytes')
    return loaded

file = "./cifar-10-python/cifar-10-batches-py/data_batch_1"
# Named `batch` rather than `dict` so the built-in `dict` type is not
# shadowed for the rest of the notebook.
batch = unpickle(file)

Keys = list(batch.keys())
# Look the arrays up by name instead of by position in the key list, so the
# result does not depend on the order in which the keys were pickled.
# (The keys are bytes because the batch is unpickled with encoding='bytes'.)
Images_original = batch[b'data']
Labels = batch[b'labels']

# Images: every image as a float tensor of shape (3, 32, 32) scaled to [0, 1].
# Images_list[k]: the same tensors grouped by class label k (0..9).
Images = []
Images_list = [[] for _ in range(10)]

for label, flat_pixels in zip(Labels, Images_original):
    tensor = torch.tensor(flat_pixels.reshape(3,32,32)).type(torch.FloatTensor) / 255
    Images.append(tensor)
    Images_list[label].append(tensor)


# plt.imshow(Images[4].permute(1, 2, 0), interpolation='nearest')

#Dividing the images into their respective Classes

# plt.imshow(Images_original[2].reshape(3,32,32))

***List of our Activation Functions***

In [None]:
def sigmoid_func(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    Uses ``torch.sigmoid`` instead of ``exp(x) / (1 + exp(x))``: the explicit
    form overflows to ``inf / inf = nan`` for large positive inputs.
    """
    return torch.sigmoid(x)

def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    zero_floor = torch.zeros(x.shape)
    return torch.maximum(zero_floor, x)


def ParaReLU(x,alpha = 0.01):
    """Parametric ReLU: positive values pass through, negative values are scaled by ``alpha``."""
    zeros = torch.zeros(x.shape)
    positive_part = torch.maximum(zeros, x)
    negative_part = torch.minimum(zeros, x)
    return positive_part + alpha*negative_part


def tanh(x):
    """Element-wise hyperbolic tangent.

    Uses ``torch.tanh`` instead of the explicit exponential quotient, which
    evaluates to ``inf / inf = nan`` once ``exp(|x|)`` overflows.
    """
    return torch.tanh(x)

***Question 1***\
Here we write our convolution function (`convulution_function`), which takes an input image as a tensor, a kernel, padding, stride, and \
an activation function.\
Note that since we provide only one filter, the output has a single channel.

In [None]:
def convulution_function(input_tensor,kernel,padding=(0,0),stride=(1,1),activation_func = sigmoid_func):
    """Convolve a multi-channel input with ONE kernel and apply an activation.

    Parameters
    ----------
    input_tensor : tensor of shape (C, H, W).
    kernel : tensor of shape (C, kH, kW); each window of the padded input is
        multiplied element-wise with it and summed over all channels.
    padding : (rows, cols) -- ``padding[0]`` zero rows are added above and
        below, ``padding[1]`` zero columns on the left and right.
    stride : (vertical, horizontal) step of the sliding window.
    activation_func : callable applied to the whole output map.

    Returns
    -------
    Float tensor of shape (1, H_out, W_out) with
    ``H_out = floor((H + 2*padding[0] - kH) / stride[0] + 1)`` (same for W).
    """
    channels, height, width = input_tensor.shape[0], input_tensor.shape[1], input_tensor.shape[2]
    kernel_h, kernel_w = kernel.shape[1], kernel.shape[2]
    pad_h, pad_w = padding[0], padding[1]
    stride_h, stride_w = stride[0], stride[1]

    # Zero-pad: columns first (last dim), then rows over the widened tensor.
    col_zeros = torch.zeros((channels, height, pad_w))
    padded = torch.cat((col_zeros, input_tensor, col_zeros), -1)
    row_zeros = torch.zeros((channels, pad_h, width + 2*pad_w))
    padded = torch.cat((row_zeros, padded, row_zeros), 1)

    padded_h, padded_w = padded.shape[1], padded.shape[2]
    out_h = math.floor(((padded_h - kernel_h)/stride_h) + 1)
    out_w = math.floor(((padded_w - kernel_w)/stride_w) + 1)

    # Every cell is overwritten below; zeros (not random values) keep the
    # result deterministic even if that invariant is ever broken.
    output = torch.zeros(1, out_h, out_w, dtype=torch.float32)

    for out_i, i in enumerate(range(0, padded_h - kernel_h + 1, stride_h)):
        for out_j, j in enumerate(range(0, padded_w - kernel_w + 1, stride_w)):
            window = padded[:, i:i+kernel_h, j:j+kernel_w]
            output[0, out_i, out_j] = torch.mul(window, kernel).sum()

    return activation_func(output)

***Printing outputs for various activation functions***

In [None]:
# A tiny single-channel 3x3 map and one random 2x2 filter: small enough to
# compare the effect of each activation function by eye.
input_activation_map = torch.tensor([[[-1,-2,-3],[4,-5,6],[-7,8,9]]])
kernel = torch.rand(1,2,2)
padding = (0,0)
stride = (1,1)

#printing outputs with various activation funcs
for heading, activation in (
    ("Sigmoid activation = ", sigmoid_func),
    ("ReLU activation = ", ReLU),
    ("Parametric Relu activation = ", ParaReLU),
    ("tanh activation = ", tanh),
):
    print(heading)
    print(convulution_function(input_activation_map,kernel,padding,stride,activation))

***Printing output for Question1***

In [None]:
# Convolve the first CIFAR-10 image with a single random filter.
padding, stride = (0,0), (1,1)
activation_func = sigmoid_func
input_activation_map = Images[0]
kernel = torch.rand(3,3,3) # 3x3 kernel; the first 3 is the number of channels.
output_activation_map = convulution_function(
    input_activation_map,
    kernel,
    padding=padding,
    stride=stride,
    activation_func=activation_func,
)

#Displaying Results
print("Input Image =")
plt.imshow(input_activation_map.permute(1,2,0))  # (C, H, W) -> (H, W, C) for imshow
plt.show()

print("Filter kernel =")
print(kernel)


print("Output Activation Map=")
print(output_activation_map)

***Question2***\
Here we define our pooling reductions and the `Pooling` function, which performs the actual pooling.
The pooling function accepts the output of the convolution function, a stride, a pooling function and a kernel size.\
Since the output of the convolution function is single channel, the output of the pooling function is also single channel.

In [None]:
#Defining our pooling functions

def max_pool(x):
    """Return the largest element of ``x`` as a 0-dim tensor."""
    return x.max()

def avg_pool(x):
    """Return the mean over all elements of the 3-D tensor ``x`` as a 0-dim tensor."""
    element_count = x.size(1) * x.size(0) * x.size(2)
    total = x.sum()
    return total/element_count

def global_avg_pool(x):
    """Return ``[H, W]`` for a (C, H, W) tensor.

    This does not reduce anything itself: it yields the window size that
    covers the whole spatial extent of ``x``.
    """
    dims = x.shape
    return [dims[1], dims[2]]

def Pooling(input_tensor, stride, pooling_func = global_avg_pool,pool_size = []):
    """Slide a pooling window over ``input_tensor`` and reduce each window.

    Parameters
    ----------
    input_tensor : tensor of shape (C, H, W). Each window spans ALL channels,
        so the result always has a single channel.
    stride : (vertical, horizontal) step of the window.
    pooling_func : callable reducing a window to a scalar (e.g. ``max_pool``,
        ``avg_pool``). ``global_avg_pool`` is treated as a request to average
        over the full spatial extent.
    pool_size : (height, width) of the window; defaults to ``stride``.

    Returns
    -------
    Float tensor of shape (1, H_out, W_out).
    """
    height, width = input_tensor.shape[1], input_tensor.shape[2]

    if pooling_func == global_avg_pool:
        # global_avg_pool returns the window size [H, W] rather than a scalar,
        # so using it directly as the reduction cannot work: pool with a
        # full-size window and a plain mean instead.
        pool_size = global_avg_pool(input_tensor)
        pooling_func = lambda window: window.sum()/window.numel()
    elif pool_size is None or len(pool_size) == 0:
        pool_size = [stride[0],stride[1]]

    out_h = math.floor(((height - pool_size[0])/stride[0]) + 1)
    out_w = math.floor(((width - pool_size[1])/stride[1]) + 1)
    # Every cell is overwritten below; zeros keep the output deterministic.
    output = torch.zeros(1, out_h, out_w)

    for out_i, i in enumerate(range(0, height - pool_size[0] + 1, stride[0])):
        for out_j, j in enumerate(range(0, width - pool_size[1] + 1, stride[1])):
            window = input_tensor[:, i:i+pool_size[0], j:j+pool_size[1]]
            output[0, out_i, out_j] = pooling_func(window)

    return output

***Printing Outputs for Question2***


In [None]:
# Max-pool the convolution output from the previous question with a 2x2
# window that moves 3 positions at a time.
kernel_size = (2,2)
stride = (3,3)
pooling_func = max_pool
input_pool = output_activation_map #Input for pooling is output from previous function
output_pool = Pooling(input_pool, stride, pooling_func=pooling_func, pool_size=kernel_size)

#Printing the required values
print("Input activation map= ")
print(input_pool)

print("output activation map=")
print(output_pool)


***Question3***\
 Here we write our convolution layer function, which takes an input image as a tensor, the number of filters, kernel_size, padding, stride, and \
an activation function (and, optionally, the kernels as well).\
The number of channels in the output equals the number of filters.

In [None]:
def Convolution_Layer_Function(input_tensor,number_filters,kernel_size,padding=(0,0),stride=(1,1),activation_func=sigmoid_func,kernels = []):
    """Apply ``number_filters`` convolution filters to a (C, H, W) input.

    Parameters
    ----------
    input_tensor : tensor of shape (C, H, W).
    number_filters : number of output channels.
    kernel_size : (kH, kW), used only when random kernels are generated.
    padding, stride, activation_func : forwarded to ``convulution_function``.
    kernels : optional tensor of shape (number_filters, C, kH, kW). When it is
        empty (the default), uniform random kernels are drawn.

    Returns
    -------
    Tensor of shape (number_filters, H_out, W_out): the single-channel maps
    of the individual filters concatenated along the channel axis.
    """
    channels = input_tensor.shape[0]

    # An explicit emptiness test; `kernels == []` on a tensor only works via
    # the comparison fallback and does not express the intent.
    if kernels is None or len(kernels) == 0:
        kernels = torch.rand(number_filters,channels,kernel_size[0], kernel_size[1])

    feature_maps = [
        convulution_function(input_tensor,kernels[i],padding,stride,activation_func)
        for i in range(number_filters)
    ]
    # One concatenation at the end instead of growing the tensor per filter.
    return torch.cat(feature_maps)

***Printing Output for Question3***

In [None]:
# Run a full convolution layer (8 small-valued random 3x3 filters) on the
# first CIFAR-10 image.
number_filters = 8
kernel_size = (3,3)
stride = (1,1)
padding = (0,0)
activation_func = sigmoid_func
input_activation_map = Images[0]
kernel = torch.rand(number_filters,input_activation_map.shape[0],kernel_size[0],kernel_size[1])/500
output_activation_map = Convolution_Layer_Function(
    input_activation_map,
    number_filters,
    kernel_size,
    padding=padding,
    stride=stride,
    activation_func=activation_func,
    kernels=kernel,
)
#printing the values
print("Input activation map = ")
print(input_activation_map)

print("Filter kernels=")
print(kernel)

print("Output activation map = ")
print(output_activation_map)

***Verifying the size of the output and input.***\
Here we are taking number of filters = 8 and kernel_size = (3,3)\
size of image = 3 x 32 x 32\
since padding = (0,0) and stride = (1,1)\
expected height of output = 32 - 3 + 1= 30\
expected width of output = 32 - 3 + 1 = 30 \
Number of channels of output = 8

In [None]:
#Verifying Size
# Expected geometry from floor((size - kernel + 2*padding) / stride + 1).
expected_height = math.floor((input_activation_map.shape[1] + 2*padding[0] - kernel_size[0])/stride[0] + 1)
expected_width = math.floor((input_activation_map.shape[2] + 2*padding[1] - kernel_size[1])/stride[1] + 1)
print("expected channel size of output=",number_filters)
print("expected height of output = ",expected_height)
print("expected width of output = ",expected_width)


# Actual geometry of the layer output computed above.
actual_shape = output_activation_map.shape
print("channel size of output=",actual_shape[0])
print("height of output = ",actual_shape[1])
print("width of output = ",actual_shape[2])

***Question 4***\
Accepts an input tensor, stride, pooling function and pool_size. For Global Average Pooling, just the input tensor and\
 pool_func (`global_avg_pool` in our case) are enough. Since pooling does not change the number of channels, the input and the output have the same number of channels.

In [None]:
def Pooling_Layer_Function(input_tensor,stride = [],pool_func = global_avg_pool,pool_size = []):
    """Pool every channel of a (C, H, W) tensor independently.

    Parameters
    ----------
    input_tensor : tensor of shape (C, H, W).
    stride : (vertical, horizontal) step; defaults to the full (H, W) extent.
    pool_func : window reduction (e.g. ``max_pool``, ``avg_pool``). Passing
        ``global_avg_pool`` averages each channel over its whole extent.
    pool_size : (height, width) of the window; defaults to ``stride``.

    Returns
    -------
    Float tensor of shape (C, H_out, W_out).
    """
    channels, height, width = input_tensor.shape[0], input_tensor.shape[1], input_tensor.shape[2]

    if stride is None or len(stride) == 0:
        stride = [height, width]
    if pool_size is None or len(pool_size) == 0:
        pool_size = [stride[0],stride[1]]

    if(pool_func == global_avg_pool):
        # Global average pooling: one window covering the whole map.
        pool_size = global_avg_pool(input_tensor)
        pool_func = avg_pool

    out_h = math.floor(((height - pool_size[0])/stride[0]) + 1)
    out_w = math.floor(((width - pool_size[1])/stride[1]) + 1)
    output = torch.zeros(channels, out_h, out_w)

    for c in range(channels):
        channel_map = input_tensor[c].reshape(1, height, width)
        # Forward BOTH stride and pool_size. Previously pool_size was passed in
        # the stride position, so the requested stride was ignored and the
        # result no longer matched the (out_h, out_w) computed above whenever
        # stride != pool_size.
        output[c] = Pooling(channel_map, stride, pool_func, pool_size)[0]

    return output

***Printing values for Question4***\
Let us take the output of question 3 as our input with stride = 2 and kernel size =2

In [None]:
# Max-pool every channel of the Question 3 output with a 2x2 window, stride 2.
kernel_size = (2,2)
stride = (2,2)
pool_func = max_pool
input_pool = output_activation_map
output_pool = Pooling_Layer_Function(input_pool, stride=stride, pool_func=pool_func, pool_size=kernel_size)

#Printing the values
print("Input map = ")
print(input_pool)

print("Output map = ")
print(output_pool)


***Verification of Global Average Pooling Layer***\
Take input tensor =[[1,2],[3,4]]\
Then by applying global average pooling we need to get a single element tensor with 2.5 as answer.


In [None]:
# Single-channel 2x2 map whose global average is (1 + 2 + 3 + 4) / 4 = 2.5.
input_tensor = torch.tensor([[[1,2],[3,4]]])
output_tensor = Pooling_Layer_Function(input_tensor, pool_func=global_avg_pool)

print(output_tensor)

***Question 5***\
Takes an activation map as input and returns a vector of the size specified by the user.

In [None]:
def Flatenning(input_tensor,output_size,W=[]):
    """Flatten a tensor and project it to a vector of length ``output_size``.

    Parameters
    ----------
    input_tensor : tensor of any shape; it is flattened to length N and cast
        to float.
    output_size : length of the returned vector (used when ``W`` is generated).
    W : optional weight matrix of shape (N, output_size). When empty or
        ``None``, small uniform random weights are drawn.

    Returns
    -------
    1-D float tensor ``W^T @ flattened_input``.
    """
    flat = input_tensor.reshape(-1).type(torch.FloatTensor)

    # Explicit emptiness test; `W == []` on a tensor only happens to work via
    # Python's comparison fallback.
    if W is None or len(W) == 0:
        W = torch.rand(flat.size(0),output_size)/500

    return torch.t(W) @ flat

***Printing output for Question5***\
Let us take output of the previous question as our input

In [None]:
# Project the pooled maps from the previous question onto a 10-element vector.
output_size = 10
input_flatenning = output_pool
output_flatenning = Flatenning(input_flatenning, output_size=output_size)

#Printing the outputs
print("Output vector = ")
print(output_flatenning)

print("input activation map = ")
print(input_flatenning)

***Question 6***\
First we define the softmax function and the MLP layer function. The MLP function accepts an input vector, the number of hidden layers,\
the sizes of the hidden layers, output_size and an activation function.

In [None]:
def softmax(x):
    """Softmax over ALL elements of ``x``: exp(x) / sum(exp(x)).

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but prevents ``exp`` from overflowing to ``inf``
    (and the quotient from becoming ``nan``) for large inputs.
    """
    if x.numel() == 0:
        # Nothing to normalise; max() of an empty tensor would raise.
        return torch.exp(x)
    exps = torch.exp(x - x.max())
    return exps/exps.sum()


def MLP(input_tensor, number_hidden_layers,hidden_layer_sizes,output_size,activation_func,Weights = None):
    """Forward pass of a fully connected network without biases.

    Parameters
    ----------
    input_tensor : 1-D input vector.
    number_hidden_layers : number of hidden layers.
    hidden_layer_sizes : iterable with the width of each hidden layer.
    output_size : width of the output layer.
    activation_func : activation applied after every hidden layer.
    Weights : optional list of ``number_hidden_layers + 1`` matrices, where
        matrix ``i`` has shape (size of layer i, size of layer i+1). If
        omitted, fresh uniform random weights are drawn for this call. If an
        empty list is passed, it is filled in place with the generated
        weights so the caller can inspect them.

    Returns
    -------
    (softmax(logits), logits) for the output layer.
    """
    layer_sizes = [input_tensor.size(0), *hidden_layer_sizes, output_size]
    weight_count = number_hidden_layers + 1

    # The default used to be a shared mutable list that this function appended
    # to, so weights generated by one call leaked into every later call.
    if Weights is None:
        Weights = []
    if len(Weights) == 0:
        for i in range(weight_count):
            Weights.append(torch.rand(layer_sizes[i],layer_sizes[i+1]))

    activations = input_tensor

    # Hidden layers: affine map followed by the activation.
    for j in range(weight_count-1):
        activations = activations.type(torch.FloatTensor)
        activations = activation_func(torch.t(Weights[j]) @ activations)

    # Output layer: affine map only; softmax is applied to the returned logits.
    activations = activations.type(torch.FloatTensor)
    logits = torch.t(Weights[-1]) @ activations

    return softmax(logits),logits
    

***Printing the outputs of Question6***

In [None]:
#Let us take the input vector as output of the previous question
input_vector = output_flatenning
hidden_layer_sizes = [4,6,8,10]
number_hidden_layers = len(hidden_layer_sizes)
output_size  = 10
activation_func = sigmoid_func
mlp_result = MLP(input_vector,number_hidden_layers,hidden_layer_sizes,output_size,activation_func)
output_softmax, output_nosoft = mlp_result

#Printing values
print("Output with no softmax = ")
print(output_nosoft)

print("Output with softmax = ")
print(output_softmax)

# Softmax probabilities should add up to 1.
print("Sum of elements in the final tensor with softmax = ")
print(output_softmax.sum())

***Question7***\
Here we write a class for our CNN architecture with a `feed_forward` method that performs the actual forward pass.\
The output of this method is the final tensor with softmax applied, together with the output of the flattening layer.\
On initialization, our model takes the number of input channels and the size of the output vector.


In [None]:
class CNN_Arch:
    """Small randomly initialised CNN: conv -> pool -> conv -> pool -> GAP -> flatten -> MLP."""

    # Class-level placeholders only; the real values are set per instance in
    # __init__. Weights must NOT be a class-level list: it would be shared and
    # appended to by every instance.
    Weights = None
    Final_output = None
    Input_channels = None
    Output_channels = None

    def __init__(self,input_channels,output_channels):
        """Draw random weights for every layer.

        input_channels  -- number of channels of the input image.
        output_channels -- length of the output (class score) vector.
        """
        #Filling our variables
        self.Input_channels = input_channels
        self.Output_channels = output_channels
        # Kept for backward compatibility with the original attribute name.
        self.output_channels = output_channels

        # Index -> role:
        #   0: kernels of the 1st conv layer (16 filters, 3x3)
        #   1: kernels of the 2nd conv layer (8 filters over 16 channels, 3x3)
        #   2: flattening weights (8 -> 8)
        #   3: MLP hidden-layer weights (8 -> 8)
        #   4: MLP output-layer weights (8 -> output_channels)
        self.Weights = [
            torch.rand(16,input_channels,3,3)/100,
            torch.rand(8,16,3,3)/100,
            torch.rand(8,8),
            torch.rand(8,8),
            torch.rand(8,output_channels),
        ]

    def feed_forward(self,input_image):
        """Run one (C, H, W) image through the network.

        Returns ``(softmax_output, flatenning_output)``: the softmax-normalised
        output vector and the vector produced by the flattening layer.
        """
        #1st convolution layer
        final_output = Convolution_Layer_Function(input_image,16,(3,3),(0,0),(1,1),sigmoid_func,self.Weights[0])

        #1st pooling layer
        final_output = Pooling_Layer_Function(final_output,(2,2),max_pool)

        #2nd convolution layer
        final_output = Convolution_Layer_Function(final_output,8,(3,3),(0,0),(1,1),sigmoid_func,self.Weights[1])

        #2nd Pooling Layer
        final_output = Pooling_Layer_Function(final_output,(2,2),max_pool)

        #Global Average Pooling Layer
        final_output = Pooling_Layer_Function(final_output,(1,1),global_avg_pool)

        #Flattening the input
        final_output = Flatenning(final_output,8,self.Weights[2])
        flatenning_output = final_output

        #Passing to MLP (one hidden layer as wide as the flattened vector)
        mlp_weights = [self.Weights[3],self.Weights[4]]
        final_output,final_output_nosoft = MLP(final_output,1,[final_output.size(0)],self.output_channels,sigmoid_func,mlp_weights)

        return final_output,flatenning_output

***Printing Outputs for Question7***\
Verifying the input and output sizes

In [None]:
# Build the model for the first CIFAR-10 image and run one forward pass.
input_image = Images[0]
input_channels, output_size = input_image.shape[0], 10
model = CNN_Arch(input_channels,output_size)
forward_result = model.feed_forward(input_image)
output,flatenning_output = forward_result

#Printing output
print("Shape of input image tensor = ",input_image.shape)


print("Output = ")
print(output)

***Question 8a***\
Printing output from each class.

In [None]:
#Model takes in the input number of input channels and size of output layer
model = CNN_Arch(3,10)

# Feed the first image of every class through the same untrained model.
for class_id in range(10):
    first_image = Images_list[class_id][0]
    final_output,flatenning_output = model.feed_forward(first_image)
    print("Output for class",class_id,"=",final_output)

***Analysis of Question 8a***\
After printing the values, we can see that, irrespective of the image class, there is only a small difference between the output vectors.\
This is because our model is still untrained and hence outputs almost the same vector for every image.

***Question 8b***\
First we need to get our data of bottleneck into an array so that we can perform PCA.


In [None]:
# Three images per class: one bottleneck (flattening) vector and one label each.
sample_indices = [(class_id, sample) for class_id in range(10) for sample in range(3)]

Labels_PCA = [class_id for class_id, _ in sample_indices]
Cells_PCA = np.array([
    model.feed_forward(Images_list[class_id][sample])[1].numpy()
    for class_id, sample in sample_indices
])
print(Cells_PCA.shape)

After getting our data we can perform the Analysis.

In [None]:
from sklearn.decomposition import PCA

#Since we need to draw graph in 2-dimension n_components will be 2
PCA_model = PCA(n_components=2)
reduced_data = PCA_model.fit_transform(Cells_PCA)

#Plotting our reduced data.
# plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9; it takes the same (name, number of colours) arguments.
class_cmap = plt.get_cmap('prism',10)
points = plt.scatter(reduced_data[:,0],reduced_data[:,1], c = Labels_PCA, cmap = class_cmap)
plt.colorbar(points, label="Class label")
plt.title("PCA of bottleneck outputs (untrained model)")
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.show()

***Analysis for Question 8b***\
After performing PCA, we can see that even images of the same class are far apart (i.e. their bottleneck outputs are different).\
If they were similar, clusters of a single color would have formed, but this is not the case, since our model is untrained and only randomly initialised.\
\
Hence we can say that a randomly initialized neural network does not show any discriminability.