# [CSCI 3397/PSYC 3317] Lab 7a: MLP and CNN with Pytorch

**Posted:** Monday, March 14, 2022

**Due:** Friday, March 18, 2022

__Total Points__: 3 pts

__Submission__: please rename the .ipynb file as __\<your_username\>\_lab7a.ipynb__ before you submit it to canvas. Example: weidf_lab7a.ipynb.

# 1.  MLP with Pytorch

To build a deep learning model in Pytorch, we need to 
- define the needed layers under `__init__()` 
- specify the model computation under `forward()`. 


The gradient computation is done automatically by PyTorch's autograd when `backward()` is called on the loss tensor (custom gradients can be defined with `torch.autograd.Function` if needed).

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP_oneHiddenLayer(nn.Module):
    """Multi-layer perceptron with a single hidden layer.

    Args:
        input_dim: number of input features per sample after flattening.
        output_dim: number of outputs (classes).
        num_neuron: number of units in the hidden layer.
        nonlinear: callable applied to the hidden layer's output
            (defaults to ``F.relu``).
    """

    def __init__(self, input_dim, output_dim, num_neuron, nonlinear=F.relu):
        super(MLP_oneHiddenLayer, self).__init__()

        self.fc1 = nn.Linear(input_dim, num_neuron)
        self.fc2 = nn.Linear(num_neuron, output_dim)
        self.nonlinear = nonlinear

    def forward(self, x):
        """Return softmax probabilities of shape (batch, output_dim)."""
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        # apply the activation chosen at construction time; a hard-coded
        # ReLU here would silently ignore the `nonlinear` argument
        x = self.nonlinear(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
# print(MLP_oneHiddenLayer(4,3,2))

In [None]:
! pip install hiddenlayer

In [None]:
import hiddenlayer as hl

# Small example network: 10 inputs, 20 outputs and a single hidden neuron.
num_input, num_output, num_neuron = 10,20,1 
model = MLP_oneHiddenLayer(num_input, num_output, num_neuron)

# Build the hiddenlayer graph of the model from a dummy batch holding one
# all-zero sample.
# NOTE(review): transforms='' presumably turns off hiddenlayer's default
# graph simplifications - confirm against the hiddenlayer documentation.
hl.build_graph(model, torch.zeros([1, num_input]), transforms='')

## [1 pt] Exercise 1. N-layer MLP


**[TODO]** Fill in the missing code.

Let's build an MLP model with the specified number of hidden layers and number of neurons.

**Course material: Lecture 12, page 31-32**

In [None]:
class MLP(nn.Module):
    """MLP with a configurable list of hidden layers (exercise template).

    Args:
        input_dim: number of input features per sample after flattening.
        output_dim: number of outputs of the last linear layer.
        num_neuron: list with the number of units of each hidden layer; an
            empty list yields a single linear layer from input to output.
            The default list is only read here, never mutated.
        nonlinear: accepted but not used by the code below, which inserts
            ``nn.ReLU()`` after the first layer.
    """
    def __init__(self, input_dim, output_dim, num_neuron=[], nonlinear=F.relu):
        super(MLP, self).__init__()

        layers = []
        if len(num_neuron) == 0:
            # no hidden layer: map the input directly to the output
            layers += [nn.Linear(input_dim, output_dim)]
        else:
            # first layer
            layers += [nn.Linear(input_dim, num_neuron[0]), nn.ReLU()]
            
            # NOTE: until the exercise below is filled in, only num_neuron[0]
            # and num_neuron[-1] are used to build layers.
            #### YOUR CODE STARTS HERE ####

            #### YOUR CODE ENDS HERE ####
            
            # last layer
            layers += [nn.Linear(num_neuron[-1], output_dim)]

        # pytorch syntax to create layers from a list
        self.layers = nn.Sequential(*layers)
    
   
    def forward(self, x):
        """Flatten each sample, run the layer stack and apply softmax over dim 1."""
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.layers(x)
        return F.softmax(x, dim=1)


# test case: 10 inputs, 20 outputs, three hidden layers of 128 units requested
num_input, num_output = 10,20
num_neuron = [128, 128, 128]
model_mlp = MLP(num_input, num_output, num_neuron)

# Build the hiddenlayer graph from a dummy batch holding one all-zero sample,
# so the constructed layer stack can be inspected visually.
hl.build_graph(model_mlp, torch.zeros([1, num_input]), transforms='')

# 2. CNN with Pytorch

Below are the architecture details of the famous AlexNet. Let's go through its layers by computing the input/output size of each one. 
<img height=400 src="https://cdn.analyticsvidhya.com/wp-content/uploads/2021/03/Screenshot-from-2021-03-19-16-01-03.png"/>
<img height=400 src="https://cdn.analyticsvidhya.com/wp-content/uploads/2021/03/Screenshot-from-2021-03-19-16-01-13.png">


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

# this one is slightly different from the original AlexNet paper
# NOTE(review): no weights argument is passed, so the parameters are
# presumably randomly initialised - confirm against torchvision's docs.
alexnet = models.alexnet()
# bare expression (the method is referenced, not called): as the last line of
# a notebook cell its repr is displayed
alexnet.modules

## (a) Conv Layer.
Below is the function to compute the output size of a convolutional layer.

**Course Material: Lecture 13, page 50**

In [None]:
import numpy as np

def getSizeConv(input_size, kernel_size, pad_size, stride_size):
    """Compute the output size of a 2-D convolutional layer.

    Args:
        input_size: N x C_in x H x W
        kernel_size: weight size, C_out x C_in x KH x KW
        pad_size: [PH, PW] padding added on both sides of H and W, or a
            single int used for both dimensions
        stride_size: [SH, SW] stride along H and W, or a single int used for
            both dimensions

    Returns:
        Integer numpy array N x C_out x OH x OW.
    """
    # accept a bare int for pad/stride, consistent with getSizePool
    if isinstance(pad_size, int):
        pad_size = [pad_size, pad_size]
    if isinstance(stride_size, int):
        stride_size = [stride_size, stride_size]

    output_size = np.zeros(4)
    # 0: batch size is unchanged
    output_size[0] = input_size[0]
    # 1: one output channel per filter
    output_size[1] = kernel_size[0]
    # 2/3: spatial dimensions (height/width); floor division drops the
    # positions where the kernel would not fit entirely
    output_size[2] = (input_size[2] + pad_size[0]*2 - kernel_size[2]) // stride_size[0] + 1
    output_size[3] = (input_size[3] + pad_size[1]*2 - kernel_size[3]) // stride_size[1] + 1

    return output_size.astype(int)


## test case: 227x227 input, 96 kernels of 11x11, no padding, stride 4
## expected spatial size: (227 - 11) // 4 + 1 = 55
input_size = [10,3,227,227]
kernel_size = [96,3,11,11]
pad_size = [0,0]
stride_size = [4,4]

output_gt = [10,96,55,55]
output_pred = getSizeConv(input_size, kernel_size, pad_size, stride_size)

# a max abs diff of 0 means the prediction matches the ground truth
print('gt: ', output_gt)
print('pred: ', output_pred)
print('max abs diff: ', np.abs(output_pred-output_gt).max())

## (b) ReLU layer

In [None]:
def my_relu(input_tensor):
    """Return a copy of `input_tensor` with every negative entry set to 0.

    The input must provide `.copy()` and boolean-mask assignment (e.g. a
    numpy array); it is left unmodified.
    """
    negative_mask = input_tensor < 0
    rectified = input_tensor.copy()
    rectified[negative_mask] = 0
    return rectified

# random test input
test_tensor = torch.randn([2,64,55,55])
# NOTE(review): index 1 of alexnet's `features` is presumably its first ReLU
# layer - confirm against the module listing printed above.
relu1 = alexnet._modules['features'][1]

# my_relu works on a copy and runs first, so the comparison stays valid even
# if relu1 operates in place on test_tensor (TODO confirm whether it does).
outReLU_my = my_relu(test_tensor.detach().numpy())
outReLU_pt = relu1(test_tensor).detach().numpy()

# maximum absolute difference between the two implementations
np.abs(outReLU_my - outReLU_pt).max()

## (c) Pooling layer
Below is the function to compute the output size of a pooling layer.

**Course Material: Lecture 13, page 68**

In [None]:
import numpy as np

def getSizePool(input_size, kernel_size, pad_size, stride_size):
    """Compute the output size of a 2-D pooling layer.

    Args:
        input_size: N x C_in x H x W
        kernel_size: [KH, KW], or an int used for both dimensions
        pad_size: [PH, PW], or an int used for both dimensions
        stride_size: [SH, SW], or an int used for both dimensions

    Returns:
        Integer numpy array N x C_in x OH x OW.
    """
    def as_pair(value):
        # a bare int applies to both spatial dimensions
        return [value, value] if isinstance(value, int) else value

    kernel = as_pair(kernel_size)
    stride = as_pair(stride_size)
    pad = as_pair(pad_size)

    # batch and channel sizes pass through; each spatial dimension shrinks
    # according to the window size, padding and stride
    spatial = [
        (input_size[dim + 2] + pad[dim] * 2 - kernel[dim]) // stride[dim] + 1
        for dim in range(2)
    ]
    sizes = np.array(
        [input_size[0], input_size[1], spatial[0], spatial[1]], dtype=float
    )
    return sizes.astype(int)


## test case: 3x3 pooling window with stride 2 and no padding on a 55x55 map
## expected spatial size: (55 - 3) // 2 + 1 = 27
input_size = [10,96,55,55]
kernel_size = [3,3]
stride_size = [2,2]
pad_size = [0,0]
output_gt = [10,96,27,27]
output_pred = getSizePool(input_size, kernel_size, pad_size, stride_size)

# a max abs diff of 0 means the prediction matches the ground truth
print('gt: ', output_gt)
print('pred: ', output_pred)
print('max abs diff: ', np.abs(output_pred-output_gt).max())

Function to implement the pooling layer (max or average pooling).

In [None]:
def my_pool(pool_ops, input_tensor, kernel_size, pad_size, stride_size):
    """Apply 2-D max or average pooling to a numpy array.

    Args:
        pool_ops: 'max' or 'avg'.
        input_tensor: array of size N x C_in x H x W. It must be floating
            point when 'max' pooling is combined with non-zero padding,
            because the padded border is filled with -inf.
        kernel_size: [KH, KW], or an int used for both dimensions
        pad_size: [PH, PW], or an int used for both dimensions
        stride_size: [SH, SW], or an int used for both dimensions

    Returns:
        Float array of size N x C_in x OH x OW, with the size computed by
        getSizePool.

    Raises:
        ValueError: if `pool_ops` is neither 'max' nor 'avg'.
    """
    # fail loudly instead of silently returning an all-zero tensor
    if pool_ops not in ('max', 'avg'):
        raise ValueError("pool_ops must be 'max' or 'avg', got %r" % (pool_ops,))

    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size]
    if isinstance(stride_size, int):
        stride_size = [stride_size, stride_size]
    if isinstance(pad_size, int):
        pad_size = [pad_size, pad_size]

    output_size = getSizePool(input_tensor.shape, kernel_size, pad_size, stride_size)
    output_tensor = np.zeros(output_size)

    # The output size accounts for padding, so the input has to be padded as
    # well; otherwise every window is shifted and border windows are cut short.
    # -inf never wins a max; zeros are counted in the average, which matches
    # PyTorch's AvgPool2d default (count_include_pad=True).
    if pad_size[0] > 0 or pad_size[1] > 0:
        fill_value = -np.inf if pool_ops == 'max' else 0
        input_tensor = np.pad(
            input_tensor,
            ((0, 0), (0, 0),
             (pad_size[0], pad_size[0]),
             (pad_size[1], pad_size[1])),
            mode='constant', constant_values=fill_value)

    for x in range(output_size[2]):
        row = x * stride_size[0]
        for y in range(output_size[3]):
            col = y * stride_size[1]
            # window covering output position (x, y) for all samples/channels
            patch = input_tensor[:, :, row:row + kernel_size[0],
                                 col:col + kernel_size[1]]
            if pool_ops == 'max':
                output_tensor[:, :, x, y] = patch.max(axis=2).max(axis=2)
            else:
                output_tensor[:, :, x, y] = patch.mean(axis=2).mean(axis=2)
    return output_tensor

# random test input
test_tensor = torch.randn([2,64,55,55])
# NOTE(review): index 2 of alexnet's `features` is presumably its first
# max-pooling layer - confirm against the module listing printed above.
pool1 = alexnet._modules['features'][2]

# run both implementations with the layer's own kernel/padding/stride settings
outPool_my = my_pool('max', test_tensor.detach().numpy(), pool1.kernel_size, pool1.padding, pool1.stride)
outPool_pt = pool1(test_tensor).detach().numpy()

print('Max abs diff for maxpool', np.abs(outPool_my - outPool_pt).max())


## (d) Reshape Layer.
Below is the function to compute the output size of a reshape layer.

In [None]:
import numpy as np

def getSizeReshape(input_size):
    """Compute the output size of a reshape (flatten) layer.

    Args:
        input_size: N x ... (one or more dimensions, batch first)

    Returns:
        Integer numpy array [N, O], where O is the product of all
        dimensions after the first (1 when there are none).
    """
    # dtype=int keeps the result integral even when there are no trailing
    # dimensions; np.prod of an empty sequence is otherwise the float 1.0,
    # which would turn the whole size array into floats
    flat_size = np.prod(input_size[1:], dtype=int)
    return np.array([input_size[0], flat_size])


## test case: flattening a 256 x 6 x 6 feature map gives 256*6*6 = 9216 features
input_size = [10, 256, 6, 6]
output_gt = [10, 9216]
output_pred = getSizeReshape(input_size)

# a max abs diff of 0 means the prediction matches the ground truth
print('gt: ', output_gt)
print('pred: ', output_pred)
print('max abs diff: ', np.abs(output_pred-output_gt).max())

## (e) Dropout layer

**Course materials: Lecture 13, page 72-74**

In [None]:
def my_drop(input_tensor, p):
    """Zero each element of a numpy array independently with probability p.

    Args:
        input_tensor: numpy array of any shape; it is left unmodified.
        p: drop probability in [0, 1].

    Returns:
        A copy of `input_tensor` with the dropped elements set to 0. The
        kept elements are returned as-is; they are NOT rescaled by
        1 / (1 - p) the way torch.nn.Dropout rescales them in training mode.
    """
    output_tensor = input_tensor.copy()
    # one uniform sample per element, so the mask matches inputs of any rank
    # (a mask over only the first two dimensions fails on 1-D inputs and
    # drops whole trailing slices on N-D inputs)
    rand = np.random.rand(*output_tensor.shape)
    output_tensor[rand < p] = 0
    return output_tensor

# random test input
test_tensor = torch.randn([100,4096])
# NOTE(review): index 0 of alexnet's `classifier` is presumably a Dropout
# layer (its `.p` attribute is read below) - confirm against the module listing.
drop1 = alexnet._modules['classifier'][0]

outDrop_my = my_drop(test_tensor.detach().numpy(), drop1.p)
outDrop_pt = drop1(test_tensor).detach().numpy()

num = float(test_tensor.nelement())
# Both masks are random, so only the fraction of zeroed elements is compared;
# the difference should be small but doesn't have to be 0.
np.abs((outDrop_my==0).sum()/num - (outDrop_pt==0).sum()/num)

## (f) **Exercise 2** [2 pts] FC layer.
Implement the function to compute the output size of a fully-connected (fc) layer.

**Course Material: Lecture 12, page 6**

In [None]:
import numpy as np

def getSizeFc(input_size, weight_size):  
    """Compute the output size of a fully-connected layer (exercise template).

    Args:
        input_size: N x L
        weight_size: M x L

    Returns:
        output_size: N x M. The exercise code below must assign
        `output_size`; until it does, the return statement raises NameError.
    """
    # input_size: N x L
    # weight_size: M x L
    # -----
    # output_size: N x M
    
    #### YOUR CODE STARTS HERE ####

    #### YOUR CODE ENDS HERE ####    
    
    return output_size


## test case: 10 samples with 4096 features and a weight matrix with 1000 rows
input_size = [10, 4096]
weight_size = [1000, 4096]
output_gt = [10,1000]
output_pred = getSizeFc(input_size, weight_size)

# a max abs diff of 0 means the prediction matches the ground truth
print('gt: ', output_gt)
print('pred: ', output_pred)
print('max abs diff: ', np.abs(output_pred-output_gt).max())

After you have finished the exercise above, fill in the function below to implement the FC layer.

In [None]:
def my_fc(input_tensor, weight_tensor, bias_tensor):
    """Apply a fully-connected layer sample by sample (exercise template).

    Args:
        input_tensor: array of size N x L
        weight_tensor: array of size M x L
        bias_tensor: bias of the layer; not referenced by the template code
            (presumably of length M - confirm against the caller)

    Returns:
        Array of size N x M, allocated as zeros with the size computed by
        getSizeFc and meant to be filled in by the exercise code.
    """
    # input_tensor size: N x L
    # weight_tensor size: M x L  
    # output_size: N x M  

    output_size = getSizeFc(input_tensor.shape, weight_tensor.shape)
    output_tensor = np.zeros(output_size)
    # NOTE: the loop body is left for the exercise; the loop has no statement
    # in it until the code between the markers is written.
    for sample_id in range(input_tensor.shape[0]):
        # hint: numpy matrix multiplication is np.matmul
        # a*b is the element-wise multiplication
        #### YOUR CODE STARTS HERE ####

        #### YOUR CODE ENDS HERE ####        
    return output_tensor

# random test input
test_tensor = torch.randn([2,4096])
# NOTE(review): index 4 of alexnet's `classifier` is presumably a Linear layer
# (its weight and bias are read below) - confirm against the module listing.
fc7 = alexnet._modules['classifier'][4]

# run both implementations with the layer's own weight and bias
outFc_my = my_fc(test_tensor.detach().numpy(), fc7.weight.detach().numpy(), fc7.bias.detach().numpy())
outFc_pt = fc7(test_tensor).detach().numpy()

# maximum absolute difference between the two implementations
np.abs(outFc_my - outFc_pt).max()


## (g) Putting things together: here's the output size for each layer of AlexNet

In [None]:
# Layer specifications of AlexNet, in forward order:
#   conv / pool layers: [name, kernel (or weight) size, pad size, stride size]
#   reshape layer:      [name]
#   fc layers:          [name, weight size]
alexNet = [
    ['conv1', [96, 3, 11, 11], [0, 0], [4, 4]],
    ['pool1', [3, 3], [0, 0], [2, 2]],
    ['conv2', [256, 96, 5, 5], [2, 2], [1, 1]],
    ['pool2', [3, 3], [0, 0], [2, 2]],
    ['conv3', [384, 256, 3, 3], [1, 1], [1, 1]],
    ['conv4', [384, 384, 3, 3], [1, 1], [1, 1]],
    ['conv5', [256, 384, 3, 3], [1, 1], [1, 1]],
    ['pool5', [3, 3], [0, 0], [2, 2]],
    ['reshape'],
    ['fc6', [4096, 9216]],
    ['fc7', [4096, 4096]],
    ['fc8', [1000, 4096]],
]

# The fc6 weight size (9216 = 256*6*6 input features) assumes a 227x227 input.
# The conv/pool size formulas accept other spatial sizes (e.g. 600x800), but
# the flattened feature count would then no longer match fc6.
tensor_size = [10, 3, 227, 227]
for layer in alexNet:
    layer_name = layer[0]
    if 'conv' in layer_name:
        kernel_size, pad_size, stride_size = layer[1:]
        tensor_size = getSizeConv(tensor_size, kernel_size, pad_size, stride_size)
    elif 'pool' in layer_name:
        kernel_size, pad_size, stride_size = layer[1:]
        tensor_size = getSizePool(tensor_size, kernel_size, pad_size, stride_size)
    elif 'reshape' in layer_name:
        tensor_size = getSizeReshape(tensor_size)
    elif 'fc' in layer_name:
        weight_size = layer[1]
        tensor_size = getSizeFc(tensor_size, weight_size)
    # print exactly once per layer, always with the current layer's own name
    print(layer_name, tensor_size)