In [58]:
import numpy as np
# from numba import njit
import time

In [59]:
def normalise(v):
    return (v - np.mean(v))/np.std(v)

In [60]:
def softmax(v):
    v = v - np.max(v, axis = 1).reshape(-1,1)
    exp_array = np.exp(v)
    return exp_array / np.sum(exp_array, axis = 1).reshape(-1,1)

In [61]:
def create_filter(n, s) :
      return np.zeros((n, s, s))

In [62]:
def relu(map):    
    output_tensor = np.maximum(map, 0)
    return output_tensor

In [63]:
def pooling(map, dim):
    n_sample = map.shape[0]
    n_channels = map.shape[1]
    map_size = map.shape[2]
    output_size = map.shape[2]//dim

    return map.reshape(n_sample, n_channels, output_size, dim, output_size, dim).max(axis=(3,5))

In [64]:
# @njit
def convolution(sample, kernel):
    kernel_size = kernel.shape[1]
    pad = kernel_size//2
    n_sample, n, h, w = sample.shape
    
    out_sample = np.zeros((n_sample, n, h+2*pad, w+2*pad))
    for j in range(n_sample):
        for i in range(n):
            out_sample[j][i] = np.pad(sample[j][i], (pad,), 'constant', constant_values = 0)

    sample = out_sample
    
    size_feature_map = h # output dimension for same convolution
    n_out_channels = kernel.shape[0] # number of output channels is the number of filters (for now)

    #now we perform the convolution

    # initializing the output tensor to zeros
    output_tensor = np.zeros((n_sample, n_out_channels, size_feature_map, size_feature_map))

    # loop for all the kernels
    for i in range(n_out_channels):
        current_kernel = kernel[i] # get the required kernel

        for r in range(size_feature_map): # for the rows
            for c in range(size_feature_map): # for the columns
                window = sample[:, :, r : r + kernel_size, c : c + kernel_size] #take the window
                value = np.sum(window*current_kernel, axis = (1,2,3)) # multiply with the kernel and sum  up the value
                output_tensor[:, i, r, c] = value # update the result tensor

    return output_tensor # return the result tensor

In [65]:
# @njit
def other_convolution(sample, kernel, pad):
    kernel_size = kernel.shape[1]
    n_sample, n, h, w = sample.shape
    
    out_sample = np.zeros((n_sample, n, h+2*pad, w+2*pad))
    
    for j in range(n_sample):
        for i in range(n):
            out_sample[j][i] = np.pad(sample[j][i], (pad,), 'constant', constant_values = 0)

    sample = out_sample
    
    size_feature_map = h + 2*pad - kernel_size + 1 # output dimension for same convolution
    n_out_channels = kernel.shape[0] # number of output channels is the number of filters (for now)

    #now we perform the convolution

    # initializing the output tensor to zeros
    output_tensor = np.zeros((n_sample, n_out_channels, size_feature_map, size_feature_map))

    # loop for all the kernels
    for i in range(n_out_channels):
        current_kernel = kernel[i] # get the required kernel

        for r in range(size_feature_map): # for the rows
            for c in range(size_feature_map): # for the columns
                window = sample[:, :, r : r + kernel_size, c : c + kernel_size] #take the window
                value = np.sum(window*current_kernel, axis = (1,2,3)) # multiply with the kernel and sum  up the value
                output_tensor[:, i, r, c] = value # update the result tensor

    return output_tensor # return the result tensor

In [66]:
class conv2d:
    def __init__(self, num, size):
        self.kernel = np.random.randn(num, size, size)/9 # Xavier Initialisation
        self.bias = np.random.rand(num, size, size)/9
        self.layer_input = None
        self.layer_output = None
        self.layer_activated = None
        self.kernel_grad = None
        self.size = size
    
    def forward_pass(self, sample): # DONE
        # now I am getting multiple images
        # so another dimension has been added to sample
        
        # input is fine
        self.layer_input = sample
        
        # convolution also fixed to accomodate multiple samples
        output_tensor = convolution(sample, self.kernel)
    
        self.layer_output = output_tensor
        self.layer_activated = relu(output_tensor) # relu is totally fine
        
        return self.layer_activated # return the result tensor
        
    def backward_pass(self, inp_grad): # DONE
        n_sample, n, h, w = self.layer_input.shape
        pass_grad = np.zeros((n_sample, n, h, w))
        
        relu_mat = self.layer_activated
        relu_mat[np.nonzero(relu_mat)] = 1
        
        inp_grad = inp_grad * relu_mat # must be dimensionally equivalent
        other_grad = np.sum(inp_grad, axis = 0)/32
        
        kernel_grad = other_convolution(self.layer_input, other_grad, self.size//2)
        # other convolution also fixed
        
        self.kernel_grad = np.sum(kernel_grad, axis = 0)/32
        
        flip_kernel = np.flip(self.kernel_grad, axis=(0,1))
        not_final = convolution(inp_grad, flip_kernel)
        still_not_final = np.sum(not_final, axis = 1)
        
        
        # for j in range(n_sample):
        #     for i in range(n):
        #         pass_grad[j][i] = still_not_final
                
        for i in range(n):
                pass_grad[:, i] = still_not_final
        
        return pass_grad
        
    def update(self): # DONE
        # print(self.kernel_grad.shape)
        self.kernel -= 0.001*self.kernel_grad # applying gradient descent
        return None
        

In [67]:
class maxpool2d:
    def __init__(self, dim):
        self.dim = dim
        self.layer_input = None
        self.layer_output = None
        
    def forward_pass(self , sample):
        self.layer_input = sample
        self.layer_output = pooling(sample, 2)
        return self.layer_output
    
    # @njit
    def backward_pass(self, inp_grad):
        n_sample, n, h, w = self.layer_input.shape
        x = self.layer_input
        
        pass_mat = np.zeros((n_sample, n,h,w))
    
        # for j in range(n_sample):
        #     for i in range(n):
        #         for r in range(0, h-1, 2):
        #             for c in range(0, w-1, 2):
        #                 window = x[j, i, r:r+2, c:c+2]
        #                 max_ind = np.unravel_index(window.argmax(), window.shape)
        #                 pass_mat[j, i, r:r+2, c:c+2][max_ind] = inp_grad[j, i, r//2, c//2]
        
        for i in range(h//2):
            for j in range(w//2):
                h_start = i * 2
                h_end = h_start + 2
                w_start = j * 2
                w_end = w_start + 2
                
                X = self.layer_input
                X_pool = X[:, :, h_start:h_end, w_start:w_end]
                mask = (X_pool == np.max(X_pool, axis=(2, 3))[:, :, None, None])
                pass_mat[:, :, h_start:h_end, w_start:w_end] += mask * (inp_grad[:, :, i, j])[:, :, None, None]
                
            
        return pass_mat

In [68]:
class fc_1:
    def __init__(self, size, next_size):
        self.weights = np.random.randn(next_size, size)/9
        self.bias = np.random.randn(next_size, )/9
        self.weights_grad = None
        self.bias_grad = None
        self.layer_input = None
        self.layer_output = None
        self.layer_output_active = None
        
    def forward_pass(self, sample):
        self.layer_input = sample
        
        # need to calculate this correctly
        output = sample @ self.weights.T + self.bias
        self.layer_output = output
        self.layer_output_active = relu(output)
        
        return self.layer_output_active
    
    def backward_pass(self, inp_grad):
        relu_mat = self.layer_output_active
        relu_mat[np.nonzero(relu_mat)] = 1
        # print(relu_mat.shape)
        
        relued_grad = inp_grad * relu_mat
        
        pass_grad = relued_grad @ self.weights
        
        self.weights_grad = np.sum(relued_grad.T @ self.layer_input, axis = 0)/32
        self.bias_grad = np.sum(relued_grad, axis = 0)/32
        
        return pass_grad
    
    def update(self):
        self.weights -= 0.001*self.weights_grad
        self.bias -= 0.001*self.bias_grad

In [69]:
class fc_2:
    def __init__(self, size, next_size):
        self.weights = np.random.randn(next_size, size)/9
        self.bias = np.random.randn(next_size, )/9
        self.weights_grad = None
        self.bias_grad = None
        self.layer_input = None
        self.layer_output = None
        
    def forward_pass(self, sample):
        self.layer_input = sample
        output = sample @ self.weights.T + self.bias
        self.layer_output = output
        return self.layer_output
    
    def backward_pass(self, inp_grad):
        pass_grad = inp_grad @ self.weights #this is still missing the RELU gradient
        
        self.weights_grad = np.sum(inp_grad.T @ self.layer_input, axis = 0)/32
        self.bias_grad = np.sum(inp_grad, axis = 0)/32
        
        return pass_grad
    
    def update(self):
        self.weights -= 0.001*self.weights_grad
        self.bias -= 0.001*self.bias_grad
    

In [70]:
a = np.array(([[[1, 1, 2, 4], [1, -1, 1, -1], [1, 1, 1, 1], [1, -1, 1, -1]], [[2, 2, 2, 0], [2, 0, 2, 2], [0, 2, 2, 2], [2, 2, 0, 2]], [[3, 3, 3, 3], [30, 3, 3, 30], [3, 3, 3, -5], [3, 3, 3, -5]]], [[[1, 1, 2, 4], [1, -1, 1, -1], [1, 1, 1, 1], [1, -1, 1, -1]], [[2, 2, 2, 0], [2, 0, 2, 2], [0, 2, 2, 2], [2, 2, 0, 2]], [[3, 3, 3, 3], [30, 3, 3, 30], [3, 3, 3, -5], [3, 3, 3, -5]]]))
b = np.array(([[[5,5,5], [5, 7,5], [5,5,5]], [[5,4,5], [5, 5,5], [5,5,5]], [[5,5,5], [5, 5,5], [5,5,5]]]))
c = np.array([[6], [6], [6]])
d = np.array([[5,5,5], [5, 5,5], [5,5,5]])
# np.pad(a[0], (1,), 'constant', constant_values = 0)
# a.shape
convolution(a, b)
# np.tensordot(b,c, axes = (1, 0))
# np.dot(d, c)
# pooling(a, 2)

array([[[[247., 312., 309., 269.],
         [351., 384., 357., 337.],
         [283., 367., 277., 201.],
         [112., 158.,  78.,  12.]],

        [[235., 300., 295., 255.],
         [279., 374., 338., 268.],
         [242., 353., 259., 174.],
         [ 96., 144.,  64.,  22.]],

        [[235., 300., 295., 255.],
         [285., 380., 345., 275.],
         [275., 355., 265., 205.],
         [100., 150.,  70.,  20.]]],


       [[[247., 312., 309., 269.],
         [351., 384., 357., 337.],
         [283., 367., 277., 201.],
         [112., 158.,  78.,  12.]],

        [[235., 300., 295., 255.],
         [279., 374., 338., 268.],
         [242., 353., 259., 174.],
         [ 96., 144.,  64.,  22.]],

        [[235., 300., 295., 255.],
         [285., 380., 345., 275.],
         [275., 355., 265., 205.],
         [100., 150.,  70.,  20.]]]])

In [71]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [72]:
set1 = unpickle("./content/data_batch_1")
set2 = unpickle("./content/data_batch_2")
set3 = unpickle("./content/data_batch_3")
set4 = unpickle("./content/data_batch_4")
set5 = unpickle("./content/data_batch_5")
x_train = np.vstack((set1[b'data'], set2[b'data'], set3[b'data'], set4[b'data'], set5[b'data']))
y_train = np.hstack((np.array(set1[b'labels']), np.array(set2[b'labels']), np.array(set3[b'labels']), np.array(set4[b'labels']) ,np.array(set5[b'labels']) ))
x_train.shape, y_train.shape

((50000, 3072), (50000,))

In [73]:
x_trial = []
y_trial = y_train[0:32]
for i in range(32):
    x_trial.append(x_train[i].reshape(3,32,32))
x_trial = np.array(x_trial)
y_trial = np.array(y_trial)

In [74]:
conv1 = conv2d(32, 3)
pool1 = maxpool2d(2)
conv2 = conv2d(64, 5)
pool2 = maxpool2d(2)
conv3 = conv2d(64, 3)
fc1 = fc_1(4096, 64)
fc2 = fc_2(64, 10)

In [75]:
def run(x, y):
    a1 = conv1.forward_pass(x)
    a2 = pool1.forward_pass(a1)
    a3 = conv2.forward_pass(a2)
    a4 = pool2.forward_pass(a3)
    
    # cannot flatten this directly at this position
    a5 = conv3.forward_pass(a4).reshape(32, 4096)
    # print(a5.shape)
    a6 = fc1.forward_pass(a5)
    a7 = fc2.forward_pass(a6)
    out = softmax(a7)
    # print(out.shape)
    # print(i, "actual: ", y[i],  "prediction: ", np.argmax(out))

    onehot = np.zeros(10)
    onehot[y] = 1
    grad_fc2 = out - onehot
    # print(grad_fc2.shape)

    grad_fc1 = fc2.backward_pass(grad_fc2)
    grad_conv3 = fc1.backward_pass(grad_fc1).reshape(32, 64, 8, 8)
    grad_pool2 = conv3.backward_pass(grad_conv3)
    grad_conv2 = pool2.backward_pass(grad_pool2)
    grad_pool1 = conv2.backward_pass(grad_conv2)
    grad_conv1 = pool1.backward_pass(grad_pool1)
    init_grad = conv1.backward_pass(grad_conv1)

    fc2.update()
    fc1.update()
    conv3.update()
    conv2.update()
    conv1.update()

In [76]:
y_trial

array([6, 9, 9, 4, 1, 1, 2, 7, 8, 3, 4, 7, 7, 2, 9, 9, 9, 3, 2, 6, 4, 3,
       6, 6, 2, 6, 3, 5, 4, 0, 0, 9])

In [77]:
begin = time.time()
run(x_trial, y_trial)
print(time.time()-begin, " seconds")

9.321716070175171  seconds


In [None]:
conv1 = conv2d(32, 3)
pool1 = maxpool2d(2)
conv2 = conv2d(64, 5)
pool2 = maxpool2d(2)
conv3 = conv2d(64, 3)
fc1 = fc_1(4096, 64)
fc2 = fc_2(64, 10)

In [84]:
def sgd(X, Y):
    begin = time.time()
    for i in range(100):
        idx = np.random.randint(50000, size=32)
        x = X[idx]
        y = Y[idx]
        
        # print(x.shape)

        x = x.reshape(32, 3, 32, 32)
        
        # print(x.shape)
    
        a1 = conv1.forward_pass(x)
        a2 = pool1.forward_pass(a1)
        a3 = conv2.forward_pass(a2)
        a4 = pool2.forward_pass(a3)

        # cannot flatten this directly at this position
        a5 = conv3.forward_pass(a4).reshape(32, 4096)
        # print(a5.shape)
        a6 = fc1.forward_pass(a5)
        a7 = fc2.forward_pass(a6)
        out = softmax(a7)
        # print(out.shape)
        # print(i, "actual: ", y[i],  "prediction: ", np.argmax(out))

        onehot = np.zeros(10)
        onehot[y] = 1
        grad_fc2 = out - onehot
        # print(grad_fc2.shape)

        grad_fc1 = fc2.backward_pass(grad_fc2)
        grad_conv3 = fc1.backward_pass(grad_fc1).reshape(32, 64, 8, 8)
        grad_pool2 = conv3.backward_pass(grad_conv3)
        grad_conv2 = pool2.backward_pass(grad_pool2)
        grad_pool1 = conv2.backward_pass(grad_conv2)
        grad_conv1 = pool1.backward_pass(grad_pool1)
        init_grad = conv1.backward_pass(grad_conv1)

        fc2.update()
        fc1.update()
        conv3.update()
        conv2.update()
        conv1.update()
        
        print(i, "time: ", time.time()-begin, " seconds")

In [None]:
sgd(x_train, y_train)

0 time:  9.375553131103516  seconds
1 time:  18.536211013793945  seconds
2 time:  27.906214952468872  seconds
3 time:  39.283106088638306  seconds
4 time:  50.215439319610596  seconds
5 time:  60.45882821083069  seconds
6 time:  70.59019494056702  seconds
7 time:  80.3832471370697  seconds
8 time:  89.55738806724548  seconds
9 time:  102.79946208000183  seconds
10 time:  113.89669990539551  seconds
11 time:  123.10000419616699  seconds
12 time:  132.6106400489807  seconds
13 time:  143.32399916648865  seconds
14 time:  153.39506912231445  seconds
15 time:  162.510183095932  seconds
16 time:  171.66717910766602  seconds
17 time:  180.84370708465576  seconds
18 time:  190.00481700897217  seconds
19 time:  199.49189114570618  seconds
20 time:  208.65070128440857  seconds
21 time:  218.72846913337708  seconds
22 time:  227.8660192489624  seconds
23 time:  236.99682903289795  seconds
24 time:  246.42545008659363  seconds
25 time:  256.05435013771057  seconds
26 time:  265.20826411247253  se