In [1]:
from scipy.misc import imread
import matplotlib.pyplot as plt

img = imread('./data/bulld.jpg')
plt.imshow(img)
plt.show()

print(img.shape)
print(img[0].shape)
print(img[0][:5])

<Figure size 640x480 with 1 Axes>

(601, 800, 3)
(800, 3)
[[113 154 206]
 [116 157 209]
 [117 159 209]
 [113 155 205]
 [109 150 204]]


In [123]:
import numpy as np

def assess_output_vol(input_size, fi, s, padd, K):
    """
        Computes the suitable output volume given S, F, P and K
        Assumes a squared filter
    """

    on_h_axis = (input_size[0]-fi + 2*padd[0])/(s) + 1
    assert on_h_axis == int(on_h_axis), "The height axis requires padding : {}".format(on_h_axis)
    on_w_axis = (input_size[1]-fi + 2*padd[1])/(s) + 1
    assert on_w_axis == int(on_w_axis),"The width axis requires padding : {}".format(on_w_axis)
    on_d_axis = K
    #print(f'The dimensions of the output volume are \
    #    {on_h_axis} by {on_w_axis} by {on_d_axis}.')
    return (int(on_h_axis), int(on_w_axis), on_d_axis)

def convolutional_layer1(img, filter_size, stride, padding, K):
    """
        Receives a image (H*W*3)
        The kernel size is filter_size*filter_size*3
        The weights are learned by the CNN where each "slice" is a filter
        The depth of the output map is represented by K

        Returns:
            Activation map applying the filter to the input
    """
    weights = np.random.rand(K, filter_size, filter_size, img.shape[2])
    biases = np.random.rand(K, 1)
    print(f'The weights shape is {weights.shape} and biases shape is {biases.shape}')
    print(weights[0,:,:,:].shape) # filter0 or weight0

    out_dims = assess_output_vol(img.shape, filter_size, stride, padding,K)

    activation_map = np.zeros(out_dims)
    assert activation_map.shape == out_dims

    for d in range(out_dims[2]):
        for i in range(0,out_dims[0],stride):
            for j in range(0,out_dims[1],stride):
                activation_map[int(i/4),int(j/4),d] = np.sum(img[i:i+filter_size,j:j+filter_size,:] * weights[d:,:,:]) + biases[d]
    
    print(f'The output volume shape is {activation_map.shape}\n')

    return activation_map

K = 10 # depth of output map

# Testing for suitable strides
assess_output_vol((11,11,3), 5, 2, [0,0], K)
assess_output_vol(img.shape, 22, 4, [21/2,1], K)
# Passing the image through a basic convolution layer
activn_map_conv = convolutional_layer1(img, 22, 4, [21/2,1], K)
print(activn_map_conv[:2,:2,:])

The weights shape is (10, 22, 22, 3) and biases shape is (10, 1)
(22, 22, 3)
The output volume shape is (151, 196, 10)

[[[1330407.4187215  1195493.15730372 1059510.73270879  924149.0855602
    789616.76798194  660617.77471458  529231.35211964  396691.8529326
    262718.08538522  132275.13347974]
  [1337604.09159686 1201877.47391614 1065205.04047298  929130.3568367
    793863.18107336  664158.92392204  532068.16703962  398830.9398846
    264095.9936101   132931.45458364]]

 [[1342462.04545641 1206211.7288681  1069170.21322827  932714.81313622
    796887.10017989  666696.0990414   534058.93771923  400399.74574183
    265221.60694619  133586.53778719]
  [1345924.50710304 1209364.54515611 1071912.37375292  935115.05621883
    798999.71937799  668473.99480967  535512.32407198  401473.89646173
    265962.68680469  133945.44053794]]]


In [141]:
def find_volume_pooling(input_size, F, S):
    """
        Given the input size (H,W,D) it calculates 
            the output volume (H',W',D) by the pooling operation
    """
    on_h_axis = (input_size[0] - F)/(S) + 1
    assert on_h_axis == int(on_h_axis), "The height axis requires padding : {}".format(on_h_axis)
    on_w_axis = (input_size[1] - F)/(S) + 1
    assert on_w_axis == int(on_w_axis),"The width axis requires padding : {}".format(on_w_axis)
    on_d_axis = input_size[2] # assuming (H,W,D)

    return (int(on_h_axis), int(on_w_axis), on_d_axis)

def pooling_layer1(input, F, S):
    """
        The basic intuition is very similar to the Convolutional layer
            expect it doesn't require weights or biases
        The goal here is to reduce the is to "compress" the input volume by
            discarding everything other than the maximum over a F * F area
        The most common way to compress is to use a MAX operation over each filter slice

    """
    
    out_dims = find_volume_pooling(input.shape, F, S)
    pooling_out = np.zeros(out_dims)

    for d in range(input.shape[2]):
        for i in range(0,input.shape[0],S):
            for j in range(0,input.shape[1],S):
                # Implementing the MAX-Pooling operation
                pooling_out[int(i),int(j),d] = np.max(input[i:i+F,j:j+F,d])
    
    print(f'The output volume shape is {activation_map.shape}\n')

    return pooling_out


F=2 # filter size by convention
S=2 # stride by convention

# Testing the output volume
find_volume_pooling((224,224,64), F, S)
find_volume_pooling((151+1,186,10), F, S) # It means that it requires zero-padding of 1 in the height component

# adding zero padding to height - it is irrelevant to which part
input_pooling = np.empty_like(activn_map_conv)
#print(input_pooling.shape)
t = np.zeros((input_pooling.shape[0],1))
print(t.shape)
for d in range(input_pooling.shape[2]):
    input_pooling[:,:,d] = np.append(activn_map_conv[:,:,d], t,axis=0)
    assert not (input_pooling[:,:,d].shape == activn_map_conv[:,:,d])
#input_pooling = np.append(input_pooling, np.zeros((input_pooling.shape[0],1,1)),axis=0)


(151, 1, 1)


In [None]:
# TODO 
# Implement fully connected layer
# Change the loops to vectorized operations
# Conversion from fully connected layer to CONV layer and vice-versa
# Explore doing a small autograd engine where both pooling and convolutional layers are applied sequentially
#