In [1]:
from scipy.misc import imread
import matplotlib.pyplot as plt

img = imread('./data/bulld.jpg')
plt.imshow(img)
plt.show()

print(img.shape)
print(img[0].shape)
print(img[0][:5])

<Figure size 640x480 with 1 Axes>

(601, 800, 3)
(800, 3)
[[113 154 206]
 [116 157 209]
 [117 159 209]
 [113 155 205]
 [109 150 204]]


In [117]:
import numpy as np

def assess_output_vol(img_dimens, fi, s, padd, K):
    """
        Computes the suitable output volume given S, F, P and K
        Assumes a squared filter
    """

    on_h_axis = (img_dimens[0]-fi + 2*padd[0])/(s) + 1
    assert on_h_axis == int(on_h_axis), "The height axis requires padding : {}".format(on_h_axis)
    on_w_axis = (img_dimens[1]-fi + 2*padd[1])/(s) + 1
    assert on_w_axis == int(on_w_axis),"The width axis requires padding : {}".format(on_w_axis)
    on_d_axis = K
    #print(f'The dimensions of the output volume are \
    #    {on_h_axis} by {on_w_axis} by {on_d_axis}.')
    return (int(on_h_axis), int(on_w_axis), on_d_axis)

def convolutional_layer1(img, filter_size, stride, padding, K):
    """
        Receives a image (H*W*3)
        The kernel size is filter_size*filter_size*3
        The weights are learned by the CNN where each "slice" is a filter
        The depth of the output map is represented by K

        Returns:
            Activation map applying the filter to the input
    """
    weights = np.random.rand(K, filter_size, filter_size, img.shape[2])
    biases = np.random.rand(K, 1)
    print(f'The weights shape is {weights.shape} and biases shape is {biases.shape}')
    print(weights[0,:,:,:].shape) # filter0 or weight0

    out_dims = assess_output_vol(img.shape, filter_size, stride, padding,K)

    activation_map = np.zeros(out_dims)
    assert activation_map.shape == out_dims

    for d in range(out_dims[2]):
        for i in range(0,out_dims[0],stride):
            for j in range(0,out_dims[1],stride):
                activation_map[int(i/4),int(j/4),d] = np.sum(img[i:i+filter_size,j:j+filter_size,:] * weights[d:,:,:]) + biases[d]
    
    print(f'The output volume shape is {activation_map.shape}\n')

    return activation_map

K = 10 # depth of output map

# Testing for suitable strides
assess_output_vol((11,11,3), 5, 2, [0,0], K)
assess_output_vol(img.shape, 22, 4, [21/2,1], K)
# Passing the image through a basic convolution layer
activn_map = convolutional_layer1(img, 22, 4, [21/2,1], K)
print(activn_map[:2,:2,:])

The weights shape is (10, 22, 22, 3) and biases shape is (10, 1)
(22, 22, 3)
The output volume shape is (151, 196, 10)

[[[1339390.53763604 1207973.19209505 1071460.24142865  937533.76038371
    803305.16984127  668083.23083711  534079.74624455  400218.40996307
    269657.40933163  134571.48932194]
  [1346727.59180844 1214571.65238145 1077298.57644341  942609.23587742
    807616.47132029  671688.84537972  537035.17029588  402433.82374281
    271170.29577879  135331.77750813]]

 [[1351783.90002297 1219151.69536316 1081367.97037481  945990.31297758
    810563.04537007  674141.19217319  539013.34586852  403979.14540541
    272028.37884428  135725.46709527]
  [1355393.53406103 1222416.06964252 1084268.09405383  948548.08695895
    812758.91797002  675962.74249308  540455.41161188  405070.28175456
    272760.94096318  136072.50495358]]]


(2, 2, 10)
