___
# __Convolutional Neural Network from scratch__
### _Author: Aki Taniguchi_
### _Original date: 19/02/2020_
### _Last update: 19/02/2020_
___

## I. Setting environment
___

In [1]:
import sys
sys.path.append("C:\\Users\\tngch\\Python Code and AI\\Neural Network")

# Load libraries
import numpy as np
from Deep_Neural_Network import activation_function

In [6]:
# Seed NumPy's global RNG so the synthetic data below (and any later np.random
# draw made after this cell) is identical on every "Restart & Run All".
np.random.seed(0)

# Synthetic inputs laid out as (channel, height, width, observation):
# 10 random 3-channel images of 32x32 values in [0, 1).
X = np.random.rand(3, 32, 32, 10)

# One binary label (0 or 1) per observation; sized from X so both always agree.
Y = np.random.choice(2, X.shape[3])

# Layer outputs are stored in a dict keyed by the layer index as a string;
# entry '0' is the raw input.
A = {}
A['0'] = X

In [43]:
# Hyper-parameters of the network ("LeNet-5"), written one row per layer so the
# per-layer values stay visibly aligned.
# Columns: (layer kind, filter size, number of kernels, padding, stride).
# For row 0 the "number of kernels" is the number of input channels (RGB);
# for 'fc' rows it is the number of neurons.
_layer_table = [
    ('init',    0,   3, 0, 0),
    ('conv',    5,   8, 0, 1),
    ('maxpool', 2,   8, 0, 2),
    ('conv',    5,  16, 0, 1),
    ('avgpool', 2,  16, 0, 2),
    ('fc',      1, 120, 0, 0),
    ('fc',      1,  84, 0, 0),
]
architecture, filter_size, nb_kernel, padding, stride = map(list, zip(*_layer_table))

# NOTE(review): this list has 6 entries while `architecture` has 7 layers --
# confirm which layer's activation is missing before indexing it per layer.
activations = ['init', 'relu', 'none', 'relu', 'none', 'sigmoid']

# Number of layers, counting the 'init' (input) entry
L = len(architecture)

## II. Initialize models
___

In [46]:
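# Compute the shape of every layer's output A. Note that the input of a fully connected layer has to be vectorized (flattened); that is not done here.
# The dimensions of A are (channel, height, width, observation).
# Output height and width are both computed as int((x + 2p - f) / s + 1), with x the input size, p the padding, f the filter size and s the stride.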
def get_layer_shape(architecture, filter_size, nb_kernel, padding, stride, A):
    """Return the output shape of every layer, starting with the input itself.

    Parameters
    ----------
    architecture : list of str
        Layer kinds. Entries equal to 'fc' are treated as fully connected;
        every other entry from index 1 on is treated as a spatial layer.
        Index 0 is never inspected.
    filter_size, nb_kernel, padding, stride : list of int
        Per-layer settings, indexed like ``architecture``.
    A : dict
        Must hold the input under key '0' as an array whose shape is read as
        (channel, height, width, observation).

    Returns
    -------
    list of tuple
        Entry 0 is the input shape. Spatial layers give
        (kernels, height, width, observations) with each spatial size computed
        as int((x + 2p - f) / s + 1); 'fc' layers give (kernels, observations).
        A final (2, observations) entry is appended for the output layer.
    """
    input_shape = A['0'].shape
    n_obs = input_shape[3]
    height = input_shape[1]
    width = input_shape[2]

    n_layers = len(architecture)
    shapes = [input_shape]

    for idx in range(1, n_layers + 1):
        if idx == n_layers:
            # The output layer has only 2 outcomes because no softmax is used yet;
            # this has to change once softmax is implemented.
            shapes.append((2, n_obs))
        elif architecture[idx] == 'fc':
            shapes.append((nb_kernel[idx], n_obs))
        else:
            # Spatial sizes carry over from one conv/pool layer to the next
            height = int((height + 2 * padding[idx] - filter_size[idx]) / stride[idx] + 1)
            width = int((width + 2 * padding[idx] - filter_size[idx]) / stride[idx] + 1)
            shapes.append((nb_kernel[idx], height, width, n_obs))

    return shapes

In [47]:
# Check get_layer_shape on the hyper-parameters defined above
layer_shape = get_layer_shape(
    architecture=architecture,
    filter_size=filter_size,
    nb_kernel=nb_kernel,
    padding=padding,
    stride=stride,
    A=A,
)

# Left as the last expression of the cell so the notebook displays the list
layer_shape

[(3, 32, 32, 10),
 (8, 28, 28, 10),
 (8, 14, 14, 10),
 (16, 10, 10, 10),
 (16, 5, 5, 10),
 (120, 10),
 (84, 10),
 (2, 10)]

In [69]:
# Parameter shapes (weights W and bias b):
# If Conv/Pool: W is (curr channel, prev channel, filter, filter) and b is (curr channel, 1, 1, 1)
# If FC coming from Conv/Pool: W is (curr channel, vectorized[prev output])
# If FC coming from FC: W is (curr channel, prev channel)
# For FC layers the bias b is always (curr channel, 1)
def initialize_model(architecture, filter_size, nb_kernel, padding, stride, A):
    """Allocate the outputs and parameters of every layer after the input.

    Parameters
    ----------
    architecture : list of str
        Layer kinds; 'fc' marks a fully connected layer, anything else is
        handled as a conv/pool layer.
    filter_size, nb_kernel, padding, stride : list of int
        Per-layer settings, indexed like ``architecture``.
    A : dict
        Must hold the input under key '0' (4D, observations on the last axis).
        It is modified in place: a zero array of the layer's output shape is
        stored under str(l) for every layer l >= 1.

    Returns
    -------
    (A, W, b) : tuple of dict
        ``A`` is the same dict that was passed in. ``W`` and ``b`` are keyed by
        str(l). Conv/pool weights are (k[l], k[l-1], f[l], f[l]) standard-normal
        draws scaled by 0.01 with bias (k[l], 1, 1, 1). Fully connected weights
        are unscaled standard-normal draws of shape (k[l], inputs) with bias
        (k[l], 1), where ``inputs`` is the flattened size of the previous output
        when that layer is not 'fc', and k[l-1] otherwise.
    """
    W, b = {}, {}
    n_obs = A['0'].shape[3]
    shapes = get_layer_shape(architecture, filter_size, nb_kernel, padding, stride, A)

    for idx in range(1, len(architecture)):
        key = str(idx)
        n_units = nb_kernel[idx]

        # Outputs are pre-allocated so the forward pass can fill them index by index
        A[key] = np.zeros((shapes[idx]))

        if architecture[idx] == 'fc':
            if architecture[idx - 1] == 'fc':
                n_inputs = nb_kernel[idx - 1]
            else:
                # First 'fc' after conv/pool: its input is the flattened previous output
                n_inputs = int(np.prod(A[str(idx - 1)].shape) / n_obs)

            W[key] = np.random.randn(n_units, n_inputs)
            b[key] = np.zeros((n_units, 1))
        else:
            W[key] = np.random.randn(n_units, nb_kernel[idx - 1], filter_size[idx], filter_size[idx]) * 0.01
            b[key] = np.zeros((n_units, 1, 1, 1))

    return A, W, b

In [70]:
# Build the model, then list the output / parameter shapes layer by layer
A, W, b = initialize_model(architecture, filter_size, nb_kernel, padding, stride, A)

for l in range(L):
    key = str(l)
    print(architecture[l])
    print("A[{0}] shape: {1}".format(l, A[key].shape))
    if l == 0:
        # The input layer has no parameters
        continue
    print("W[{0}] shape: {1}".format(l, W[key].shape))
    print("b[{0}] shape: {1}".format(l, b[key].shape))

init
A[0] shape: (3, 32, 32, 10)
conv
A[1] shape: (8, 28, 28, 10)
W[1] shape: (8, 3, 5, 5)
b[1] shape: (8, 1, 1, 1)
maxpool
A[2] shape: (8, 14, 14, 10)
W[2] shape: (8, 8, 2, 2)
b[2] shape: (8, 1, 1, 1)
conv
A[3] shape: (16, 10, 10, 10)
W[3] shape: (16, 8, 5, 5)
b[3] shape: (16, 1, 1, 1)
avgpool
A[4] shape: (16, 5, 5, 10)
W[4] shape: (16, 16, 2, 2)
b[4] shape: (16, 1, 1, 1)
fc
A[5] shape: (120, 10)
W[5] shape: (120, 400)
b[5] shape: (120, 1)
fc
A[6] shape: (84, 10)
W[6] shape: (84, 120)
b[6] shape: (84, 1)


## III. Forward prop
___

In [143]:
# Pads the two spatial axes of an array with a constant border.
# Thin wrapper around numpy's pad so that it works for any axis order and for
# arrays that carry extra axes (e.g. a trailing observation axis).
def add_padding(A, padding, value=0, axis=(0, 1, 2)):
    """Surround the height and width axes of ``A`` with a constant border.

    Parameters
    ----------
    A : array_like
        Array to pad. Any number of dimensions is accepted as long as the
        height and width axes named in ``axis`` exist.
    padding : int
        Number of entries added before and after each spatial axis (>= 0).
    value : scalar, optional
        Fill value of the border. Defaults to 0.
    axis : tuple of int, optional
        (kernel, height, width) axis indices. Only the height and width axes
        are padded; the kernel entry is accepted for backward compatibility.

    Returns
    -------
    numpy.ndarray
        A new array whose height and width grew by ``2 * padding``. The result
        is always at least float64, also for integer input.

    Raises
    ------
    ValueError
        If ``padding`` is negative (raised by numpy.pad).
    """
    A = np.asarray(A)
    _, height, width = axis

    # Pad nothing by default, then open up the two spatial axes
    pad_width = [(0, 0)] * A.ndim
    pad_width[height] = (padding, padding)
    pad_width[width] = (padding, padding)

    # Promote to float first so a fractional fill value is not truncated
    # when the input holds integers.
    as_float = A.astype(np.result_type(A.dtype, np.float64), copy=False)

    return np.pad(as_float, pad_width, mode='constant', constant_values=value)

In [144]:
# Check add_padding on the first observation of the input, with a border of 1
first_observation = A['0'][:,:,:,0]
padded_matrix = add_padding(first_observation, 1, value=0, axis=(0, 1, 2))

separator = "___________________"

print("Original matrix shape:", first_observation.shape)
print("Padded matrix:", padded_matrix.shape)
print(separator)
print("Original matrix:", first_observation)
print(separator)
print("Padded matrix:", padded_matrix)

Original matrix shape: (3, 32, 32)
Padded matrix: (3, 34, 34)
___________________
Original matrix: [[[0.68211589 0.16431628 0.21873957 ... 0.4513942  0.79306114 0.32373973]
  [0.96853521 0.15451759 0.25596092 ... 0.88739295 0.98164033 0.61304932]
  [0.82797514 0.23876771 0.26718728 ... 0.8370273  0.10725634 0.24952633]
  ...
  [0.80530619 0.39678488 0.02501216 ... 0.05313387 0.65240194 0.18754455]
  [0.44044975 0.9188455  0.44336157 ... 0.46367228 0.82713147 0.49512589]
  [0.41804523 0.90447351 0.54907265 ... 0.86258199 0.6044871  0.99689135]]

 [[0.3581536  0.79625069 0.6710511  ... 0.53648894 0.39812714 0.74356902]
  [0.3665738  0.3830787  0.4441578  ... 0.51962161 0.02578538 0.23988508]
  [0.47629511 0.15162973 0.09900973 ... 0.88211338 0.03120495 0.98345109]
  ...
  [0.03603372 0.23484851 0.90752172 ... 0.19738284 0.50216651 0.09663376]
  [0.86721578 0.10694605 0.0093212  ... 0.86104792 0.96772179 0.8318124 ]
  [0.02447807 0.89184239 0.54213392 ... 0.65143337 0.37996596 0.53056101]

In [145]:
# Note: this expects the layer outputs stored in A to be 4D arrays
# (channel, height, width, observation)
def slicing(A, layer, observation, height, width, filter_size, padding, stride):
    """Yield every filter-sized window of the input to ``layer`` for one observation.

    Parameters
    ----------
    A : dict
        Layer outputs keyed by str(layer index). The input of ``layer`` is read
        from A[str(layer - 1)], so ``layer`` must be >= 1.
    layer : int
        Index of the layer whose windows are produced; selects the entries of
        ``filter_size``, ``padding`` and ``stride``.
    observation : int
        Index along the last axis of the layer input.
    height, width : int
        Number of window positions along each spatial axis (the spatial size
        of the layer's output).
    filter_size, padding, stride : list of int
        Per-layer settings.

    Yields
    ------
    (window, i, j)
        ``window`` spans all channels and ``filter_size[layer]`` entries along
        each spatial axis of the zero-padded input; ``i`` and ``j`` are the
        row / column position of the window in the output.
    """
    f = filter_size[layer]
    s = stride[layer]

    # Pad once up front: every window is cut from this padded base matrix.
    # All channels are kept, as windows are never sliced through the depth.
    layer_input = A[str(layer - 1)][:, :, :, observation]
    padded_A = add_padding(layer_input, padding[layer], value=0, axis=(0, 1, 2))

    for i in range(height):
        v1 = i * s
        v2 = v1 + f

        for j in range(width):
            h1 = j * s
            h2 = h1 + f

            # padded_A is already restricted to one observation, so it is 3D here
            window = padded_A[:, v1:v2, h1:h2]

            yield window, i, j

In [None]:
# Test slicing: enumerate the windows feeding layer 1 for the first observation
layer, observation = 1, 0
height, width = layer_shape[layer][1], layer_shape[layer][2]

windows = list(slicing(A, layer, observation, height, width, filter_size, padding, stride))
first_window, first_i, first_j = windows[0]

print("Number of windows:", len(windows))
print("First window shape:", first_window.shape, "at position", (first_i, first_j))

In [None]:
def convolutional_forward_propagation():
    """Run one forward pass through the network and return the layer outputs.

    The function takes no arguments. It reads the notebook-level names
    ``architecture``, ``activations``, ``filter_size``, ``padding``, ``stride``,
    ``A``, ``W`` and ``b``. ``A`` must already hold the input under '0' and a
    pre-allocated array of the right shape under str(l) for every conv/pool
    layer (as produced by ``initialize_model``).

    Layers are processed in order:
      * 'conv'    -- each output entry is the sum of the element-wise product of
                     a window with one kernel, plus that kernel's bias, followed
                     by the layer's activation.
      * 'maxpool' -- per-channel maximum of each window.
      * 'avgpool' -- per-channel mean of each window.
      * 'fc'      -- the previous output is vectorized to (features, observations)
                     and transformed as activation(W . input + b).

    NOTE(review): ``activation_function`` is imported from Deep_Neural_Network and
    is not visible here; it is assumed to accept (activation name, array) and to
    return an array of the same shape -- confirm against that module.
    NOTE(review): ``activations`` is indexed per layer, so it needs one entry per
    entry of ``architecture``.

    Returns
    -------
    dict
        The notebook-level ``A``, with the entries of layers 1.. overwritten.

    Raises
    ------
    ValueError
        If a layer kind is not one of 'conv', 'maxpool', 'avgpool', 'fc'.
    """
    n_obs = A['0'].shape[3]

    for l in range(1, len(architecture)):
        key = str(l)
        kind = architecture[l]

        if kind == 'fc':
            # Vectorize a conv/pool output so that every column is one observation
            layer_input = A[str(l - 1)]
            if layer_input.ndim > 2:
                layer_input = layer_input.reshape(-1, layer_input.shape[-1])

            Z = np.dot(W[key], layer_input) + b[key]
            A[key] = activation_function(activations[l], Z)

        elif kind in ('conv', 'maxpool', 'avgpool'):
            out = np.zeros(A[key].shape)
            height, width = out.shape[1], out.shape[2]

            for m in range(n_obs):
                for window, i, j in slicing(A, l, m, height, width, filter_size, padding, stride):
                    if kind == 'conv':
                        # W is (kernel, prev channel, f, f) and window is (prev channel, f, f):
                        # broadcasting handles all kernels of this position at once
                        out[:, i, j, m] = np.sum(W[key] * window, axis=(1, 2, 3)) + b[key][:, 0, 0, 0]
                    elif kind == 'maxpool':
                        out[:, i, j, m] = window.max(axis=(1, 2))
                    else:
                        out[:, i, j, m] = window.mean(axis=(1, 2))

            # Pooling layers have no activation
            A[key] = activation_function(activations[l], out) if kind == 'conv' else out

        else:
            raise ValueError("Unknown layer type: {0!r}".format(kind))

    return A

In [None]:
# Test forward prop: the function takes no arguments; as the last expression of
# the cell, whatever it returns is displayed by the notebook.
convolutional_forward_propagation()