# Implementation of Convolutional Neural Networks from scratch

In [1]:
import numpy as np
np.random.seed(1)

In [2]:
def conv_one_step(image_slice, W, b):
    s = W * image_slice      #image_slice.shape = (f,f)
    Z = s.sum()
    Z = np.squeeze(Z+b)
    return Z

In [3]:
def conv2D(images, W, b, hparameters):
    stride = hparameters['stride']
    pad = hparameters['pad']
    m = images.shape[0]         # m denotes no of images
    H,Wid,C = W.shape
    
    # Padding height and width
    images_pad = np.pad(images, ((0,0),(pad,pad),(pad,pad)), 'constant', constant_values=(0))
    
    # Initiating zero matrix of size of Convolutional Block
    Z = np.zeros((m,16,16,4))
    
    for i in range(m): 
        image_pad = images_pad[i]
        for h in range(H):
            for w in range(Wid):
                for c in range(C):
                    vert_start = h * stride
                    vert_end = vert_start + 3
                    hori_start = w * stride
                    hori_end = hori_start + 3
                    image_slice = image_pad[vert_start:vert_end, hori_start:hori_end]
                    Z[i,h, w, c] = conv_one_step(image_slice,W[:,:,c],b[:,:,c])
    cache = (images, W, b, hparameters)        # Will be used during backprop
    return Z, cache

## Let's initialize random weights for computation

In [4]:
# For conv block
W = np.random.randn(3,3,4)
b = np.random.randn(1,1,4)

# For first fully connected layer
W_fc_1024 = np.random.randn(10,1024)
b_fc_1024 = np.zeros((10,1))

# For second fully connected layer
W_10 = np.random.randn(1,10)
b_10 = np.zeros((1,1))

### Let's check our convolutional block and find A using relu activation

In [5]:
np.random.seed(1)
no_of_images = m = 2
images = np.random.randn(m,32,32)  # m images of (32,32)
hparameters = {"pad" : 1,
               "stride": 2}

Z, cache_conv = conv2D(images, W, b, hparameters)
A = np.maximum(0,Z)
assert(A.shape == Z.shape)
activation_cache = Z        # Will be used for backpropogation
print(A.shape)

(2, 16, 16, 4)



Shape of A comes out to be (2,16,16,4) which is same as what we expected.

## Fully connected layers
* For first fully connected layer we just need to reshape it.
* For second fc layer, we compute W*X + b

In [6]:
fc_1024 = np.reshape(A, (m,-1))
assert(fc_1024.shape[1] == 1024)
fc_cache1 = fc_1024     # Will be used while backpropogation

fc_10 = W_fc_1024.dot(fc_1024.T) + b_fc_1024
fc_cache2 = fc_10       # Will be used while backpropogation

y = W_10.dot(fc_10) + b_10
print(y)

[[-12.89682204  13.37765949]]


### Calculating loss
Now, we have our predicted output. Let's define MSE loss function and calculate loss comapring to some random Y_test.

In [7]:
 def MSE_loss(Y, Y_test):
     return np.square(Y - Y_test).mean()

In [8]:
Y_test = np.random.rand(m) * 10
print(MSE_loss(y, Y_test))

220.13788448


## Success
**Now, we have successfully implemented forward propogation of Convolutional Neural Nets.**
# Backward Propogation
In backpropogation, we will find gradients of Weights, biases and layers using chain rule. It will take 5 steps:
1. Find d_Y.
2. Find d_w10, d_b10, d_fc10 (first fully connected).
3. Find d_w1024, d_b1024, d_fc1024 (second fully connected).
4. Find dZ (relu_backward).
5. Find d_images, d_W, d_b (conv_backward).

### Finding d_Y

In [9]:
d_Y = 2 * y

### Finding d_w10, d_b10, d_fc10

In [10]:
d_w10 = np.dot(d_Y, fc_10.T) / m
d_b10 = np.sum(d_Y,axis=1,keepdims=True)/m
d_fc10 = np.dot(W_10.T,d_Y)
assert (d_fc10.shape == fc_10.shape)

### Finding d_w1024, d_b1024, d_fc1024

In [11]:
d_w1024 = np.dot(d_fc10, fc_1024) / m
d_b1024 = np.sum(d_fc10,axis=1,keepdims=True)/m
d_fc1024 = np.dot(W_fc_1024.T,d_fc10).T
assert (d_fc1024.shape == fc_1024.shape)

### Finding dZ

In [12]:
def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) # just converting dz to a correct object.
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)    
    return dZ

Change shape of fc_1024 layer to size of conv_block i.e (m,16,16,4).

In [13]:
d_fc = d_fc1024.reshape((m,16,16,4))

In [14]:
assert (d_fc.shape == A.shape) # Just to be sure d_fc is of correct dimensions
dZ = relu_backward(d_fc, activation_cache)
print(dZ.shape)

(2, 16, 16, 4)


### Finding d_images, d_W, d_b. 

In [15]:
def conv_backward(dZ, cache):
    (images, W, b, hparameters) = cache

    m = dZ.shape[0]
    stride = hparameters['stride']
    pad =hparameters['pad']
    
    # Initialize d_images, dW, db with the zeros
    d_images = np.zeros((m,32,32))   
    dW = np.zeros((3,3,4))
    db = np.zeros((1,1,4))

    images_pad = np.pad(images,((0,0),(pad,pad),(pad,pad)),'constant', constant_values=0)
    d_images_pad = np.pad(d_images,((0,0),(pad,pad),(pad,pad)),'constant', constant_values=0)
    
    for i in range(m):        # for every image
        
        image_pad = images_pad[i,:,:]
        d_image_pad = d_images_pad[i,:,:]
        
        for h in range(16):                  
            for w in range(16):              
                for c in range(4):           
                    
                    # Use the corners to define the slice from image_pad
                    image_slice = images_pad[i,stride*h:stride*h+3,stride*w:stride*w+3]
                    
                    # Update gradients for the window and the filter's parameters
                    d_image_pad[stride*h:stride*h+3,stride*w:stride*w+3] += W[:,:,c] * dZ[i, h, w, c]
                    dW[:,:,c] += image_slice * dZ[i, h, w, c]
                    db[:,:,c] += dZ[i, h, w, c]
                    
        d_images[i, :, :] = d_image_pad[pad:-pad,pad:-pad]
    
    assert(d_images.shape == (m, 32, 32))
    return d_images, dW, db

In [16]:
d_images, dW, db = conv_backward(dZ, cache_conv)
assert (d_images.shape == images.shape == (m,32,32))

**Now we have dW, db, d_w10, d_b10, d_w1024 and d_b1024 and these gradients can be used to minimize our mse_loss function using any of the optimizer such as gradient descent, rmsprop or adam.**