In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# Load the MNIST train/test splits (images and integer labels) through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
def normalize(v):
    """Divide ``v`` by its norm as computed by ``np.linalg.norm``.

    Returns ``v`` itself, untouched, when that norm is exactly zero;
    otherwise returns ``v / norm``.
    """
    magnitude = np.linalg.norm(v)
    return v if magnitude == 0 else v / magnitude

In [None]:
### Array shapes: a = (m, n_H, n_W, n_C), W = (f, f, n_C_prev, n_C), b = (1, 1, 1, n_C)
### hparams is a dict of layer hyperparameters: "stride" and "pad" for convolution layers, "f" and "stride" for pooling layers

In [None]:
def zero_pad(input_array, pad):
    """Zero-pad the two middle axes of a 4-D array.

    ``pad`` zeros are added on both sides of axis 1 and axis 2; the first
    and last axes are left unpadded.
    """
    widths = ((0, 0), (pad, pad), (pad, pad), (0, 0))
    return np.pad(input_array, widths, mode='constant', constant_values=(0, 0))

In [None]:
def conv_single_step(a_prev, W, b):
    """Apply one filter to one input window.

    Multiplies ``a_prev`` and ``W`` element-wise, sums every product and
    adds the bias ``b`` after squeezing away its singleton dimensions.
    """
    weighted_total = np.multiply(a_prev, W).sum()
    return weighted_total + np.squeeze(b)

In [None]:
def conv_forward_step(a_prev, W, b, hparams):
    """Forward pass of a convolution layer (explicit-loop implementation).

    Arguments:
    a_prev  -- input activations, shape (m, n_H_prev, n_W_prev, n_C_prev)
    W       -- filters, shape (f, f, n_C_prev, n_C)
    b       -- biases, shape (1, 1, 1, n_C)
    hparams -- dict with the entries "stride" and "pad"

    Returns:
    Z     -- convolution output, shape (m, n_H, n_W, n_C)
    cache -- (a_prev, W, b, hparams), the tuple conv_backward unpacks
    """
    stride = hparams["stride"]
    pad = hparams["pad"]

    (m, n_H_prev, n_W_prev, n_C_prev) = a_prev.shape
    # Filters are laid out (f, f, n_C_prev, n_C): the LAST axis is the output
    # channel, which is also how W[:, :, :, c] is indexed below.
    (f, f, n_C_prev, n_C) = W.shape

    n_H = int((n_H_prev + 2*pad - f)/stride) + 1
    n_W = int((n_W_prev + 2*pad - f)/stride) + 1

    Z = np.zeros((m, n_H, n_W, n_C))

    A_pad_prev = zero_pad(a_prev, pad)

    for i in range(m):
        # Work on the i-th padded example only (indexing with m is out of range).
        a_pad_prev = A_pad_prev[i]
        for h in range(n_H):
            v_start = h*stride
            v_end = v_start + f
            for w in range(n_W):
                h_start = w*stride
                h_end = h_start + f

                # The window does not depend on the output channel, so slice it once.
                a_slice_prev = a_pad_prev[v_start:v_end, h_start:h_end, :]
                for c in range(n_C):
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:, :, :, c], b[:, :, :, c])

    cache = (a_prev, W, b, hparams)

    return Z, cache

In [None]:
def pooling_layer(a_prev, hparams, mode="max"):  # default pooling is max
    """Forward pass of a pooling layer.

    Arguments:
    a_prev  -- input activations, shape (m, n_H_prev, n_W_prev, n_C_prev)
    hparams -- dict with the entries "f" (window size) and "stride"
    mode    -- "max" or "average"

    Returns:
    A     -- pooled output, shape (m, n_H, n_W, n_C_prev)
    cache -- (a_prev, hparams), the tuple pooling_backwards unpacks

    Raises:
    ValueError -- if mode is neither "max" nor "average" (previously the
                  output was silently left as zeros)
    """
    if mode == "max":
        reduce_window = np.max
    elif mode == "average":
        reduce_window = np.mean
    else:
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))

    (m, n_H_prev, n_W_prev, n_C_prev) = a_prev.shape

    f = hparams["f"]
    stride = hparams["stride"]

    n_H = int(1 + (n_H_prev - f)/stride)
    n_W = int(1 + (n_W_prev - f)/stride)
    n_C = n_C_prev

    A = np.zeros((m, n_H, n_W, n_C))

    for i in range(m):
        for h in range(n_H):
            v_start = h*stride
            v_end = v_start + f
            for w in range(n_W):
                # The horizontal offset follows the column index w, not the row index h.
                h_start = w*stride
                h_end = h_start + f

                # Reduce over the two spatial axes; every channel is pooled independently.
                window = a_prev[i, v_start:v_end, h_start:h_end, :]
                A[i, h, w, :] = reduce_window(window, axis=(0, 1))

    cache = (a_prev, hparams)

    return A, cache

In [None]:
def conv_backward(dZ, cache):
    """Backward pass of a convolution layer.

    Arguments:
    dZ    -- gradient of the cost w.r.t. the layer output, shape (m, n_H, n_W, n_C)
    cache -- (A_prev, W, b, hparams) as returned by conv_forward_step

    Returns:
    dA_prev -- gradient w.r.t. the layer input, same shape as A_prev
    dW      -- gradient w.r.t. the filters, same shape as W
    db      -- gradient w.r.t. the biases, shape (1, 1, 1, n_C)
    """
    (A_prev, W, b, hparams) = cache
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape

    stride = hparams["stride"]
    pad = hparams["pad"]

    (m, n_H, n_W, n_C) = dZ.shape

    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    A_pad_prev = zero_pad(A_prev, pad)
    dA_pad_prev = zero_pad(dA_prev, pad)

    for i in range(m):

        a_pad_prev = A_pad_prev[i, :, :, :]
        # Gradient buffer for this example; it must come from dA_pad_prev,
        # otherwise the += below would overwrite the activations.
        da_pad_prev = dA_pad_prev[i, :, :, :]

        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    v_start = stride*h
                    v_end = v_start + f
                    h_start = stride*w
                    h_end = h_start + f

                    a_slice = a_pad_prev[v_start:v_end, h_start:h_end, :]
                    da_pad_prev[v_start:v_end, h_start:h_end, :] += np.multiply(W[:, :, :, c], dZ[i, h, w, c])
                    # Every window and every example contributes, so accumulate.
                    dW[:, :, :, c] += a_slice*dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]

        # Strip the padding with explicit bounds: a pad:-pad slice is empty when pad == 0.
        dA_prev[i, :, :, :] = da_pad_prev[pad:pad + n_H_prev, pad:pad + n_W_prev, :]

    # Return only after every example has been processed.
    return dA_prev, dW, db

In [None]:
def pooling_backwards(dA, cache, mode = "max"):
    """Backward pass of a pooling layer.

    Arguments:
    dA    -- gradient of the cost w.r.t. the pooling output, shape (m, n_H, n_W, n_C)
    cache -- (A_prev, hparams) as returned by pooling_layer
    mode  -- "max" or "average"; must match the mode used in the forward pass

    Returns:
    dA_prev -- gradient w.r.t. the pooling input, same shape as A_prev

    Raises:
    ValueError -- if mode is neither "max" nor "average" (previously the
                  result was silently all zeros)
    """
    if mode not in ("max", "average"):
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))

    (A_prev, hparams) = cache

    stride = hparams["stride"]
    f = hparams["f"]

    m, n_H, n_W, n_C = dA.shape

    dA_prev = np.zeros(A_prev.shape)

    for i in range(m):
        a_prev = A_prev[i, :, :, :]

        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    v_start = h*stride
                    v_end = v_start + f
                    h_start = w*stride
                    h_end = h_start + f

                    if mode == "max":
                        # Route the gradient to the position(s) holding the window maximum.
                        a_slice_prev = a_prev[v_start:v_end, h_start:h_end, c]
                        mask = (a_slice_prev == np.max(a_slice_prev))
                        dA_prev[i, v_start:v_end, h_start:h_end, c] += np.multiply(dA[i, h, w, c], mask)

                    else:
                        # An f x f window holds f*f entries, each receiving an equal share.
                        dA_prev[i, v_start:v_end, h_start:h_end, c] += dA[i, h, w, c]/(f*f)

    return dA_prev

In [None]:
def initialize_params_fc(layer_dims):
    """Create the parameters of a stack of fully connected layers.

    For every consecutive pair of sizes in ``layer_dims`` the result holds
    "W<l>", a standard-normal matrix of shape (size_l, size_{l-1}), and
    "b<l>", a zero column vector of shape (size_l, 1), with l counted from 1.
    """
    params_fc = {}
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])

    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        layer_id = str(idx)
        params_fc["W" + layer_id] = np.random.randn(fan_out, fan_in)
        params_fc["b" + layer_id] = np.zeros((fan_out, 1))

    return params_fc
    

In [None]:
def relu(x):
    """Return (x with non-positive entries zeroed, x); the second item is the cache."""
    positive_mask = x > 0
    activated = x * positive_mask
    return activated, x

def sigmoid(x):
    """Return (1 / (1 + exp(-x)), x); the second item is the cache."""
    decay = np.exp(-x)
    activated = 1/(1 + decay)
    return activated, x
    
def softmax(x):
    """Softmax along the first axis (one distribution per column for a 2-D input).

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to inf (and the ratio from becoming NaN) for large inputs.

    Returns (softmax(x), x); the second item is the cache.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps/np.sum(exps, axis=0, keepdims=True), x
    

In [None]:
def fc_forward_single_step(A_prev, W, b, activation):
    """Forward pass of one fully connected layer: A = activation(W . A_prev + b).

    Arguments:
    A_prev     -- activations of the previous layer
    W, b       -- weights and bias of this layer
    activation -- 'relu', 'sigmoid' or 'softmax'

    Returns:
    A     -- output of the activation function
    cache -- ((A_prev, W, b), activation_cache), where activation_cache is
             whatever the activation function returns as its second item

    Raises:
    ValueError -- if activation is not one of the supported names
                  (previously this surfaced as an UnboundLocalError)
    """
    # The lookup table is built at call time so the activation functions only
    # need to exist when the layer is actually evaluated.
    activations = {'relu': relu, 'sigmoid': sigmoid, 'softmax': softmax}
    if activation not in activations:
        raise ValueError("unsupported activation: %r" % (activation,))

    Z = np.dot(W, A_prev) + b
    linear_cache = (A_prev, W, b)
    A, activation_cache = activations[activation](Z)

    cache = (linear_cache, activation_cache)

    return A, cache

In [None]:
def fc_forward(X, params):
    """Forward pass through the whole fully connected stack.

    Layers 1 .. L-1 use ReLU and layer L uses softmax, where L is the number
    of layers stored in ``params`` (keys "W1", "b1", ..., "WL", "bL").

    Arguments:
    X      -- input activations (features along axis 0, examples along axis 1)
    params -- dict of weights and biases, two entries per layer

    Returns:
    AL     -- output of the final layer
    caches -- list with one cache per layer, in layer order
    """
    caches = []
    A = np.copy(X)
    L = len(params) // 2

    for l in range(1, L):
        A, cache = fc_forward_single_step(A, params["W" + str(l)], params["b" + str(l)], "relu")
        caches.append(cache)

    # The output layer consumes the last hidden activation A and is stored
    # under the numeric layer index L (not the literal key "WL").
    AL, cache = fc_forward_single_step(A, params["W" + str(L)], params["b" + str(L)], "softmax")
    caches.append(cache)

    return AL, caches

In [None]:
def compute_cost(AL, Y):
    """Cross-entropy cost averaged over the m examples (columns of Y).

    cost = -(1/m) * sum(Y * log(AL) + (1 - Y) * log(1 - AL))

    The sum is element-wise. The previous version passed the second term as
    np.sum's ``axis`` argument and used matrix products, which add
    cross-class terms whenever AL has more than one row.

    Arguments:
    AL -- predicted probabilities, same shape as Y
    Y  -- target values in {0, 1}; examples are along axis 1

    Returns:
    The cost as a 0-d array (np.squeeze of the scalar result).
    """
    # NOTE(review): this is the per-unit (sigmoid-style) cross-entropy, matching
    # the dAL formula in fc_backward, while fc_forward ends in a softmax layer --
    # confirm which loss is intended.
    m = Y.shape[1]
    log_likelihood = Y*np.log(AL) + (1 - Y)*np.log(1 - AL)
    return np.squeeze((-1/m)*np.sum(log_likelihood))

In [None]:
def softmax_backward(s):
    """Return diagflat(s) - s . s^T with ``s`` flattened to a column vector."""
    column = s.reshape(-1, 1)
    outer = np.dot(column, column.T)
    return np.diagflat(column) - outer

def relu_backward(x):
    """Return an array shaped like ``x`` holding 1 where x > 0 and 0 where x <= 0.

    The result is a fresh array. The previous version overwrote ``x`` in
    place, which silently changed the caller's data (for example a gradient
    stored in a dict).
    """
    indicator = np.array(x, copy=True)
    indicator[indicator > 0] = 1
    indicator[indicator <= 0] = 0
    return indicator
    
def fc_backward_single_step(dA, cache, activation):
    """Backward pass of one fully connected layer.

    Arguments:
    dA         -- gradient of the cost w.r.t. this layer's activation output
    cache      -- ((A_prev, W, b), Z) as produced by fc_forward_single_step,
                  where Z is the pre-activation value the activation
                  functions return as their cache
    activation -- 'relu', 'sigmoid' or 'softmax'

    Returns:
    dA_prev -- gradient w.r.t. the previous layer's activations
    dW      -- gradient w.r.t. W, averaged over the m examples
    db      -- gradient w.r.t. b, averaged over the m examples

    Raises:
    ValueError -- if activation is not one of the supported names
    """
    linear_cache, activation_cache = cache
    A_prev, W, b = linear_cache
    Z = activation_cache
    m = A_prev.shape[1]

    # dZ = dA * g'(Z). The derivative is evaluated at the cached Z and dA is
    # never modified in place.
    if activation == 'relu':
        dZ = dA * (Z > 0)

    elif activation == "sigmoid":
        s = 1/(1 + np.exp(-Z))
        dZ = dA * s * (1 - s)

    elif activation == "softmax":
        # Column-wise Jacobian-vector product of softmax:
        # dZ_i = s_i * (dA_i - sum_j dA_j * s_j)
        exps = np.exp(Z - np.max(Z, axis = 0, keepdims = True))
        s = exps/np.sum(exps, axis = 0, keepdims = True)
        dZ = s * (dA - np.sum(dA * s, axis = 0, keepdims = True))

    else:
        raise ValueError("unsupported activation: %r" % (activation,))

    dA_prev = np.dot(W.T, dZ)
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis = 1, keepdims = True)

    return dA_prev, dW, db

In [None]:
def fc_backward(AL, Y, caches):
    """Backward pass through the whole fully connected stack.

    Arguments:
    AL     -- output of the forward pass
    Y      -- target values; reshaped to AL's shape
    caches -- list of per-layer caches in layer order (caches[l-1] belongs to layer l)

    Returns:
    grads -- dict with "dA<l-1>", "dW<l>" and "db<l>" for every layer
             l = 1 .. L. The dW/db keys are 1-based so they line up with the
             "W<l>"/"b<l>" parameters that update_params_fc reads.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)  # Making sure the shape are same

    # Derivative of the per-unit cross-entropy cost w.r.t. AL.
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))

    # NOTE(review): the output layer is differentiated as a sigmoid here while
    # fc_forward applies softmax to the last layer -- confirm which is intended.
    current_cache = caches[L-1]
    # The targets are parenthesised so this is ONE tuple assignment; split over
    # bare lines, the first two lines were mere look-ups that raised KeyError.
    (grads["dA" + str(L-1)],
     grads["dW" + str(L)],
     grads["db" + str(L)]) = fc_backward_single_step(dAL, current_cache, "sigmoid")

    for l in reversed(range(L-1)):
        current_cache = caches[l]

        (grads["dA" + str(l)],
         grads["dW" + str(l+1)],
         grads["db" + str(l+1)]) = fc_backward_single_step(grads["dA" + str(l+1)], current_cache, "relu")

    return grads

In [None]:
def update_params_fc(params, grads, learning_rate):
    """Apply one gradient-descent step to every fully connected layer.

    Arguments:
    params        -- dict with "W<l>" and "b<l>" for l = 1 .. L
    grads         -- dict with the matching "dW<l>" and "db<l>"
    learning_rate -- step size

    Returns:
    params -- the same dict object, with its entries replaced by the updated values
    """
    L = len(params) // 2
    for l in range(1, L + 1):
        layer_id = str(l)
        params["W" + layer_id] = params["W" + layer_id] - (learning_rate*grads["dW" + layer_id])
        # The bias is updated from the bias itself, not from the weight matrix.
        params["b" + layer_id] = params["b" + layer_id] - (learning_rate*grads["db" + layer_id])

    return params
    

In [None]:
# Checking the dimensions and plotting an example

index = 232  # Can be any whole number less than 60,000
print(x_train[index].shape)

print(y_train[index])
# The image is still raw here (MNIST pixels are 0..255), so the colour range
# must be 0..255; a 0..1 range would clip almost every non-zero pixel to black.
plt.imshow(x_train[index], cmap="Greys", norm = colors.Normalize(vmin=0.0, vmax=255.0))

# Making dimensions -> (number_of_examples, 28, 28, 1)
# -1 lets NumPy infer the number of examples instead of hard-coding 60000 / 10000.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Scaling the pixels to [0, 1]
# Both splits are divided by the same constant. Dividing each split by its own
# global norm would put train and test on different scales.
# NOTE: this cell overwrites x_train / x_test, so re-run the loading cell before
# running it a second time.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [None]:
def le_net_model(x_train, y_train, learning_rate, params, hparams):
    """Run the convolutional front end (conv -> relu -> average pool, twice) and
    print the shape of the final pooled output.

    Arguments:
    x_train       -- input batch, shape (m, n_H, n_W, n_C0)
    y_train       -- unused so far
    learning_rate -- unused so far
    params        -- [f1, f2, p1, p2, b1, b2, b3, b4, w3, w4]: conv filter sizes
                     (f1, f2), pooling window sizes (p1, p2), conv biases
                     (b1, b2); b3, b4, w3 and w4 are unpacked but not used yet
    hparams       -- [n_C0, n_C1, n_C2]: channel counts of the input and of the
                     two convolution layers

    Returns None; this function is still a work in progress (no fully connected
    part, no cost, no training step).
    """
    [f1, f2, p1, p2, b1, b2, b3, b4, w3, w4 ] = params
    [n_C0, n_C1, n_C2] = hparams

    def as_bias(candidate, n_c):
        # conv_forward_step indexes b[:, :, :, c], so it needs a (1, 1, 1, n_c)
        # array. Anything else (e.g. a shape tuple) is replaced by zero biases.
        # NOTE(review): confirm that zero initialisation is what is wanted.
        expected_shape = (1, 1, 1, n_c)
        if isinstance(candidate, np.ndarray) and candidate.shape == expected_shape:
            return candidate
        return np.zeros(expected_shape)

    # conv_forward_step reads W.shape and slices W, so it must be given filter
    # arrays, not the shape tuples that used to be passed.
    # NOTE(review): the filters are re-drawn on every call; move them into
    # `params` once training is implemented.
    W1 = np.random.randn(f1, f1, n_C0, n_C1)
    W2 = np.random.randn(f2, f2, n_C1, n_C2)

    conv1, cache1 = conv_forward_step(x_train, W1, as_bias(b1, n_C1), {"stride" : 1, "pad" : 0})
    a_conv1, conv1 = relu(conv1)

    pool1, cache2 = pooling_layer(a_conv1, {"f" : p1, "stride" : 1}, mode = "average")

    conv2, cache3 = conv_forward_step(pool1, W2, as_bias(b2, n_C2), {"stride" : 1, "pad" : 0})
    a_conv2, conv2 = relu(conv2)

    pool2, cache4 = pooling_layer(a_conv2, {"f" : p2, "stride" : 1}, mode = "average")

    print(pool2.shape)
    

In [None]:
# Channel counts: input image, first conv layer, second conv layer
# (unpacked in this order by le_net_model from its `hparams` argument).
n_C0 = 1
n_C1 = 6
n_C2 = 16
# n_C3 was used below without ever being defined, so this cell raised NameError
# on a fresh kernel.
# NOTE(review): 120 is assumed from LeNet-5's first fully connected layer -- TODO confirm.
n_C3 = 120


# Convolution filter sizes (f1, f2) and pooling window sizes (p1, p2)
f1 = 5
f2 = 5
p1 = 2
p2 = 2
# Bias placeholders, stored as shape tuples rather than arrays.
# NOTE(review): the convolution code expects bias arrays of shape
# (1, 1, 1, n_C) -- confirm how these tuples are meant to be used.
b1 = (1, 1, n_C0)
b2 = (1, 1, n_C1)
b3 = (1, 1, n_C2)
b4 = (1, 1, n_C3)
# Random placeholder weight matrices (not used by le_net_model yet).
w3 = np.random.randn(2, 3)
w4 = np.random.randn(2, 3)