In [None]:
#The following code contains the helper functions used to build a deep neural network with L layers.

In [9]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from dnn_utils_v2 import sigmoid , sigmoid_backward , relu , relu_backward

In [None]:
#The function below is used to initialize the parameters.

In [14]:
def initialize_parameters(layer_dims):
    """Create the weight and bias arrays for every layer of the network.

    Parameters
    ----------
    layer_dims : indexable sequence of layer sizes; entry 0 is the input size.

    Returns
    -------
    dict with keys "W1", "b1", ..., one pair per layer after the first entry.
    "W<l>" has shape (layer_dims[l], layer_dims[l-1]) and holds scaled
    standard-normal draws; "b<l>" has shape (layer_dims[l], 1) and is all zeros.

    Note: the global NumPy random generator is re-seeded with 100 on every
    call, so repeated calls return identical weights.
    """
    np.random.seed(100)
    parameters = {}
    for l in range(1, len(layer_dims)):
        n_current = layer_dims[l]
        n_previous = layer_dims[l-1]
        w_key = "W"+str(l)
        b_key = "b"+str(l)
        # Small random weights, zero biases.
        parameters[w_key] = np.random.randn(n_current, n_previous) * 0.01
        parameters[b_key] = np.zeros((n_current, 1))
        # Shape sanity checks (raise AssertionError on mismatch).
        assert parameters[w_key].shape == (n_current, n_previous)
        assert parameters[b_key].shape == (n_current, 1)
    return parameters

In [None]:
#Making the linear function: Z[l] = W[l] . A[l-1] + b[l] (matrix product of W[l] and A[l-1], plus the bias b[l])
#This doesn't require a for loop as the input given will be the current values of A,W,b.

In [17]:
def linear_function(A,W,b):
    """Compute the linear part of a layer: Z = W . A + b.

    Parameters
    ----------
    A : input array; its second dimension is the column count of Z.
    W : weight array; its first dimension is the row count of Z.
    b : bias added to the product (broadcast by NumPy).

    Returns
    -------
    Z : the result of np.dot(W, A) + b
    cache : the tuple (A, W, b), kept for the backward pass.
    """
    Z = np.add(np.dot(W, A), b)
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape
    return Z, (A, W, b)

In [None]:
#Making linear activation forward function.
#It supports two activations, sigmoid and relu (each activation function returns the activation A and a cache).

In [18]:
def linear_activation_forward(A_prev,W,b,activation):
    """Run one layer forward: the linear step followed by its activation.

    Parameters
    ----------
    A_prev : activations from the previous layer (or the input data).
    W, b : weights and bias of this layer.
    activation : "sigmoid" or "relu".

    Returns
    -------
    A : output of the activation function, shape (W.shape[0], A_prev.shape[1]).
    cache : tuple (linear_cache, activation_cache); linear_cache is the
        (A_prev, W, b) tuple from linear_function, activation_cache is whatever
        the imported activation helper returns as its second value.

    Raises
    ------
    ValueError if `activation` is not one of the supported names.
    """
    # Validate first so an unsupported name fails with a clear message
    # instead of an unbound-variable error further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError("activation must be 'sigmoid' or 'relu', got %r" % (activation,))

    # The linear helper defined in this file is named linear_function.
    Z, linear_cache = linear_function(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    assert(A.shape==(W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    # Callers unpack `A, cache`, so the activation must be returned as well.
    return A, cache

In [None]:
#Now a function which could loop over the L layers of the network.
#The function applies the relu activation to layers 1 through L-1 and the sigmoid activation to the last layer L.

In [19]:
def L_model_forward(X,parameters):
    """Forward pass through the whole network.

    Layers 1 .. L-1 use the "relu" activation and layer L uses "sigmoid",
    where L is half the number of entries in `parameters` (one W and one b
    per layer).

    Parameters
    ----------
    X : input data; X.shape[1] must match the second dimension of the output.
    parameters : dict with keys "W1", "b1", ..., "WL", "bL".

    Returns
    -------
    AL : output of the last layer, asserted to have shape (1, X.shape[1]).
    caches : list with one cache per layer, in layer order.
    """
    num_layers = len(parameters)//2
    caches = []
    activation_out = X

    # Hidden layers: relu.
    for idx in range(1, num_layers):
        activation_out, layer_cache = linear_activation_forward(
            activation_out,
            parameters['W' + str(idx)],
            parameters['b' + str(idx)],
            activation='relu',
        )
        caches.append(layer_cache)

    # Output layer: sigmoid.
    AL, layer_cache = linear_activation_forward(
        activation_out,
        parameters['W' + str(num_layers)],
        parameters['b' + str(num_layers)],
        activation='sigmoid',
    )
    caches.append(layer_cache)

    assert AL.shape == (1, X.shape[1])
    return AL, caches

In [20]:
#A function that helps to compute the cost function
#np.squeeze removes the length-one dimensions so the array becomes the single value it contains (e.g. [[0.054]] becomes 0.054).

In [21]:
def compute_cost(AL, Y):
    """Compute the cross-entropy cost between predictions and labels.

    Parameters
    ----------
    AL : predicted values; np.log is applied to AL and to 1 - AL.
    Y : labels, same shape as AL; Y.shape[1] is used as the example count m.

    Returns
    -------
    cost : the value -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)), squeezed so
        that its shape is ().
    """
    m = Y.shape[1]

    positive_term = np.multiply(Y, np.log(AL))
    negative_term = np.multiply(1 - Y, np.log(1 - AL))
    cost = np.squeeze((-1 / m) * np.sum(positive_term + negative_term))

    assert cost.shape == ()
    return cost

In [22]:
#The below functions perform backward propagation

In [23]:
def linear_backward(dZ, cache):
    """Backward pass for the linear part of one layer.

    Parameters
    ----------
    dZ : gradient of the cost with respect to this layer's linear output Z.
    cache : tuple (A_prev, W, b) stored during the forward pass.

    Returns
    -------
    dA_prev : gradient with respect to A_prev, same shape as A_prev.
    dW : gradient with respect to W, same shape as W.
    db : gradient with respect to b, shape (dZ.shape[0], 1).
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = np.dot(dZ, A_prev.T) / m
    # keepdims=True keeps db as a column vector. Squeezing it to a 1-D array
    # would make `b - learning_rate * db` broadcast (n, 1) against (n,) into an
    # (n, n) matrix during the parameter update.
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == (dZ.shape[0], 1))

    return dA_prev, dW, db

In [24]:
#A function that performs linear activation backward propagation

In [25]:
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one layer: activation gradient, then linear gradient.

    Parameters
    ----------
    dA : gradient of the cost with respect to this layer's activation output.
    cache : tuple (linear_cache, activation_cache) stored by the forward pass.
    activation : "relu" or "sigmoid", the activation used by this layer.

    Returns
    -------
    dA_prev, dW, db : the three gradients returned by linear_backward.

    Raises
    ------
    ValueError if `activation` is not one of the supported names.
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Fail with a clear message rather than an unbound-variable error.
        raise ValueError("activation must be 'relu' or 'sigmoid', got %r" % (activation,))

    # The linear step is the same for both activations.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [26]:
#A function that could perform the backward propagation for L layers of network

In [27]:
def L_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    Mirrors the forward pass: the last layer uses the sigmoid activation and
    every earlier layer uses relu.

    Parameters
    ----------
    AL : output of the forward pass.
    Y : labels; reshaped to AL.shape before use.
    caches : list of per-layer caches, each a (linear_cache, activation_cache)
        tuple, in layer order (caches[l] belongs to layer l + 1).

    Returns
    -------
    grads : dict with
        grads["dA" + str(l)] : gradient w.r.t. the activations of layer l
            (l = 0 .. L-1, where l = 0 is the input),
        grads["dW" + str(l)], grads["db" + str(l)] : gradients of layer l's
            parameters (l = 1 .. L).
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    # NOTE(review): undefined where AL is exactly 0 or 1 (division by zero).
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid).
    current_cache = caches[-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

    # Hidden layers (relu), walking from layer L-1 down to layer 1. Each step
    # consumes the gradient produced by the layer above it, not dAL.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [None]:
#The function that will update the parameters

In [28]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Parameters
    ----------
    parameters : dict with keys "W1", "b1", ..., "WL", "bL".
    grads : dict with the matching keys "dW1", "db1", ..., "dWL", "dbL".
    learning_rate : step size multiplied into each gradient.

    Returns
    -------
    The same `parameters` dict, with each entry rebound to
    `value - learning_rate * gradient`. The arrays previously stored in the
    dict are not modified in place.
    """
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        w_key = "W" + suffix
        b_key = "b" + suffix
        parameters[w_key] = parameters[w_key] - learning_rate * grads["dW" + suffix]
        parameters[b_key] = parameters[b_key] - learning_rate * grads["db" + suffix]

    return parameters