In [45]:
import numpy as np


In [46]:
import numpy as np

def sigmoid(Z):
    """
    Apply the logistic sigmoid function element-wise.

    Arguments:
    Z -- numpy array of pre-activation values, any shape

    Returns:
    A -- 1 / (1 + exp(-Z)) evaluated element-wise on Z
    cache -- the input Z itself, handed back for use during backpropagation
    """
    exp_neg = np.exp(-Z)
    activation = 1 / (1 + exp_neg)
    return activation, Z

def relu(Z):
    """
    Apply the rectified linear unit (ReLU) element-wise.

    Arguments:
    Z -- output of the linear layer, of any shape

    Returns:
    A -- element-wise max(0, Z), same shape as Z
    cache -- the input Z itself, kept so the backward pass can reuse it
    """
    activated = np.maximum(0, Z)

    # Sanity check: ReLU never changes the shape of its input.
    assert(activated.shape == Z.shape)

    return activated, Z


def relu_backward(dA, cache):
    """
    Backward pass for a single ReLU unit.

    Arguments:
    dA -- gradient with respect to the ReLU output, of any shape
    cache -- the pre-activation values Z saved by the forward pass

    Returns:
    dZ -- gradient with respect to Z: a copy of dA with entries zeroed
          wherever Z <= 0
    """
    pre_activation = cache

    # Work on a copy so the caller's dA is left untouched.
    grad = np.array(dA, copy=True)

    # ReLU is flat for non-positive inputs, so no gradient flows through there.
    inactive = pre_activation <= 0
    grad[inactive] = 0

    assert (grad.shape == pre_activation.shape)

    return grad

def sigmoid_backward(dA, cache):
    """
    Backward pass for a single sigmoid unit.

    Arguments:
    dA -- gradient with respect to the sigmoid output, of any shape
    cache -- the pre-activation values Z saved by the forward pass

    Returns:
    dZ -- gradient with respect to Z, i.e. dA * s * (1 - s) with s = sigmoid(Z)
    """
    Z = cache

    # Recompute the activation from Z; its derivative is s * (1 - s).
    sig = 1 / (1 + np.exp(-Z))
    scaled = dA * sig
    dZ = scaled * (1 - sig)

    assert (dZ.shape == Z.shape)

    return dZ

In [42]:
def initialise_parameter(layers_dim):
    """
    Create the initial weights and biases of the network.

    Arguments:
    layers_dim -- non-empty sequence of layer sizes; layers_dim[0] is the
                  input size and every later entry is the size of one layer

    Returns:
    parameters -- dict with keys "W1", "b1", ..., "WL", "bL" where
                  L = len(layers_dim):
                  Wl, bl for l < L have shapes
                  (layers_dim[l], layers_dim[l-1]) and (layers_dim[l], 1);
                  WL, bL have shapes (1, layers_dim[L-1]) and (1, 1), i.e. a
                  single-unit output layer appended after the listed layers.
                  Weights are small random normal values (scaled by 0.01),
                  biases are zeros.

    Raises:
    ValueError -- if layers_dim is empty
    """
    L = len(layers_dim)
    if L == 0:
        raise ValueError("layers_dim must contain at least the input size")

    parameters = {}
    for layer in range(1, L):
        parameters['W' + str(layer)] = (
            np.random.randn(layers_dim[layer], layers_dim[layer - 1]) * 0.01
        )
        # np.zeros takes the shape as a single tuple argument.
        parameters['b' + str(layer)] = np.zeros((layers_dim[layer], 1))

    # Single-unit output layer fed by the last listed layer.
    parameters['W' + str(L)] = np.random.randn(1, layers_dim[L - 1]) * 0.01
    parameters['b' + str(L)] = np.zeros((1, 1))
    return parameters

In [43]:
def fwd_propagation(X,parameters):
    """
    Run the forward pass: ReLU for every layer but the last, sigmoid for the
    last one.

    Arguments:
    X -- input data, a 2-D numpy array whose second axis has one column per
         example
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns:
    AL -- activations of the last (sigmoid) layer, shape (1, X.shape[1])
    caches -- list with the cache returned by linear_activation for each
              layer, in layer order
    """
    caches = []
    A = X
    # Every layer contributes two entries (W and b) to the dict.
    L = len(parameters) // 2

    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation(
            A_prev,
            parameters['W' + str(l)],
            parameters['b' + str(l)],
            activation="relu",
        )
        caches.append(cache)

    AL, cache = linear_activation(
        A,
        parameters['W' + str(L)],
        parameters['b' + str(L)],
        activation="sigmoid",
    )
    caches.append(cache)

    # The output layer is expected to have a single unit per example.
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches

In [44]:
def linear_activation(A_prev,W,b,activation):
    """
    Forward step for one layer: linear transform followed by an activation.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of this layer
    b -- bias vector of this layer
    activation -- name of the activation to apply: "relu" or "sigmoid"

    Returns:
    A -- output of the activation function
    cache -- tuple (linear_cache, activation_cache) as returned by
             linear_forward and by the chosen activation function

    Raises:
    ValueError -- if activation is neither "relu" nor "sigmoid"
    """
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "relu":
        A, activation_cache = relu(Z)
    elif activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        raise ValueError(
            "activation must be 'relu' or 'sigmoid', got %r" % (activation,)
        )

    # Built for every activation, not only for sigmoid.
    cache = (linear_cache, activation_cache)

    assert(A.shape == (W.shape[0], A_prev.shape[1]))
    return A, cache

In [39]:
def linear_forward(A,W,b):
    """
    Linear part of a layer's forward step: Z = W . A + b.

    Arguments:
    A -- activations from the previous layer (or the input data), 2-D array
    W -- weight matrix, 2-D array
    b -- bias, added to the product W . A

    Returns:
    Z -- the pre-activation values, shape (W.shape[0], A.shape[1])
    cache -- tuple (A, W, b) holding the inputs for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    expected_shape = (W.shape[0], A.shape[1])
    assert (pre_activation.shape == expected_shape)

    return pre_activation, (A, W, b)

In [50]:
def compute_cost(AL,Y):
    """
    Compute the cross-entropy cost.

    Arguments:
    AL -- predicted probabilities, 2-D array with one column per example
    Y -- true labels (0 or 1), same shape as AL

    Returns:
    cost -- the cross-entropy cost averaged over the examples, as a 0-d array
    """
    # Number of examples = number of columns.
    m = Y.shape[1]

    # Both terms are added element-wise before summing; np.sum's second
    # positional argument is the axis, not another operand.
    log_likelihood = np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))
    cost = (-1 / m) * np.sum(log_likelihood)

    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost

In [None]:
def bkwd_propagation(AL,Y,caches):
    """
    Start backward propagation from the sigmoid output layer.

    Arguments:
    AL -- output of the forward pass (activations of the sigmoid layer)
    Y -- true labels; presumably the same shape as AL -- TODO confirm
    caches -- list of per-layer caches; the last entry is indexed as
              (linear_cache, activation_cache)

    NOTE(review): the statements below only fill in the gradients of the
    last layer.
    """
    grads = {}
    L = len(caches)
    m = AL.shape[1]

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = caches[-1]
    # Through the sigmoid first, then through the linear part of the layer.
    dZL = sigmoid_backward(dAL, current_cache[1])
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(dZL, current_cache[0])
    