# In [12]:
import numpy as np

def relu(X):
    """Return the element-wise maximum of 0 and X (rectified linear unit)."""
    floor = 0
    rectified = np.maximum(floor, X)
    return rectified

def sigmod(X):
    """Return the logistic sigmoid 1 / (1 + exp(-X)), applied element-wise."""
    denominator = 1 + np.exp(-X)
    return 1. / denominator

def dictionary_to_vector(params):
    """Flatten the network parameters into a single column vector.

    The entries 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' of ``params`` are each
    reshaped to a column and stacked in that order.

    Returns:
        theta: array of shape (total_number_of_entries, 1).
        keys: list with one entry per row of ``theta`` naming the
            parameter that row was taken from.
    """
    ordered_names = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    columns = [np.reshape(params[name], (-1, 1)) for name in ordered_names]

    # One label per flattened entry, in the same order as the stacked rows.
    keys = []
    for name, column in zip(ordered_names, columns):
        keys.extend([name] * column.shape[0])

    theta = np.concatenate(columns, axis=0)
    return theta, keys

def vector_to_dictionary(theta):
    """Rebuild the parameter dictionary from a flattened vector.

    ``theta`` is sliced along its first axis using a fixed layout:
    W1 (5, 4), b1 (5, 1), W2 (3, 5), b2 (3, 1), W3 (1, 3), b3 (1, 1),
    i.e. 47 entries in total.

    Returns:
        dict mapping each parameter name to its reshaped slice of ``theta``.
    """
    layout = (
        ("W1", (5, 4)),
        ("b1", (5, 1)),
        ("W2", (3, 5)),
        ("b2", (3, 1)),
        ("W3", (1, 3)),
        ("b3", (1, 1)),
    )
    params = {}
    start = 0
    for name, shape in layout:
        stop = start + shape[0] * shape[1]
        params[name] = theta[start:stop].reshape(shape)
        start = stop
    return params

def gradients_to_vector(gradients):
    """Flatten the parameter gradients into a single column vector.

    Only the entries "dW1", "db1", "dW2", "db2", "dW3", "db3" are used;
    each is reshaped to a column and stacked in that order. Any other
    entries of ``gradients`` are ignored.

    Returns:
        array of shape (total_number_of_entries, 1).
    """
    ordered_names = ("dW1", "db1", "dW2", "db2", "dW3", "db3")
    columns = [np.reshape(gradients[name], (-1, 1)) for name in ordered_names]
    return np.concatenate(columns, axis=0)

def forward_propagation(x, theta):
    """Evaluate the one-parameter linear cost J(theta) = theta * x.

    Args:
        x: input value (scalar or array).
        theta: parameter value (scalar or array).

    Returns:
        The product ``theta * x``.
    """
    # The original body referenced the misspelled name `thetha`, which
    # raised NameError on every call.
    J = theta * x
    return J

def backward_propagation(x, theta):
    """Return ``x`` as the gradient with respect to theta.

    ``theta`` is accepted for interface symmetry but is not used.
    """
    return x

def gradient_check(x, theta, epsilon=1e-7):
    """Compare backward_propagation against a centered finite difference.

    The cost is evaluated with forward_propagation at ``theta + epsilon``
    and ``theta - epsilon`` to build a numerical gradient, which is then
    compared to the analytic one using the relative difference
    ``||grad - approx|| / (||grad|| + ||approx||)``.

    Args:
        x: input passed to forward_propagation / backward_propagation.
        theta: parameter value at which the gradient is checked.
        epsilon: perturbation size; also used as the pass/fail threshold.

    Returns:
        The relative difference. A verdict is also printed.
    """
    # Two-sided numerical estimate of dJ/dtheta.
    cost_above = forward_propagation(x, theta + epsilon)
    cost_below = forward_propagation(x, theta - epsilon)
    gradapprox = (cost_above - cost_below) / (2 * epsilon)

    # Analytic gradient under test.
    grad = backward_propagation(x, theta)

    gap = np.linalg.norm(grad - gradapprox)
    scale = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = gap / scale

    verdict = "the gradient is correct" if difference < epsilon else "the gradient is wrong"
    print(verdict)
    return difference

def forward_propagation_n(X, Y, params):
    """Run the three-layer forward pass and compute the cross-entropy cost.

    The layers are linear -> relu -> linear -> relu -> linear -> sigmod,
    using the entries 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' of ``params``.

    Args:
        X: input array; the number of examples is taken from X.shape[1].
        Y: labels, combined element-wise with the final activations.
        params: dictionary holding the weights and biases.

    Returns:
        cost: summed cross-entropy divided by the number of examples.
        cache: tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3).
    """
    m = X.shape[1]
    W1, b1, W2, b2, W3, b3 = (
        params[name] for name in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    )

    # Hidden layer 1.
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Hidden layer 2.
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    # Output layer.
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmod(Z3)

    # Per-example cross-entropy: one term for Y and one for 1 - Y.
    positive_term = np.multiply(-np.log(A3), Y)
    negative_term = np.multiply(-np.log(1 - A3), 1 - Y)
    logprobs = positive_term + negative_term
    cost = 1. / m * np.sum(logprobs)

    cache = (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
    return cost, cache

def backward_propagation_n(X, Y, cache):
    """Compute the gradients of the cost for the three-layer network.

    Args:
        X: input array; the number of examples is taken from X.shape[1].
        Y: labels, subtracted from the output activations A3.
        cache: tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) as
            returned by forward_propagation_n.

    Returns:
        dict with the keys dZ3, dW3, db3, dA2, dZ2, dW2, db2, dA1, dZ1,
        dW1, db1 (in that insertion order).
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache
    inv_m = 1. / m

    def weight_and_bias_grads(dZ, layer_input):
        # Gradients for a linear layer, scaled by 1/m over the examples.
        dW = inv_m * np.dot(dZ, layer_input.T)
        db = inv_m * np.sum(dZ, axis=1, keepdims=True)
        return dW, db

    # Output layer.
    dZ3 = A3 - Y
    dW3, db3 = weight_and_bias_grads(dZ3, A2)

    # Second hidden layer: the gradient passes only where A2 > 0.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2, db2 = weight_and_bias_grads(dZ2, A1)

    # First hidden layer: the gradient passes only where A1 > 0.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1, db1 = weight_and_bias_grads(dZ1, X)

    gradients = {
        "dZ3": dZ3, "dW3": dW3, "db3": db3,
        "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
        "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1,
    }
    return gradients

def gradient_check_n(params, gradients, X, Y, epsilon=1e-7):
    """Check the analytic gradients against centered finite differences.

    Each entry of the flattened parameter vector is perturbed by
    ``+epsilon`` and ``-epsilon`` in turn, the cost is recomputed with
    forward_propagation_n, and the resulting numerical gradient is compared
    with the flattened ``gradients`` using the relative difference
    ``||grads - approx|| / (||grads|| + ||approx||)``.

    Args:
        params: parameter dictionary (W1, b1, W2, b2, W3, b3).
        gradients: gradient dictionary (dW1, db1, dW2, db2, dW3, db3).
        X: inputs passed through to forward_propagation_n.
        Y: labels passed through to forward_propagation_n.
        epsilon: size of the perturbation.

    Returns:
        The relative difference. A verdict is printed using a fixed 1e-7
        threshold, independent of ``epsilon``.
    """
    params_values, _ = dictionary_to_vector(params)
    grads = gradients_to_vector(gradients)
    num_params = params_values.shape[0]
    gradapprox = np.zeros((num_params, 1))

    for idx in range(num_params):
        # Cost with only entry `idx` nudged upwards.
        nudged_up = np.copy(params_values)
        nudged_up[idx][0] += epsilon
        cost_up, _ = forward_propagation_n(X, Y, vector_to_dictionary(nudged_up))

        # Cost with only entry `idx` nudged downwards.
        nudged_down = np.copy(params_values)
        nudged_down[idx][0] -= epsilon
        cost_down, _ = forward_propagation_n(X, Y, vector_to_dictionary(nudged_down))

        gradapprox[idx] = (cost_up - cost_down) / (2. * epsilon)

    gap = np.linalg.norm(grads - gradapprox)
    scale = np.linalg.norm(grads) + np.linalg.norm(gradapprox)
    difference = gap / scale

    suffix = " something wrong" if difference > 1e-7 else " good"
    print(str(difference) + suffix)
    return difference

if __name__ == '__main__':
    # Fixed seed so the random inputs and parameters are reproducible.
    np.random.seed(1)
    X = np.random.randn(4, 3)
    Y = np.array([1, 1, 0])

    # Drawn in the order W1, b1, W2, b2, W3, b3 so the random stream is
    # consumed in the same sequence for every run.
    W1, b1, W2, b2, W3, b3 = (
        np.random.randn(rows, cols)
        for rows, cols in ((5, 4), (5, 1), (3, 5), (3, 1), (1, 3), (1, 1))
    )
    params = dict(W1=W1, b1=b1, W2=W2, b2=b2, W3=W3, b3=b3)

    # Forward pass, analytic gradients, then the numerical check.
    cost, cache = forward_propagation_n(X, Y, params)
    gradients = backward_propagation_n(X, Y, cache)
    difference = gradient_check_n(params, gradients, X, Y)

# Output: 1.39243295089e-08 good
