# In [6]:  (Jupyter notebook cell marker, kept as a comment so the file parses)
"""
Gradient checking (梯度检验).
"""
import numpy as np
import gc_utils
def forward_propagation(x, theta):
    """Evaluate the toy linear cost J = theta * x.

    Args:
        x: Input value.
        theta: Parameter value.

    Returns:
        The result of ``np.dot(theta, x)``.
    """
    return np.dot(theta, x)

def backward_propagation(x, theta):
    """Return the derivative of the toy cost J = theta * x with respect to theta.

    Args:
        x: Input value; for a linear cost this is the derivative itself.
        theta: Parameter value (not used by the computation).

    Returns:
        ``x``, unchanged.
    """
    return x

def gradient_check(x, theta, epsilon=1e-7):
    """Check the analytic gradient of J = theta * x against a numerical estimate.

    The numerical gradient is a centered finite difference of
    ``forward_propagation``; the analytic one comes from
    ``backward_propagation``. Their relative difference is
    ``||grad - gradapprox|| / (||grad|| + ||gradapprox||)``.

    Args:
        x: Input value.
        theta: Parameter value at which the gradient is checked.
        epsilon: Perturbation applied to ``theta`` in each direction.

    Returns:
        The relative difference between the two gradients. A verdict is
        also printed, using 1e-7 as the acceptance threshold.
    """
    # Centered difference: (J(theta + eps) - J(theta - eps)) / (2 * eps).
    cost_plus = forward_propagation(x, theta + epsilon)
    cost_minus = forward_propagation(x, theta - epsilon)
    gradapprox = (cost_plus - cost_minus) / (2 * epsilon)

    grad = backward_propagation(x, theta)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    # When both gradients are exactly zero (e.g. x == 0) they agree perfectly;
    # dividing would give 0 / 0 = nan and a spurious "threshold exceeded".
    difference = numerator / denominator if denominator != 0 else 0.0

    if difference < 1e-7:
        print("梯度正常")
    else:
        print("梯度超出阈值")
    return difference
"""
x , theta = 2 , 4
difference = gradient_check(x,theta)
print(difference)   
"""

def forward_propagation_n(X, Y, parameters):
    """Run the forward pass of the 3-layer network and compute its cost.

    The layers are linear -> relu -> linear -> relu -> linear -> sigmoid,
    with the activations taken from ``gc_utils``. The cost is the
    cross-entropy between the output ``A3`` and ``Y``, summed and divided by
    ``X.shape[1]``.

    Args:
        X: Input data; the example count is read from ``X.shape[1]``.
        Y: Labels, combined element-wise with the output activations.
        parameters: Dict with keys "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        A tuple ``(cost, cache)`` where ``cache`` is
        ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)``.
    """
    num_examples = X.shape[1]
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    # Hidden layer 1.
    Z1 = np.dot(W1, X) + b1
    A1 = gc_utils.relu(Z1)
    # Hidden layer 2.
    Z2 = np.dot(W2, A1) + b2
    A2 = gc_utils.relu(Z2)
    # Output layer.
    Z3 = np.dot(W3, A2) + b3
    A3 = gc_utils.sigmoid(Z3)

    # Cross-entropy, split into the Y == 1 and Y == 0 contributions.
    positive_term = np.multiply(-np.log(A3), Y)
    negative_term = np.multiply(-np.log(1 - A3), (1 - Y))
    cost = (1 / num_examples) * np.sum(positive_term + negative_term)

    return cost, (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)

def backward_propagation_n(X, Y, cache):
    """Compute the gradients of the 3-layer network's cost by backpropagation.

    Mirrors ``forward_propagation_n``: a sigmoid output with cross-entropy
    cost (so ``dZ3 = A3 - Y``) and two ReLU hidden layers (gradient passes
    only where the activation is positive).

    Args:
        X: Input data; the example count is read from ``X.shape[1]``.
        Y: Labels, same layout as the output activations ``A3``.
        cache: Tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)``
            as returned by ``forward_propagation_n``.

    Returns:
        Dict with keys "dZ3", "dW3", "db3", "dA2", "dZ2", "dW2", "db2",
        "dA1", "dZ1", "dW1", "db1".
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer.
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Hidden layer 2: ReLU derivative is 1 where the activation is positive.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer 1.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,
                 "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
                 "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients

def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """Check backpropagation gradients against a numerical estimate.

    Each parameter is perturbed by ``+epsilon`` and ``-epsilon`` in turn, the
    cost is re-evaluated with ``forward_propagation_n``, and the centered
    difference is stored as that parameter's approximate gradient. The
    result is compared with the analytic gradient via
    ``||grad - gradapprox|| / (||grad|| + ||gradapprox||)``.

    Args:
        parameters: Dict of network parameters, flattened with
            ``gc_utils.dictionary_to_vector``.
        gradients: Dict of analytic gradients, flattened with
            ``gc_utils.gradients_to_vector``.
        X: Input data passed to ``forward_propagation_n``.
        Y: Labels passed to ``forward_propagation_n``.
        epsilon: Perturbation applied to each parameter.

    Returns:
        The relative difference between the analytic and numerical
        gradients. A verdict is also printed, using 1e-7 as the threshold.
    """
    # NOTE(review): assumes the gc_utils helpers produce (n, 1) column vectors
    # with matching element order for parameters and gradients -- confirm.
    parameters_values, _ = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # np.copy gives an independent array, so the perturbation never
        # leaks into parameters_values or into the other direction.
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(theta_plus))

        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(theta_minus))

        # Centered difference for the i-th parameter.
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    # Both gradients exactly zero means they agree; avoid 0 / 0 = nan.
    difference = numerator / denominator if denominator != 0 else 0.0

    if difference < 1e-7:
        print("梯度检查：梯度正常!")
    else:
        print("梯度检查：梯度超出阈值!")

    return difference

    