In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import init_utils   
import reg_utils    
import gc_utils     
import testCases

# IPython/Jupyter magic: render matplotlib figures inline in the notebook.
# NOTE: this line is not valid plain Python; it only works inside IPython.
%matplotlib inline 
# Default size (width, height) used for new figures.
plt.rcParams['figure.figsize'] = (7.0, 4.0) 
# Default interpolation mode for displayed images.
plt.rcParams['image.interpolation'] = 'nearest'
# Default colormap for displayed images.
plt.rcParams['image.cmap'] = 'gray'

#一维线性的梯度检查

def forward_propagation(x, theta):
    """
    Evaluate the one-dimensional linear model J(theta) = theta * x.

    Arguments:
    x -- input value
    theta -- parameter of the linear function

    Returns:
    The value of the function, theta * x.
    """
    return theta * x

def backward_propagation(x, theta):
    """
    Return the analytic derivative used by the 1-D gradient check.

    Arguments:
    x -- input value
    theta -- parameter of the linear function (not used in the computation)

    Returns:
    The derivative with respect to theta, which is simply x.
    """
    return x

def gradient_check(x, theta, epsilon = 1e-7):
    """
    Gradient check for the one-dimensional linear function.

    Compares the analytic gradient from backward_propagation with a centered
    finite-difference estimate built from forward_propagation, and prints
    whether the relative difference is below 1e-7.

    Arguments:
    x -- input value
    theta -- parameter of the 1-D linear function
    epsilon -- small shift applied to theta to compute the approximate gradient

    Returns:
    difference -- ||grad - grad_approx|| / (||grad|| + ||grad_approx||),
                  or 0.0 when both gradients are exactly zero
    """
    # Centered finite difference: (J(theta + eps) - J(theta - eps)) / (2 * eps)
    J_plus = forward_propagation(x, theta + epsilon)
    J_minus = forward_propagation(x, theta - epsilon)
    grad_approx = (J_plus - J_minus) / (2 * epsilon)

    # Analytic gradient
    grad = backward_propagation(x, theta)

    numerator = np.linalg.norm(grad - grad_approx)
    denominator = np.linalg.norm(grad) + np.linalg.norm(grad_approx)

    # When both gradients are exactly zero (e.g. x == 0) they agree perfectly.
    # Dividing 0 by 0 would give nan, and nan < 1e-7 is False, so the check
    # would wrongly report a failure.
    if denominator == 0:
        difference = 0.0
    else:
        difference = numerator / denominator

    if difference < 1e-7:
        print("梯度检查：梯度正常!")
    else:
        print("梯度检查：梯度超出阈值!")

    return difference


#多维参数
def forward_propagation_n(X, Y, parameters):
    """
    Run the forward pass of the three-layer network and compute the cost.

    Architecture: [LINEAR -> RELU] -> [LINEAR -> RELU] -> [LINEAR -> SIGMOID]

    Arguments:
    X -- input data; the number of examples is read from X.shape[1]
    Y -- labels of the input data
    parameters -- dictionary with the keys "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    cost -- cross-entropy cost averaged over the examples
    cache -- tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3),
             consumed by backward_propagation_n
    """
    num_examples = X.shape[1]
    W1, b1, W2, b2, W3, b3 = (
        parameters[key] for key in ("W1", "b1", "W2", "b2", "W3", "b3")
    )

    # Layer 1: linear -> relu
    Z1 = np.dot(W1, X) + b1
    A1 = gc_utils.relu(Z1)

    # Layer 2: linear -> relu
    Z2 = np.dot(W2, A1) + b2
    A2 = gc_utils.relu(Z2)

    # Layer 3: linear -> sigmoid
    Z3 = np.dot(W3, A2) + b3
    A3 = gc_utils.sigmoid(Z3)

    # Cross-entropy: one term for the positive labels, one for the negative
    positive_term = np.multiply(-np.log(A3), Y)
    negative_term = np.multiply(-np.log(1 - A3), 1 - Y)
    cost = (1 / num_examples) * np.sum(positive_term + negative_term)

    return cost, (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)

    
def backward_propagation_n(X, Y, cache):
    """
    Backward pass of the three-layer network
    [LINEAR -> RELU] -> [LINEAR -> RELU] -> [LINEAR -> SIGMOID].

    Arguments:
    X -- input data; the number of examples is read from X.shape[1]
    Y -- labels of the input data
    cache -- tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
             as returned by forward_propagation_n

    Returns:
    gradients -- dictionary with the gradients of the cost with respect to
                 each pre-activation, activation and parameter, under the keys
                 "dZ3", "dW3", "db3", "dA2", "dZ2", "dW2", "db2",
                 "dA1", "dZ1", "dW1", "db1"
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer: for sigmoid + cross-entropy the gradient w.r.t. Z3 is A3 - Y
    dZ3 = A3 - Y
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis = 1, keepdims = True)

    # Second hidden layer: the ReLU derivative is 1 where the activation is
    # positive and 0 elsewhere
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis = 1, keepdims = True)

    # First hidden layer
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis = 1, keepdims = True)

    gradients = {
        "dZ3": dZ3, "dW3": dW3, "db3": db3,
        "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA2": dA2,
        "dZ1": dZ1, "dW1": dW1, "db1": db1, "dA1": dA1,
    }

    return gradients

def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    """
    Gradient check for the multi-parameter network.

    Perturbs each entry of the flattened parameter vector by +/- epsilon,
    re-runs forward_propagation_n to get the cost, and compares the resulting
    centered finite-difference gradient with the analytic gradient. Prints
    whether the relative difference is below 1e-7.

    Arguments:
    parameters -- dictionary with the parameters of the network
    gradients -- gradients of the cost with respect to the parameters,
                 as produced by backward_propagation_n
    X -- input data
    Y -- labels of the input data
    epsilon -- small shift applied to each parameter to compute the approximate gradient

    Returns:
    difference -- ||grad - grad_approx|| / (||grad|| + ||grad_approx||),
                  or 0.0 when both gradients are exactly zero
    """
    # Flatten parameters and gradients into vectors. The second return value
    # of dictionary_to_vector (the keys) is not needed here.
    # NOTE(review): assumes both helpers return column vectors with matching
    # ordering -- confirm against gc_utils.
    parameters_values, _ = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)

    num_parameters = parameters_values.shape[0]
    grad_approx = np.zeros((num_parameters, 1))

    # Estimate the gradient one parameter at a time
    for i in range(num_parameters):
        # Cost with parameter i shifted by +epsilon
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        J_plus, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_plus))

        # Cost with parameter i shifted by -epsilon
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        J_minus, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_minus))

        grad_approx[i] = (J_plus - J_minus) / (2 * epsilon)

    # Relative difference between the analytic and the numerical gradient
    numerator = np.linalg.norm(grad - grad_approx)
    denominator = np.linalg.norm(grad) + np.linalg.norm(grad_approx)

    # Both gradients exactly zero means perfect agreement. Dividing 0 by 0
    # would give nan, which would be reported as a failure.
    if denominator == 0:
        difference = 0.0
    else:
        difference = numerator / denominator

    if difference < 1e-7:
        print("梯度检查：梯度正常!")
    else:
        print("梯度检查：梯度超出阈值!")

    return difference

# Test gradient_check_n on the test case provided by testCases.
print("-----------------测试gradient_check-----------------")
X, Y, parameters = testCases.gradient_check_n_test_case()

# Forward pass gives the cost and the cache, backward pass gives the analytic
# gradients, and gradient_check_n compares them with the numerical estimate.
cost, cache = forward_propagation_n(X, Y , parameters)
gradients = backward_propagation_n(X, Y, cache)
difference = gradient_check_n(parameters, gradients, X, Y)
print("difference:" + str(difference))

-----------------测试gradient_check-----------------
梯度检查：梯度超出阈值!
difference:1.1890913024229996e-07
