https://blog.csdn.net/u012328159/article/details/80081962

## 手写神经网络

In [15]:
def initialize_parameters(layer_dims):
    """
    Build the initial weights and biases for a fully connected network.

    :param layer_dims: list with the number of units in each layer, input
                       layer first
    :return: dict mapping "W<l>" to an array of shape
             (layer_dims[l], layer_dims[l-1]) drawn from a standard normal
             and scaled by 0.1, and "b<l>" to a zero array of shape
             (layer_dims[l], 1), for l = 1 .. len(layer_dims) - 1
    """
    np.random.seed(3)  # fixed seed so every call produces the same weights
    L = len(layer_dims)
    params = {}
    for l in range(1, L):
        params["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.1
        # Index by the loop variable l (not the literal 1) so that every
        # layer gets its own bias with the matching number of rows.
        params["b" + str(l)] = np.zeros((layer_dims[l], 1))
    return params

In [2]:
def linear_forward(x, w, b):
    """
    Affine step of a layer: return np.dot(w, x) + b.

    :param x: input to the layer
    :param w: weight matrix applied on the left of x
    :param b: bias added to the product
    """
    weighted_input = np.dot(w, x)
    return weighted_input + b

In [3]:
def relu_forward(Z):
    """
    ReLU activation: element-wise maximum of 0 and Z.

    :param Z: pre-activation values
    :return: Z with every negative entry replaced by 0
    """
    return np.maximum(0, Z)

def sigmod(Z):
    """
    Logistic sigmoid activation: 1 / (1 + exp(-Z)), applied element-wise.

    :param Z: pre-activation values
    :return: values in the open interval (0, 1) for finite Z
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

In [4]:
def forward_propagation(X, params):
    """
    Run the forward pass: (linear -> ReLU) for layers 1..L-1, then
    linear -> sigmoid for layer L.

    X -- input dataset, of shape (input size, number of examples)
    params -- python dictionary containing the parameters "W1", "b1", "W2", "b2",...,"WL", "bL"
                    W -- weight matrix of shape (size of current layer, size of previous layer)
                    b -- bias vector of shape (size of current layer,1)
    :return:
    AL: the output of the last layer (y_predict)
    caches: list with one tuple per layer, in layer order: (A_prev, W, b, z)
            where A_prev is the input fed to that layer and z its linear output
    """
    L = len(params) // 2  # each layer contributes one "W" and one "b" entry
    A = X
    caches = []

    # Hidden layers 1 .. L-1: linear step followed by ReLU.
    for l in range(1, L):
        W = params["W" + str(l)]
        b = params["b" + str(l)]
        z = linear_forward(A, W, b)
        caches.append((A, W, b, z))
        A = relu_forward(z)

    # Output layer L: linear step followed by sigmoid.
    WL = params["W" + str(L)]
    bL = params["b" + str(L)]
    zL = linear_forward(A, WL, bL)
    # Cache the output layer's own values, not the leftovers of the last
    # hidden layer, so the backward pass differentiates the right layer.
    caches.append((A, WL, bL, zL))
    AL = sigmod(zL)
    return AL, caches

In [5]:
def compute_cost(AL, Y):
    """
    Cross-entropy cost between predictions and labels, divided by the
    number of examples.

    :param AL: activations of the last layer, i.e. the predictions,
               shape: (1, number of examples)
    :param Y: true labels, shape: (1, number of examples)
    :return: the cost after np.squeeze
    """
    example_count = Y.shape[1]
    loss_for_ones = np.multiply(-np.log(AL), Y)
    loss_for_zeros = np.multiply(-np.log(1 - AL), 1 - Y)
    # nansum leaves NaN entries (such as inf * 0) out of the total.
    total_loss = np.nansum(loss_for_ones + loss_for_zeros)
    return np.squeeze(1. / example_count * total_loss)

In [6]:
def relu_backward(dA, Z):
    """
    Backward step through ReLU: pass dA through where Z > 0, zero elsewhere.

    :param dA: upstream gradient with respect to the ReLU output
    :param Z: the input that was given to the activation function
    :return: gradient with respect to Z
    """
    positive_mask = np.int64(Z > 0)  # 1 where the unit was active, else 0
    return np.multiply(dA, positive_mask)

In [8]:
def linear_backward(dZ, cache):
    """
    Backward step through the linear part of one layer.

    :param dZ: upstream derivative with respect to this layer's linear output
    :param cache: tuple (A, W, b, z) stored for this layer during the forward
                  pass; only A (the layer input) and W are used here
    :return: (da, dW, db) -- gradients with respect to the layer input,
             the weights and the bias; db keeps a trailing axis of size 1
    """
    A, W, _, _ = cache
    dW = np.dot(dZ, A.T)
    # axis must be an integer: np.sum rejects a float axis with a TypeError.
    db = np.sum(dZ, axis=1, keepdims=True)
    da = np.dot(W.T, dZ)
    return da, dW, db

In [11]:
def backward_propagation(AL, Y, caches):
    """
    Run the backward pass for a network whose hidden layers use ReLU and
    whose output layer uses sigmoid.

    Arguments:
    AL -- output of the last layer, as returned by forward_propagation()
    Y -- true "label" vector, with the same shape as AL
    caches -- caches output from forward_propagation(); one tuple
              (A_prev, W, b, z) per layer, in layer order

    Returns:
    gradients -- A dictionary with keys "dW1", "db1", ..., "dWL", "dbL"
    """
    m = Y.shape[1]
    L = len(caches) - 1  # list index of the output layer's cache

    # Output layer: the derivative with respect to its linear output is
    # taken as (AL - Y) / m.
    dz = 1. / m * (AL - Y)
    da, dWL, dbL = linear_backward(dz, caches[L])
    gradients = {"dW" + str(L + 1): dWL, "db" + str(L + 1): dbL}

    # Hidden layers, from the last one down to the first:
    # ReLU backward -> linear backward.
    for l in reversed(range(0, L)):
        z = caches[l][3]
        dout = relu_backward(da, z)
        # Use this layer's own cache (index l), passed as a second argument.
        da, dW, db = linear_backward(dout, caches[l])

        gradients["dW" + str(l + 1)] = dW
        gradients["db" + str(l + 1)] = db
    return gradients

In [12]:
def update_params(params, grads, learning_rate):
    """
    Apply one gradient-descent step to every "W<l>" and "b<l>" entry.

    :param params: dict of parameters; its entries are rebound in place
    :param grads: dict holding "dW<l>" and "db<l>" for every layer
    :param learning_rate: step size multiplied into each gradient
    :return: the same params dict, after the update
    """
    layer_count = len(params) // 2
    for layer in range(1, layer_count + 1):
        suffix = str(layer)
        for prefix in ("W", "b"):
            key = prefix + suffix
            params[key] = params[key] - learning_rate * grads["d" + key]
    return params

In [13]:
def L_layer_model(X, Y, layer_dims, learning_rate, num_iterations):
    """
    Train the network with plain gradient descent and plot the cost curve.

    :param X: training inputs, passed to forward_propagation()
    :param Y: training labels, passed to compute_cost() and backward_propagation()
    :param layer_dims: list with the number of units in each layer
    :param learning_rate: step size used by update_params()
    :param num_iterations: number of gradient-descent iterations to run
    :return: the trained parameter dictionary
    """
    costs = []
    params = initialize_parameters(layer_dims)
    for i in range(0, num_iterations):
        AL, caches = forward_propagation(X, params)

        cost = compute_cost(AL, Y)
        # Report and record the cost once every 1000 iterations.
        if i % 1000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            costs.append(cost)

        grads = backward_propagation(AL, Y, caches)

        params = update_params(params, grads, learning_rate)
    print('length of cost')
    print(len(costs))
    plt.clf()
    plt.plot(costs)
    plt.xlabel("iterations(thousand)")  # x-axis label
    plt.ylabel("cost")  # y-axis label
    plt.show()
    return params

In [14]:
def predict(X_test, y_test, parameters):
    """
    Classify the test inputs and return the accuracy against y_test.

    :param X_test: test inputs, passed to forward_propagation()
    :param y_test: true labels, compared element-wise with the predictions;
                   expected to have shape (1, number of examples)
    :param parameters: trained parameter dictionary
    :return: 1 minus the mean absolute difference between the 0/1
             predictions and y_test
    """
    prob, _ = forward_propagation(X_test, parameters)
    # Threshold the first output row in one vectorized step: probabilities
    # strictly above 0.5 become 1.0, everything else 0.0.
    Y_prediction = (prob[:1, :] > 0.5).astype(float)
    accuracy = 1 - np.mean(np.abs(Y_prediction - y_test))
    return accuracy

In [None]:
def DNN(X_train, y_train, X_test, y_test, layer_dims, learning_rate=0.001, num_iterations=30000):
    """
    Train a network on the training split and return its accuracy on the
    test split.

    :param X_train: training inputs
    :param y_train: training labels
    :param X_test: test inputs
    :param y_test: test labels
    :param layer_dims: list with the number of units in each layer
    :param learning_rate: step size handed to L_layer_model()
    :param num_iterations: number of training iterations handed to L_layer_model()
    :return: the value returned by predict() for the test split
    """
    trained_params = L_layer_model(X_train, y_train, layer_dims, learning_rate, num_iterations)
    return predict(X_test, y_test, trained_params)