In [None]:
import numpy as np
import matplotlib.pyplot as plt
import h5py

# Default figure size and grayscale, non-interpolated image rendering for plots.
plt.rcParams.update({
    'figure.figsize': (5.0, 4.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# Seed NumPy's global RNG so random weight initialisation is repeatable.
np.random.seed(1)

def sigmoid(Z):
    """Apply the logistic sigmoid element-wise.

    Returns a tuple ``(A, cache)`` where ``A = 1 / (1 + exp(-Z))`` and
    ``cache`` is the input ``Z`` itself, kept for the backward pass.
    """
    activation = 1/(1+np.exp(-Z))
    return activation, Z

def sigmoid_backward(dA, cache):
    """Back-propagate through a sigmoid activation.

    ``cache`` is the pre-activation ``Z`` stored by ``sigmoid``. The result is
    ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``, i.e. the gradient of the cost
    with respect to ``Z``.
    """
    sig = 1/(1+np.exp(-cache))
    return dA*sig*(1-sig)

def relu(Z):
    """Apply the rectified linear unit element-wise.

    Returns ``(max(0, Z), Z)``; the second item is the untouched input, kept
    as the cache for the backward pass.
    """
    return np.maximum(0, Z), Z

def relu_backward(dA, cache):
    """Back-propagate through a ReLU activation.

    ``cache`` is the pre-activation ``Z`` stored by ``relu``. A copy of ``dA``
    is returned with every entry zeroed where ``Z <= 0``; ``dA`` itself is not
    modified.
    """
    inactive = cache <= 0
    grad = np.array(dA, copy=True)
    grad[inactive] = 0
    return grad

def init_params(n_x,n_h,n_y):
    """Create the parameters of a two-layer network.

    Weights are drawn from a standard normal scaled by 0.01; biases are zero
    column vectors. Returns a dict with keys ``W1`` (n_h, n_x), ``W2``
    (n_y, n_h), ``b1`` (n_h, 1) and ``b2`` (n_y, 1).
    """
    # W1 is drawn before W2 so the RNG stream is consumed in a fixed order.
    return {
        "W1": np.random.randn(n_h, n_x)*0.01,
        "W2": np.random.randn(n_y, n_h)*0.01,
        "b1": np.zeros((n_h, 1)),
        "b2": np.zeros((n_y, 1)),
    }

def init_params_deep(layer_dims):
    """Create the parameters of an L-layer network.

    ``layer_dims`` lists the size of every layer, input layer first. For each
    layer ``l >= 1`` the result holds ``W<l>`` of shape
    (layer_dims[l], layer_dims[l-1]), drawn from a standard normal scaled by
    0.01, and a zero bias ``b<l>`` of shape (layer_dims[l], 1).

    Note: this re-seeds NumPy's global RNG with 3 on every call.
    """
    np.random.seed(3)
    params = {}
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(size_pairs, start=1):
        params["W"+str(idx)] = np.random.randn(n_out, n_in)*0.01
        params["b"+str(idx)] = np.zeros((n_out, 1))
    return params

def linear_forward(A, W, b):
    """Compute the linear part of a layer, ``Z = W . A + b``.

    Returns ``(Z, cache)`` where ``cache`` is the tuple ``(A, W, b)`` of the
    inputs, kept for the backward pass.
    """
    return np.dot(W, A) + b, (A, W, b)

def linear_activation_forward(A_prev,W,b,activation):
    """Run one layer forward: linear step followed by an activation.

    Args:
        A_prev: activations of the previous layer (or the input data).
        W: weight matrix of this layer.
        b: bias vector of this layer.
        activation: ``'sigmoid'`` or ``'relu'``.

    Returns:
        ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` for the backward pass.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """
    # Validate up front: previously an unknown name fell through both branches
    # and failed later with an opaque UnboundLocalError.
    if activation == 'sigmoid':
        activate = sigmoid
    elif activation == 'relu':
        activate = relu
    else:
        raise ValueError(
            "unsupported activation {0!r}; expected 'sigmoid' or 'relu'".format(activation))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activate(Z)
    return A, (linear_cache, activation_cache)

def L_model_forward(X, params):
    """Forward pass through the whole network.

    Layers ``1 .. L-1`` use ReLU and the final layer ``L`` uses sigmoid, where
    ``L`` is half the number of entries in ``params`` (one ``W``/``b`` pair per
    layer).

    Returns ``(AL, caches)``: the output of the last layer and the list of
    per-layer caches in layer order.
    """
    num_layers = len(params)//2
    caches = []
    activation = X

    # Hidden layers: linear -> relu.
    for idx in range(1, num_layers):
        activation, layer_cache = linear_activation_forward(
            activation, params["W"+str(idx)], params["b"+str(idx)], 'relu')
        caches.append(layer_cache)

    # Output layer: linear -> sigmoid.
    AL, layer_cache = linear_activation_forward(
        activation, params["W"+str(num_layers)], params["b"+str(num_layers)], 'sigmoid')
    caches.append(layer_cache)
    return AL, caches

def compute_cost(AL, Y):
    """Compute the mean binary cross-entropy between predictions and labels.

    Args:
        AL: predicted probabilities, with examples along the last axis.
        Y: labels with the same number of elements as ``AL``; both a flat
            ``(m,)`` vector and a ``(1, m)`` row are accepted.

    Returns:
        The cost averaged over the examples, squeezed to a 0-d array.
    """
    # Align the labels with the predictions so the products below are
    # element-wise rather than relying on broadcasting.
    Y = Y.reshape(AL.shape)
    # Take the example count from the prediction's last axis: len(Y) is 1 for
    # a (1, m) label row, which turned the mean into a plain sum.
    m = AL.shape[-1]
    cost = -np.sum(np.multiply(np.log(AL), Y) + np.multiply(1-Y, np.log(1-AL)))/m
    return np.squeeze(cost)

def linear_backward(dZ, cache):
    """Back-propagate through the linear part of a layer.

    ``cache`` is the ``(A_prev, W, b)`` tuple stored by ``linear_forward``.
    Returns ``(dA_prev, dW, db)``; ``dW`` and ``db`` are averaged over the
    ``A_prev.shape[1]`` examples.
    """
    prev_act, weights, _ = cache
    n_examples = prev_act.shape[1]
    grad_w = np.dot(dZ, prev_act.T)/n_examples
    grad_b = np.sum(dZ, axis=1, keepdims=True)/n_examples
    grad_prev = np.dot(weights.T, dZ)
    return grad_prev, grad_w, grad_b

def linear_activation_backward(dA,cache, activation):
    """Back-propagate through one layer (activation, then linear step).

    Args:
        dA: gradient of the cost with respect to this layer's output.
        cache: ``(linear_cache, activation_cache)`` as produced by
            ``linear_activation_forward``.
        activation: ``'sigmoid'`` or ``'relu'``.

    Returns:
        ``(dA_prev, dW, db)`` as returned by ``linear_backward``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """
    # Validate up front: previously an unknown name left ``dZ`` unbound and
    # failed with an opaque UnboundLocalError.
    if activation == 'sigmoid':
        activation_backward = sigmoid_backward
    elif activation == 'relu':
        activation_backward = relu_backward
    else:
        raise ValueError(
            "unsupported activation {0!r}; expected 'sigmoid' or 'relu'".format(activation))

    linear_cache, activation_cache = cache
    dZ = activation_backward(dA, activation_cache)
    return linear_backward(dZ, linear_cache)

def L_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    Args:
        AL: output of the forward pass (sigmoid probabilities).
        Y: labels; reshaped to ``AL.shape`` before use.
        caches: per-layer caches from ``L_model_forward``, in layer order.

    Returns:
        A dict of gradients. For every layer ``l`` in ``1 .. L`` it holds
        ``dW<l>`` and ``db<l>``, plus ``dA<l-1>``, the gradient with respect
        to that layer's input.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))

    # Output layer uses the sigmoid activation.
    grads["dA"+str(L-1)], grads["dW"+str(L)], grads["db"+str(L)] = \
        linear_activation_backward(dAL, caches[L-1], 'sigmoid')

    # Hidden layers use ReLU; caches[l] belongs to layer l+1.
    for l in reversed(range(L-1)):
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads["dA"+str(l+1)], caches[l], 'relu')
        grads['dA'+str(l)] = dA_prev_temp
        grads['dW'+str(l+1)] = dW_temp
        grads["db"+str(l+1)] = db_temp
    return grads

# Backpropagation gradient assignment (alternative key indexing, kept commented out for reference)
#     grads["dA"+str(L)],grads["dW"+str(L)],grads["db"+str(L)]=linear_activation_backward(dAL, current_cache, 'sigmoid')
#     for l in reversed(range(L-1)):
#         current_cache = caches[l]
#         dA_prev_temp,dW_temp,db_temp=linear_activation_backward(grads["dA"+str(l+2)],current_cache,'relu')
#         grads['dA'+str(l+1)] = dA_prev_temp
#         grads['dW'+str(l+1)] = dW_temp
#         grads["db"+str(l+1)] = db_temp
#     return grads

def update_params(params, grads,learning_rate):
    """Apply one gradient-descent step to every layer.

    For each layer ``l`` the entries ``W<l>`` and ``b<l>`` of ``params`` are
    replaced by ``value - learning_rate * grads["d" + key]``. The same
    ``params`` dict is updated and returned; the old arrays are not modified.
    """
    num_layers = len(params)//2
    for idx in range(1, num_layers+1):
        for prefix in ("W", "b"):
            key = prefix+str(idx)
            params[key] = params[key]-learning_rate*grads["d"+key]
    return params

def load_data(trainfilepath="E://project//data/train_catvnoncat.h5",
              testfilepath="E://project//data/test_catvnoncat.h5"):
    """Load the cat (y=1) / non-cat (y=0) data set from two HDF5 files.

    Args:
        trainfilepath: HDF5 file holding ``train_set_x`` and ``train_set_y``.
        testfilepath: HDF5 file holding ``test_set_x`` and ``test_set_y``.

    Returns:
        ``(train_set_x, train_set_y, test_set_x, test_set_y)``. Each image set
        is flattened to one column per example (shape ``(features, m)``) and
        divided by 255; the label arrays are returned as read from the files.
    """
    # Context managers close the HDF5 handles, which were previously leaked.
    with h5py.File(trainfilepath, 'r') as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y = np.array(train_dataset["train_set_y"][:])

    with h5py.File(testfilepath, 'r') as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y = np.array(test_dataset["test_set_y"][:])

    m_train = train_set_x_orig.shape[0]
    m_test = test_set_x_orig.shape[0]

    # One row per example, then transpose so each column is one example.
    train_set_x_flatten = train_set_x_orig.reshape(m_train, -1).T
    test_set_x_flatten = test_set_x_orig.reshape(m_test, -1).T

    train_set_x = train_set_x_flatten/255
    test_set_x = test_set_x_flatten/255
    return train_set_x, train_set_y, test_set_x, test_set_y


def two_layer_model(X,Y,layers_dims,learning_rate,
                   num_iter,print_cost):
    """Train a two-layer network (linear->relu->linear->sigmoid).

    Args:
        X: input data, one example per column.
        Y: labels for the examples in ``X``.
        layers_dims: tuple ``(n_x, n_h, n_y)`` of layer sizes.
        learning_rate: gradient-descent step size.
        num_iter: number of gradient-descent iterations.
        print_cost: when true, print and record the cost every 100 iterations.

    Returns:
        The trained parameter dict with keys ``W1``, ``b1``, ``W2``, ``b2``.
        The recorded costs are plotted before returning.
    """
    costs = []
    (n_x, n_h, n_y) = layers_dims
    params = init_params(n_x, n_h, n_y)

    for i in range(num_iter):
        # Forward pass.
        A1, cache1 = linear_activation_forward(X, params["W1"], params["b1"], activation='relu')
        A2, cache2 = linear_activation_forward(A1, params["W2"], params["b2"], activation='sigmoid')
        cost = compute_cost(A2, Y)

        # Backward pass, starting from the derivative of the cost w.r.t. A2.
        dA2 = -(np.divide(Y, A2)-np.divide(1-Y, 1-A2))
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, activation='sigmoid')
        _, dW1, db1 = linear_activation_backward(dA1, cache1, activation='relu')
        grads = {'dW1': dW1, 'dW2': dW2, 'db1': db1, 'db2': db2}

        params = update_params(params, grads, learning_rate)

        if print_cost and i%100==0:
            print("cost after iteration {0}:{1}".format(i,np.squeeze(cost)))
            costs.append(cost)

    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    # Costs are sampled every 100 iterations, so label the axis accordingly.
    plt.xlabel('iterations (per hundreds)')
    plt.title("learning rate = " + str(learning_rate))
    plt.show()
    return params

def L_layer_model(X,Y,layers_dims,learning_rate,num_iter,print_cost):
    """Train an L-layer network ((linear->relu) * (L-1) -> linear->sigmoid).

    Args:
        X: input data, one example per column.
        Y: labels for the examples in ``X``.
        layers_dims: sizes of all layers, input layer first.
        learning_rate: gradient-descent step size.
        num_iter: number of gradient-descent iterations.
        print_cost: when true, print and record the cost every 100 iterations.

    Returns:
        The trained parameter dict. The recorded costs are plotted before
        returning.
    """
    costs = []
    params = init_params_deep(layers_dims)
    for i in range(num_iter):
        AL, caches = L_model_forward(X, params)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        params = update_params(params, grads, learning_rate)
        if print_cost and i%100==0:
            print("cost after iteration {0}: {1}".format(i,np.squeeze(cost)))
            # Record the scalar cost; appending the list to itself (the
            # previous behaviour) left nothing plottable.
            costs.append(cost)
    plt.plot(np.squeeze(costs))
    plt.ylabel("cost")
    # Costs are sampled every 100 iterations, so label the axis accordingly.
    plt.xlabel("iterations (per hundreds)")
    plt.title("learning rate = " + str(learning_rate))
    plt.show()
    return params

def predict(X,y,params):
    """Classify the examples in ``X`` and print the accuracy against ``y``.

    Runs the forward pass and thresholds the first row of the output at 0.5.
    Returns a ``(1, m)`` float array of 0/1 predictions, where ``m`` is the
    number of columns in ``X``.
    """
    m = X.shape[1]
    probas, _ = L_model_forward(X, params)

    # Start from all zeros and switch on the examples scoring above 0.5.
    p = np.zeros((1, m))
    p[0, probas[0] > 0.5] = 1

    print("accuracy: " + str(np.sum((p==y)/m)))
    return p
        
if __name__ == '__main__':
    # Load the flattened, scaled cat / non-cat data set.
    train_X, train_Y, test_X, test_Y = load_data()

    # Layer sizes: input features, hidden units, output units.
    n_x, n_h, n_y = 12288, 7, 1
    layers_dims = (n_x, n_h, n_y)
#     params = two_layer_model(train_X,train_Y,layers_dims,0.0075,2500, True)
    params = L_layer_model(train_X, train_Y, layers_dims, 0.0075, 2500, True)
#     predict(train_X, train_Y, params)
#     predict(test_X, test_Y, params)

cost after iteration 0: 0.6923799160908504
cost after iteration 100: 0.6461586699206975
cost after iteration 200: 0.6317752454522948
cost after iteration 300: 0.6000906114062465
cost after iteration 400: 0.5594266141002633
cost after iteration 500: 0.5129882975097563
cost after iteration 600: 0.4548146042072384
cost after iteration 700: 0.3993880492055353
cost after iteration 800: 0.42051546995772576
cost after iteration 900: 0.36918441320112994
cost after iteration 1000: 0.36239275567923807
cost after iteration 1100: 0.34136590795711635
cost after iteration 1200: 0.3333440319337274
cost after iteration 1300: 0.26379708597074125
cost after iteration 1400: 0.16480532038932783
cost after iteration 1500: 0.14360822941781876
cost after iteration 1600: 0.12446701193149182
cost after iteration 1700: 0.7067523839110125
cost after iteration 1800: 0.09239377122581144
cost after iteration 1900: 0.07857238240203042
cost after iteration 2000: 0.06809230848758144
cost after iteration 2100: 0.059649