# 使用Numpy建立Neural Network的函式

Reference from https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795?fbclid=IwAR3foWAsx3Yg0hWRBPxfbXPVsO_SNHdlwSuGbxdmpJD4IaZ9lGXKneDW0Ps



In [5]:
#nn_architecture.py
nn_architecture = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]

In [7]:
#Initialization parameters(w,b) init_layer.py
def init_layer(nn_architecture,seed = 99):
    np.radom.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_outper_size = layer["output_dim"]

        params_values['w' + str(layer_idx)] = np.random.randn(    
            layer_output_size, layer_input_size)*0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size,1)*0.1

    return params_values

## Activation Function


In [8]:
# activations.py
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA* sig * (1-sig)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = Ture)
    dZ[Z <= 0] = 0;
    return dZ

## Forward propagation


In [10]:
#Single layer forward propagation step  single_layer_forward_propagation.py
def single_layer_forward_propagation(A_prev,W_curr,b_curr,activation = "relu"):
    
    if activation is "relu":
        activation_func = relu
    elif activation is "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation')
    
    return activation_func(Z_curr),Z_curr


In [14]:
#full_forward_propagation.py
def full_forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr
        
        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)
        
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
       
    return A_curr, memory

## Loss Function
https://cdn-images-1.medium.com/max/800/1*KoIzZjKpNdU-zHkHvrNonA.gif



In [15]:
#get_cost_value.py 
#CrossEntropy

def get_cost_value(Y_hat, Y):
    m = Y_hat.shape[1]
    cost = -1 / m *(np.dot(Y,np.log(Y_hat).T)) + np.dot(1 - Y, np.log(1 - Y_hat).T)
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    Y_hat_ =  convet_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis = 0).mean()

## Backward Propagation

In [16]:
#single_layer_backward_propagation.py

def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation = "relu")
    m = A_prev.shape[1]
    
    if activation is "relu" :
        backward_activation_func = relu_backward
    elif activation is "sigmoid" :
        backward_activation_func = sigmoid_backward
    else :
        raise Exception('Non-supported activation function')
        
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr,A_prev.T)/m
    dW_curr = np.dot(dZ_curr,A_prev.T)/m
    db_curr = np.sum(dZ_curr, axis = 1,keepdims = True)/m
    dA_curr = np.dot(W_curr.T, Dz_curr)

    return dA_prev, dW_curr, db_curr

SyntaxError: invalid syntax (<ipython-input-16-56193b850ff4>, line 3)

In [21]:
#full_backward_propagation.py
def full_backward_propagation(Y_hat, Y ,memory, params_values, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
    
    dA_prev = -(np.divide(Y, Y_hat)) - np.divide(1- Y, 1-Y_hat)
    for layer_idx_prev, layer in reversed(list(enumerate(nn.architecture))):  #reversed 反訪問序列
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, dW_curr, b_curr, Z_curr, A_prev, activ_function_curr)
        
        grads_valus["dW" + str(layer_idx_curr)] = dW_curr
        grads_valus["db" + str(layer_idx_curr)] = db_curr
        
    return grads_values

## Updating Parameters Values

In [22]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]        
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values;


## Training Model


In [24]:
#train.py
def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
    return params_values, cost_history, accuracy_history