# Test Initialization on Neural Network

## Table of Contents

* [1. Build a L-layers Neural Network ](#chapter1)
    * [1.1 Initialize parameters](#section_1_1)
    * [1.2 Forward propagation](#section_1_2)
    * [1.3 Cost function](#section_1_3)
    * [1.4 Backward Propagation](#section_1_4)
    * [1.5 Update parameters](#section_1_5) 
    * [1.6 Predict](#section_1_6) 
    * [1.7 Model](#section_1_7)
* [2. Example on Dataset 1](#chapter2)
    * [2.1 Load the Dataset](#section_2_1)
    * [2.2 Display the Data](#section_2_2)
    * [2.3 Flatten the data](#section_2)
    * [2.4 Normalize the data](#section_2_4)
* [3. Example on Dataset 2](#chapter3)
    

In [1]:
# Packages
import copy
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss


%matplotlib inline

# 1. Build a L-layer Neural Network <a class="anchor" id="chapter1"></a>

## 1.1 initialization parameters  <a class="anchor" id="section_1_1"></a>

In [2]:
def initialize_parameters(layers_dims,n_input,n_output,type_init="random"):
    """
    Compute the initialization of the parameters in our Neural Network

    - Arguments:
    layers_dims: array containing the dimension of the hidden layers
    n_input: numbers of features in the input layer
    n_ouput: numbers of nodes in the output layer
    type_init: "zeros","random","he" type of initialization

    - Return:
    parameters: dictionnary containing of the parameters of our neural network
    """

    # init
    parameters = {}

    # add the output layer to the array
    layers_dims.append(n_output)

    # number of layers
    L = len(layers_dims)

    for i in range(L):
        
        # if i==0 take n_x features
        if i ==0:
            layer_prev = n_input
        else:
            layer_prev = layers_dims[i-1]


        # check type of initialization
        if type_init.lower() == "random":

            parameters["W" + str(i+1)] = np.random.randn(layers_dims[i],layer_prev) * 10
            parameters["b" + str(i+1)] = np.zeros((layers_dims[i],1))

        elif type_init.lower() == "zeros":

            parameters["W" + str(i+1)] = np.zeros((layers_dims[i],layer_prev))
            parameters["b" + str(i+1)] = np.zeros((layers_dims[i],1))
        
        elif type_init.lower() == "he":

            parameters["W" + str(i+1)] = np.random.randn(layers_dims[i],layer_prev) * np.sqrt(2/layer_prev)
            parameters["b" + str(i+1)] = np.zeros((layers_dims[i],1))

        else:
            # default init
            parameters["W" + str(i+1)] = np.random.randn(layers_dims[i],layer_prev) * np.sqrt(2/layer_prev)
            parameters["b" + str(i+1)] = np.zeros((layers_dims[i],1))


    return parameters

In [3]:
# test initialization
hidden_layers_dim = [5,5,3]
n_input = 2
n_output = 1

params = initialize_parameters(hidden_layers_dim,n_input,n_output,"random")
params

{'W1': array([[ 11.32560366,  -3.26043072],
        [ -2.11363361,  -4.14921609],
        [ -4.08071049, -13.67506506],
        [  9.58239619,  -9.16575335],
        [  8.16774933,  -7.11651153]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[ 19.68693908,  18.34366201,  10.59021041, -13.00776998,
          -0.25951835],
        [ -4.13002775,  14.09642669,  -3.23203552,   7.88064143,
         -12.33612276],
        [  2.50541358,  -4.40450762,  -8.51534481,   5.31997459,
          -7.0632816 ],
        [  0.99583807,   3.5692665 ,  10.22561226,  17.05603498,
          -2.97087116],
        [  0.82053045,  -9.18743069,  -0.89199566, -22.23700812,
           4.25948501]]),
 'b2': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W3': array([[ -7.08581924,   8.69877027,  -0.59588335,  -3.4479965 ,
           0.25808352],
        [  5.13541146,  18.04046724,   7.58992139, -11.12741265,
         -10.86649237],
        [

## 1.2 Forward propagation <a class="anchor" id="section_1_2"></a>

In [4]:
def activation_function(Z,activation_name):

    if activation_name.lower() == "sigmoid":

        A = 1/(1+np.exp(-Z))
    
    elif activation_name.lower() == "relu":

        A = np.maximum(0,Z)

    elif activation_name.lower() == "tanh":

        A = np.tanh(Z)
    
    else:
        # By default relu
        A = np.maximum(0,Z)

    return A

In [21]:
def forward_propagation(X,parameters,activation_name="relu"):

    """
    Compute the activation function
    
    Arguments:
    activation_name -- name of the activation function choosen
    Z -- items

    Returns:
    activation -- activation value
    """
    # init cache
    caches = []
    cache_layer = {}

    # layer
    L = len(parameters)//2

    # setting A_prev to X
    A_prev = X

    for i in range(1,L+1):

        # getting parameters
        W = parameters["W" + str(i)]
        b= parameters["b" + str(i)]

        # linear result
        Z = np.dot(W,A_prev) + b

        if i==L:
            # last layer -  sigmoid 
            A = activation_function(Z,"sigmoid")
        else:
            A = activation_function(Z,"relu")

        # adding to the cache
        cache = {"W" : W, "b":b,"A":A,"Z":Z,"A_prev": A_prev}

        # adding layer cache
        caches.append(cache)

        # setting A_prev
        A_prev = A

    return A, caches

In [22]:
# test forward propagation

layers_dim = [5,5,3]
X = np.random.randn(2,100) *0.01
params = initialize_parameters(layers_dim,X.shape[0],1)

AL,caches = forward_propagation(X,params)

# check the shape of W 
for val in caches:
    print("W shape:",val["W"].shape,"b shape:",val["b"].shape)

W shape: (5, 2) b shape: (5, 1)
W shape: (5, 5) b shape: (5, 1)
W shape: (3, 5) b shape: (3, 1)
W shape: (1, 3) b shape: (1, 1)


## 1.3 Compute Loss <a class="anchor" id="section_1_3"></a>

In [11]:
def cost_function(AL,y):

    """
    Compute the log loss L(y_pred,y)

    -- Arguments:
    y : true labels of the dataset
    AL : result of the forward propagation 

    -- Returns:
    cost : Log loss cost

    """
    # m examples
    m = y.shape[1]

    cost = -(1/m) *(np.dot(y,np.log(AL).T) + np.dot(1-y,np.log(1-AL).T))

    return np.squeeze(cost)

In [17]:
# Test the cost
y_true = np.random.randint(0,2,(1,100))
y_pred = np.random.random((1,100))

# check with the true log_loss
cost = cost_function(y_pred,y_true)
l_cost = log_loss(y_true.T,y_pred.T)
print("cost_function result:",cost)
print("log_loss sklearn:",l_cost)

cost_function result: 0.9572727403146426
log_loss sklearn: 0.9572727403146427


## 1.4 Backward propagation <a class="anchor" id="section_1_4"></a>

In [18]:
def backward_activation(dA,Z,function_name="relu"):

    """
    Compute dZ for the backward propagation

    -- Arguments:
    dA : derivative of A
    Z : linear activation
    function_name: name of the activation_function

    -- Returns:
    dZ: derivative of Z
    """

    if function_name.lower() == "sigmoid":
        # sigmoid
        s = 1/(1+np.exp(-Z))

        # derivative sigmoid
        dG = s*(1-s)

        # dZ
        dZ = dA * dG

    elif function_name.lower() == "relu":

        # relu
        r = np.maximum(0,Z)

        # derivative relu
        dG = np.int64(r>0)

        # dZ
        dZ = np.multiply(dA,dG)

    elif function_name.lower() == "tanh":

        # tanh
        th = np.tanh(Z)

        # derivative tanh
        dG = 1-np.power(th,2)

        # dZ
        dZ = dA * dG

    else:
        # by default relu
        r = np.maximum(0,Z)

        # derivative relu
        dG = np.int64(r>0)

        # dZ
        dZ = np.multiply(dA,dG)

    return dZ

In [30]:
def backward_propagation(AL,y,caches):

    # gradients
    gradients = {}

    # numbers of layers
    L = len(caches)

    # number of examples
    m = y.shape[1]

    # dAL
    dAL = - (np.divide(y, AL) - np.divide(1 - y, 1 - AL))
    
    # getting cache layer L
    current_cache = caches[-1]
    WL = current_cache['W']
    ZL = current_cache['Z']
    A_prev = current_cache['A_prev']

    dZL = backward_activation(dAL,ZL,"sigmoid")

    dW_temp = (1/m)*np.dot(dZL,A_prev.T)
    db_temp = (1/m) * np.sum(dZL,axis=1, keepdims=True)
    dA_prev_temp = np.dot(WL.T,dZL)
    
    # compute the gradient
    gradients["dW" + str(L)] = dW_temp
    gradients["db" + str(L)] = db_temp


    for i in reversed(range(L-1)):

        # getting cache layer L
        current_cache = caches[i]
        W = current_cache['W']
        Z = current_cache['Z']
        A_prev = current_cache['A_prev']

        dZ = backward_activation(dA_prev_temp,Z,"relu")

        dW_temp = (1/m)*np.dot(dZ,A_prev.T)
        db_temp = (1/m) * np.sum(dZ,axis=1, keepdims=True)
        dA_prev_temp = np.dot(W.T,dZ)
        
        # compute the gradient
        gradients["dW" + str(i+1)] = dW_temp
        gradients["db" + str(i+1)] = db_temp


    return gradients


In [34]:
# test backward propa

layers_dim = [5,5,4]
X = np.random.randn(10,100)
y_true = np.random.randint(0,2,(1,100))
params = initialize_parameters(layers_dim,X.shape[0],1)

AL,caches = forward_propagation(X,params)
    
gradients = backward_propagation(AL,y_true,caches)
gradients
for i in range(len(caches)):
    print(f"dW{i+1}",gradients[f"dW{i+1}"].shape)

dW1 (5, 10)
dW2 (5, 5)
dW3 (4, 5)
dW4 (1, 4)


  A = 1/(1+np.exp(-Z))
  dAL = - (np.divide(y, AL) - np.divide(1 - y, 1 - AL))
  dAL = - (np.divide(y, AL) - np.divide(1 - y, 1 - AL))
  dZ = dA * dG


## 1.5 Update parameters <a class="anchor" id="section_1_5"></a>

In [35]:
def update_parameters(parameters,gradients,learning_rate):

    # copy 
    params = copy.deepcopy(parameters)
    
    # nb layer
    L = len(parameters)//2

    for i in range(L):
        params["W" + str(i+1)] = params["W" + str(i+1)] - learning_rate * gradients["dW" + str(i+1)]
        params["b" + str(i+1)] = params["b" + str(i+1)] - learning_rate * gradients["db" + str(i+1)]

    return params

## 1.6 Predict <a class="anchor" id="section_1_6"></a>

In [41]:
def predict(X,parameters,function_name):

    y_pred,caches = forward_propagation(X,parameters,function_name)

    y_pred = np.where(y_pred>=0.5,1,0)

    return y_pred

In [42]:
def score(X,y,parameters):
    
    # prediction
    y_pred = predict(X,parameters,function_name="relu")

    # score
    acc = accuracy_score(y.T,y_pred.T)

    return acc

## 1.7 Model <a class="anchor" id="section_1_7"></a>

In [None]:
def neural_network_model(X_train,y_train,X_test,y_test,hidden_layers_dims,n_iter,learning_rate,function_name="relu",initialization_weights="random"):

    # init trackers
    dico_model = {}
    costs_train = []
    costs_test = []
    accs_train = []
    accs_test = []

    # init
    m = X.shape[1]
    n_input = X_train.shape[0]
    n_output = y_train.shape[0]

    # initialize parameters
    parameters = initialize_parameters(hidden_layers_dims,n_input,n_output,initialization_weights)


    for i in range(n_iter):

        # ---- forward propagation ------
        AL_train, caches = forward_propagation(X_train,parameters, activation_name=function_name)
        AL_test, caches_test = forward_propagation(X_test,parameters, activation_name=function_name)

        #  cost
        cost_train = cost_function(AL_train,y_train)
        costs_train.append(cost_train)

        cost_test = cost_function(AL_test,y_test)
        costs_test.append(cost_test)

        # accuracy 
        y_pred_train = predict(X_train,parameters,function_name)
        acc_train = accuracy_score(y_train.T,y_pred_train.T)
        accs_train.append(acc_train)

        y_pred_test = predict(X_test,parameters,function_name)
        acc_test = accuracy_score(y_test.T,y_pred_test.T)
        accs_test.append(acc_test)    

        # ---- bacward propagation ------
        gradients = backward_propagation(AL_train,y_train,caches,function_name)

        # ---- update params ------------
        params = update_parameters(parameters, gradients, learning_rate)
        parameters = params


    dict_model = {"parameters": parameters,"cost_train":costs_train,"accuracy_train":accs_train,"cost_test":costs_test,"accuracy_test":accs_test}

    return dict_model


# 2. Example on dataset 1 <a class="anchor" id="chapter2"></a>

# 3. Example on dataset 2 <a class="anchor" id="chapter3"></a>