# Build an L-layer Neural Network

## Table of Contents

* [Recap](#chapter0)
* [1. L-layers Neural Network Model](#chapter1)
    * [1.1 Functions of our L-layers Neural Network ](#section_1_1)
        * [1.1.1 Initialize parameters](#section_1_1_1)
        * [1.1.2 Forward propagation](#section_1_1_2)
        * [1.1.3 Cost function](#section_1_1_3)
        * [1.1.4 Backward Propagation](#section_1_1_4)
        * [1.1.5 Update parameters](#section_1_1_5) 
    * [1.2 L-layer Model](#section_1_2)
* [2. Load the Dataset ](#chapter2)
    * [2.1 Load the Dataset](#section_2_1)
    * [2.2 Display the Data](#section_2_2)
    * [2.3 Flatten the data](#section_2_3)
    * [2.4 Normalize the data](#section_2_4)

# Recap  <a class="anchor" id="chapter0"></a>

> Forward Propagation :

<center><img src="images/05-Deep Neural network/forward-prop.png" width = "600px"></center>

$$
\begin{cases}
    Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}, \quad A^{[0]} = X \\
    A^{[l]} = g^{[l]}(Z^{[l]}) 
\end{cases}
$$


> Backward Propagation :

<center><img src="images/05-Deep Neural network/backward-prop.png" width = "600px"></center>

$$
\begin{cases}
    dZ^{[L]} = A^{[L]} - Y \quad \text{(output layer } L \text{: sigmoid + cross-entropy)} \\
    dW^{[l]} = \frac{1}{m} dZ^{[l]}A^{[l-1]T} \\
    db^{[l]} = \frac{1}{m} \sum dZ^{[l]}    \\
    dA^{[l-1]} =W^{[l]T}dZ^{[l]} \\
    dZ^{[l-1]} = W^{[l]T}dZ^{[l]} * g^{[l-1]'}(Z^{[l-1]}) \\
\end{cases}
$$

$$
\begin{cases}
    dZ^{[1]} = W^{[2]T}dZ^{[2]} * g^{[1]'}(Z^{[1]}) \\
    dW^{[1]} = \frac{1}{m} dZ^{[1]} X^T \\
    db^{[1]} = \frac{1}{m} \sum  dZ^{[1]}
\end{cases}
$$


> Dimension :
- m : number of examples

$$\begin{cases}
    Z^{[l]},A^{[l]} : (n^{[l]},m) \\
    W^{[l]}: (n^{[l]},n^{[l-1]})   \\
    b^{[l]}: (n^{[l]},1)  \\
    dZ^{[l]},dA^{[l]} : (n^{[l]},m) \\
    dW^{[l]} : (n^{[l]},n^{[l-1]})   \\
    db^{[l]}: (n^{[l]},1)
\end{cases}
$$


# 1. L-layers Neural Network model <a class="anchor" id="chapter1"></a>

In [3]:
# Packages
import copy
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss


%matplotlib inline

## 1.1 Functions of our L-layers Neural Network  <a class="anchor" id="section_1_1"></a>

### 1.1.1 Initialize parameters  <a class="anchor" id="section_1_1_1"></a>

In [24]:
def initialize_parameters(hidden_layers_dim,n_input,n_output):
    """
    Initialize the weights and biases of every layer of the L-layer neural network.

    Weights are drawn from a standard normal distribution scaled by 0.01 and
    biases start at zero. The global NumPy seed is reset to 3 on every call,
    so two calls with the same arguments return the same values.

    Arguments:
    hidden_layers_dim -- list with the number of units of each hidden layer
                         (may be empty: the network then only has the output layer)
    n_input -- number of features (rows) of the input matrix X
    n_output -- number of units in the output layer

    Returns:
    parameters -- dictionary {"W1": ..., "b1": ..., ..., "WL": ..., "bL": ...} where
                  Wl has shape (n_l, n_{l-1}) and bl has shape (n_l, 1)
    """

    # seed (resets the global NumPy random state)
    np.random.seed(3)

    # sizes of every layer, input first and output last
    layer_sizes = [n_input, *hidden_layers_dim, n_output]

    parameters = {}

    # walk over consecutive (previous, current) layer sizes; layers are numbered from 1
    for layer, (n_prev, n_curr) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
        parameters[f"W{layer}"] = np.random.randn(n_curr, n_prev) * 0.01
        parameters[f"b{layer}"] = np.zeros((n_curr, 1))

    return parameters


In [36]:
# quick check: three hidden layers (5, 5 and 3 units), 4 input features, 1 output unit
layers_dim = [5, 5, 3]

params = initialize_parameters(layers_dim, n_input=4, n_output=1)
params

{'W1': array([[ 0.01788628,  0.0043651 ,  0.00096497, -0.01863493],
        [-0.00277388, -0.00354759, -0.00082741, -0.00627001],
        [-0.00043818, -0.00477218, -0.01313865,  0.00884622],
        [ 0.00881318,  0.01709573,  0.00050034, -0.00404677],
        [-0.0054536 , -0.01546477,  0.00982367, -0.01101068]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[-0.01185047, -0.0020565 ,  0.01486148,  0.00236716, -0.01023785],
        [-0.00712993,  0.00625245, -0.00160513, -0.00768836, -0.00230031],
        [ 0.00745056,  0.01976111, -0.01244123, -0.00626417, -0.00803766],
        [-0.02419083, -0.00923792, -0.01023876,  0.01123978, -0.00131914],
        [-0.01623285,  0.00646675, -0.00356271, -0.01743141, -0.0059665 ]]),
 'b2': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W3': array([[-0.00588594, -0.00873882,  0.00029714, -0.02248258, -0.00267762],
        [ 0.01013183,  0.00852798,  0.01108187,  0.01119391,  

### 1.1.2 Forward propagation  <a class="anchor" id="section_1_1_2"></a>

In [30]:
def activation_function(Z,activation_name):

    """
    Apply an element-wise activation function to Z.

    Arguments:
    Z -- numpy array of pre-activation values
    activation_name -- name of the chosen activation, case-insensitive:
                       "sigmoid", "relu" or "tanh"; any other name falls back to ReLU

    Returns:
    activation -- numpy array with the same shape as Z
    """

    name = activation_name.lower()

    if name == "sigmoid":

        # Numerically stable sigmoid: exp is only evaluated on non-positive
        # values, so it cannot overflow. The naive 1 / (1 + exp(-Z)) overflows
        # inside exp for very negative Z.
        exp_neg_abs = np.exp(-np.abs(Z))
        activation = np.where(Z >= 0,
                              1 / (1 + exp_neg_abs),
                              exp_neg_abs / (1 + exp_neg_abs))

    elif name == "tanh":

        activation = np.tanh(Z)

    else:
        # "relu"; ReLU is also the default for any unrecognized name
        activation = np.maximum(0,Z)


    assert(activation.shape == Z.shape)

    return activation

In [33]:
# check that the activation keeps the shape of its input
Z = np.random.randn(10, 100)
A = activation_function(Z, activation_name="sigmoid")

print(A.shape)

(10, 100)


In [58]:
def Forward_propagation(X,parameters):

    """
    Run the forward pass through every layer of the network.

    Hidden layers use ReLU; the last layer uses a sigmoid.

    Arguments:
    X -- input data
    parameters -- dictionary holding the weights "W1".."WL" and the biases "b1".."bL"

    Returns:
    caches -- list with one dictionary per layer; the dictionary of layer l stores
              the linear result under "Z{l}" and the activation under "A{l}"
    """

    # each layer contributes one W and one b entry
    n_layers = len(parameters)//2
    caches = []

    # the activation feeding the first layer is the input itself
    activation = X

    for layer in range(1, n_layers + 1):

        weights = parameters[f"W{layer}"]
        bias = parameters[f"b{layer}"]

        # linear step
        linear = np.dot(weights, activation) + bias

        # sigmoid on the output layer, ReLU everywhere else
        activation_name = "sigmoid" if layer == n_layers else "relu"
        activation = activation_function(linear, activation_name)

        # keep both results of this layer
        caches.append({f"Z{layer}": linear, f"A{layer}": activation})

    return caches

In [59]:
# forward pass on random data: 3 features, 100 examples
layers_dim = [5, 5, 3]
X = np.random.randn(3, 100)
params = initialize_parameters(layers_dim, n_input=X.shape[0], n_output=1)

caches = Forward_propagation(X, parameters=params)
caches

[{'Z1': array([[-8.78025465e-03, -2.92849067e-02,  1.52211605e-02,
           2.54422589e-02, -4.90038027e-03,  2.69300353e-02,
          -4.83454313e-05,  3.95270042e-03, -5.12391839e-03,
          -1.21468028e-02,  3.27799453e-03,  3.51831591e-02,
          -1.35377549e-02,  1.24135334e-02, -3.08190694e-03,
           2.97888467e-02,  8.75962702e-03,  5.09043558e-03,
           6.98297307e-03,  2.04038293e-03, -2.03577847e-02,
           2.70430585e-02, -1.66943388e-02,  7.25111050e-03,
           9.34741129e-03, -1.63316542e-02, -4.49349091e-03,
           1.89299420e-02, -5.08609640e-03, -1.47280614e-02,
          -4.90463889e-02, -1.86088904e-02, -1.13106088e-02,
          -1.71512258e-02, -1.83220232e-02,  6.80673410e-03,
          -9.40732629e-03, -6.95621283e-03,  2.11761175e-02,
          -1.12024863e-02, -4.33468310e-03,  3.71765516e-02,
           1.32403309e-02, -2.34709908e-02, -2.09851640e-03,
           1.35581308e-02,  2.45989734e-02,  7.18736006e-03,
          -4.52991

### 1.1.3 Cost function  <a class="anchor" id="section_1_1_3"></a>

In [81]:
def cost_function(AL,y,eps=1e-15):

    """
    Compute the binary cross-entropy cost after the forward propagation.

    Arguments:
    AL -- activations of the output layer (predicted probabilities), same shape as y
    y -- true labels of the dataset dim = (n_y,m) | m examples, n_y nodes of the output layer
    eps -- predictions are clipped to [eps, 1 - eps] before taking the logarithm

    Returns:
    cost -- scalar cost value: cross-entropy summed over the output nodes and
            averaged over the m examples
    """
    # m  examples
    m = y.shape[1]

    # Keep the logarithms finite: a saturated prediction (exactly 0 or 1) would
    # otherwise give log(0) = -inf and a NaN cost through 0 * -inf.
    AL = np.clip(AL, eps, 1 - eps)

    # Element-wise product instead of a matrix product, so the result stays a
    # scalar (without cross-terms between nodes) when n_y > 1.
    cost = -(1/m) * np.sum(y * np.log(AL) + (1 - y) * np.log(1 - AL))
    cost = np.squeeze(cost)

    return cost


In [83]:
# Test the cost on random labels and random predicted probabilities
y_true = np.random.randint(0,2,(1,100))
y_pred = np.random.random((1,100))

# check against scikit-learn's log_loss (called on the transposed arrays)
cost = cost_function(y_pred,y_true)
l_cost = log_loss(y_true.T,y_pred.T)
print(cost)
print(l_cost)

# fail loudly if the two implementations disagree instead of comparing by eye
np.testing.assert_allclose(cost, l_cost)

0.9266947078039337
0.9266947078039336


### 1.1.4 Backward Propagation  <a class="anchor" id="section_1_1_4"></a>

In [84]:
def backward_propagation(forward_cache,parameters):
    """
    Placeholder for the backward pass: not implemented yet.

    The body performs no computation and always returns None.

    Arguments:
    forward_cache -- currently unused; presumably the list of caches returned by
                     Forward_propagation -- TODO confirm once implemented
    parameters -- currently unused; presumably the dictionary of weights and
                  biases of each layer -- TODO confirm once implemented

    Returns:
    None
    """
    # TODO: implement the gradient computation for every layer.





    return None

### 1.1.5 Update parameters  <a class="anchor" id="section_1_1_5"></a>

## 1.2 L-layer Model <a class="anchor" id="section_1_2"></a>