# 0. How my Deep Neural Network will work

<u>**TEXT:**</u>
    1. Initialize neural network
    2. Loop:
        - forward propagation;
        - compute cost;
        - backward propagation;
        - update parameters;
    3. Use DNN to predict.
<u>**GENERAL CODE OF THE MAIN FUNCTION:**</u>
```python
def L_nn(X,Y, layers_dims, learning_rate = ..., print_cost = ..., num_iterations = ...):
        parameters = init_nn(layers_dims);  
        ...
        for epoch in range(num_iterations):
            ...
            AL, caches = model_forward(X,parameters); 
            ...
            cost = compute_cost(AL,Y);     
            ...
            grads = model_backward(AL, Y, caches);   
            ...
            parameters = update_parameters(grads, learning_rate, parameters);
            ...
        return parameters; 
```
Let's get started! 

# 1. Import required libraries

In [13]:
import numpy as np
from matplotlib import pyplot as plt
from testCases_v4 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

np.random.seed(3)

# 2. Initialize neural network        

In [14]:
def init_nn(layers_dims):
    """
    Create the initial weights and biases for every layer of the network.

    Arguments:
        layers_dims - sequence of layer sizes. layers_dims[0] is the size of the input and
        layers_dims[i] is the number of neurons in layer i.

    Return:
        parameters - dict with the parameters of the network ({"W1": ..., "b1": ..., ...}).
        Wl has shape (layers_dims[l], layers_dims[l-1]) and bl has shape (layers_dims[l], 1).
    """
    parameters = {}
    num_layers = len(layers_dims) - 1

    for l in range(1, num_layers + 1):  # l = 1, 2, 3, ..., L
        # Standard-normal weights scaled down by 0.01.
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
        # Bias is a column vector (n, 1): a flat (n,) vector would broadcast along the
        # wrong axis when added to np.dot(W, A_prev), whose shape is (n, samples).
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))

    return parameters

# 3. Activation functions

In [27]:
def sigmoid(Z):
    """
    Apply the logistic function to Z element-wise.

    Arguments:
            Z - weighted input of a layer, e.g. Z2 = np.dot(W2,A1) + b2 for layer 2.

    Returns:
            1 / (1 + exp(-Z)), evaluated element-wise.
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

In [28]:
def relu(Z):
    """
    Apply the rectified linear unit to Z element-wise.

    Arguments:
            Z - weighted input of a layer, e.g. Z2 = np.dot(W2,A1) + b2 for layer 2.

    Returns:
            max(0, Z), evaluated element-wise.
    """
    floor = 0
    return np.maximum(floor, Z)

# 4. Forward propagation

In [29]:
def linear_forward(A_prev, W, b):
    """
    Compute the linear part of a layer: Z = np.dot(W, A_prev) + b.

    Arguments:
            A_prev - input to the layer;
            W - weights of the layer;
            b - bias of the layer.

    Returns:
            Z - the weighted input of the layer;
            cache - one-element list holding that same Z.
    """
    pre_activation = np.dot(W, A_prev) + b
    return pre_activation, [pre_activation]

In [30]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Run one layer forward: the linear step followed by the chosen activation.

    Arguments:
            A_prev - input to the layer;
            W, b - weights and bias of the layer;
            activation - name of the activation, "relu" or "sigmoid".

    Returns:
            A - output of the activation function;
            caches - dict with two entries kept for the backward pass:
                "activation_cache" - the cache returned by linear_forward,
                "linear_cache" - {"W": W, "A_prev": A_prev, "b": b}.

    Raises:
            ValueError - if activation is neither "relu" nor "sigmoid".
    """
    # Reject unknown names before doing any work; otherwise A would never be
    # assigned and the function would fail with an unrelated unbound-variable error.
    if activation not in ("relu", "sigmoid"):
        raise ValueError("unsupported activation: " + repr(activation))

    Z, activation_cache = linear_forward(A_prev, W, b)
    A = relu(Z) if activation == "relu" else sigmoid(Z)

    caches = {
        "activation_cache": activation_cache,
        "linear_cache": {"W": W, "A_prev": A_prev, "b": b},
    }
    return A, caches

In [33]:
def model_forward(X,parameters):
    """
    Forward pass through the whole network.

    Layers 1..L-1 use the "relu" activation and layer L uses "sigmoid", where
    L = len(parameters) // 2.

    Arguments:
            X - input to the first layer;
            parameters - dict holding "W1", "b1", ..., "WL", "bL".

    Returns:
            AL - output of the last layer;
            caches - list with the cache of every layer, in layer order.
    """
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers.
    for idx in range(1, num_layers):
        weights = parameters["W" + str(idx)]
        bias = parameters["b" + str(idx)]
        activation, layer_cache = linear_activation_forward(activation, weights, bias, activation="relu")
        caches.append(layer_cache)

    # Output layer.
    last = str(num_layers)
    AL, layer_cache = linear_activation_forward(
        activation, parameters["W" + last], parameters["b" + last], activation="sigmoid"
    )
    caches.append(layer_cache)
    return AL, caches

# 5. Compute cost

In [39]:
def compute_cost(AL, Y):
    """
    Cross-entropy cost averaged over the samples.

    Arguments:
            AL - predicted probabilities, same shape as Y;
            Y - true labels; Y.shape[1] is used as the number of samples.

    Returns:
            -sum(Y*log(AL) + (1-Y)*log(1-AL)) / m
    """
    m = Y.shape[1]
    # Keep predictions strictly inside (0, 1): when AL hits exactly 0 or 1 the
    # logarithm is -inf and the cost becomes inf or nan (0 * -inf).
    eps = 1e-15
    AL = np.clip(AL, eps, 1 - eps)
    return -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m

# 6. Backward propagation

In [None]:
def linear_activation_backward(cache, dA, activation):
    """
    Backward step for one layer: activation gradient followed by the linear gradient.

    Arguments:
            cache - dict with "linear_cache" and "activation_cache" entries, as stored
            during the forward pass;
            dA - gradient of the cost with respect to the output of this layer;
            activation - name of the activation used by the layer, 'sigmoid' or 'relu'.

    Returns:
            dA_prev, dW, db - the values returned by linear_backward for this layer.

    Raises:
            ValueError - if activation is neither 'sigmoid' nor 'relu'.
    """
    # Fail early on unknown names instead of leaving dA_prev/dW/db unassigned.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError("unsupported activation: " + repr(activation))

    linear_cache = cache['linear_cache']
    activation_cache = cache['activation_cache']

    # NOTE(review): the argument order (activation_cache, dA) follows relu_backward
    # as defined in this file; helpers of the same name are also imported from
    # dnn_utils_v2 and may use a different order - confirm which one is in scope.
    if activation == 'sigmoid':
        dZ = sigmoid_backward(activation_cache, dA)
    else:
        dZ = relu_backward(activation_cache, dA)

    dA_prev, dW, db = linear_backward(linear_cache, dZ)
    return dA_prev, dW, db

In [None]:
def linear_backward(linear_cache, dZ):
    """
    Gradients of the linear part of a layer.

    Arguments:
            linear_cache - dict with the layer's "W" and "A_prev" from the forward pass;
            dZ - gradient of the cost with respect to Z; dZ.shape[1] is used as the
            number of samples m.

    Returns:
            dA_prev - np.dot(W.T, dZ);
            dW - np.dot(dZ, A_prev.T) / m;
            db - sum of dZ over axis 1 (dimension kept) / m.
    """
    m = dZ.shape[1]

    dW = np.dot(dZ, linear_cache['A_prev'].transpose()) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    # The forward pass stores the weights under "W"; there is no "dW" entry in the cache.
    dA_prev = np.dot(linear_cache['W'].transpose(), dZ)
    return dA_prev, dW, db

In [None]:
def relu_backward(activation_cache, dA):
    """
    Gradient of the cost with respect to Z for a relu layer.

    Arguments:
            activation_cache - Z from the forward pass, either as an array or wrapped
            in a one-element list/tuple (the form linear_forward returns);
            dA - gradient of the cost with respect to the layer output.

    Returns:
            dZ - new array equal to dA where Z > 0 and 0 elsewhere.
    """
    # Unwrap the one-element cache produced by the forward pass.
    if isinstance(activation_cache, (list, tuple)):
        Z = activation_cache[0]
    else:
        Z = activation_cache

    # relu has slope 1 for positive inputs and 0 otherwise.
    dZ = np.where(np.asarray(Z) > 0, dA, 0)
    return dZ

In [None]:
def sigmoid_backward(activation_cache, dA):
    """
    Gradient of the cost with respect to Z for a sigmoid layer.

    Arguments:
            activation_cache - Z from the forward pass, either as an array or wrapped
            in a one-element list/tuple (the form linear_forward returns);
            dA - gradient of the cost with respect to the layer output.

    Returns:
            dZ - dA * s * (1 - s), where s = 1 / (1 + exp(-Z)).
    """
    # Unwrap the one-element cache produced by the forward pass.
    if isinstance(activation_cache, (list, tuple)):
        Z = activation_cache[0]
    else:
        Z = activation_cache

    s = 1 / (1 + np.exp(-np.asarray(Z)))
    dZ = dA * s * (1 - s)
    return dZ

In [None]:
def model_backward(AL, Y, caches):
    """
    Backward pass through the whole network.

    The last layer is treated as a sigmoid layer and all earlier layers as relu layers.

    Arguments:
            AL - output of the forward pass;
            Y - true labels;
            caches - list with one cache per layer, in layer order (caches[l-1] belongs to layer l).

    Returns:
            grads - dict with "dA{l-1}", "dW{l}" and "db{l}" for every layer l = 1..L.
    """
    grads = {}
    L = len(caches)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer. linear_activation_backward returns (dA_prev, dW, db), in that order.
    current_cache = caches[L - 1]  # caches are indexed 0, 1, ..., L-1
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(
        current_cache, dAL, activation="sigmoid"
    )

    # Hidden layers, from L-1 down to 1; each step consumes the dA produced by the layer above.
    for l in range(L - 1, 0, -1):
        current_cache = caches[l - 1]
        grads["dA" + str(l - 1)], grads["dW" + str(l)], grads["db" + str(l)] = linear_activation_backward(
            current_cache, grads["dA" + str(l)], activation="relu"
        )
    return grads

In [3]:
# Scratch check: count down from 3 to 0.
for i in range(5 - 1)[::-1]:
    print(i)

3
2
1
0


In [4]:
# Scratch check: count down from 5 to 1.
for l in reversed(range(1, 6)):
    print(l)

5
4
3
2
1
