# Simple Deep Neural Network

In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

from dnn_utils import *
from utils import load_catvnoncat_dataset

%load_ext autoreload
%autoreload 2 

# Seed NumPy's global random generator so that random draws (e.g. the weight
# initialization below) are repeatable from run to run.
SEED = 1
np.random.seed(SEED)

### Initialize parameters

In [4]:
def initialize_parameters_deep(layer_dims):
    """
    Create the initial weights and biases for every layer of the network.

    Arguments:
    layer_dims - python list with the number of units in each layer, input layer first.

    Returns:
    python dictionary with keys "W1", "b1", ..., "W<L-1>", "b<L-1>" where L = len(layer_dims):
        Wl is a matrix of shape (layer_dims[l], layer_dims[l-1]), drawn from a standard normal and scaled by 0.01
        bl is a zero vector of shape (layer_dims[l], 1)
    The dictionary is empty when layer_dims has fewer than two entries.
    """
    weights_and_biases = {}

    for layer in range(1, len(layer_dims)):
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]
        suffix = str(layer)
        # Small random weights break the symmetry between units; biases can start at zero.
        weights_and_biases["W" + suffix] = 0.01 * np.random.randn(fan_out, fan_in)
        weights_and_biases["b" + suffix] = np.zeros((fan_out, 1))

    return weights_and_biases

### Forward propagation

In [5]:
def linear_forward(A_prev, W, b):
    """
    Compute the linear part of a layer's forward step, Z = W . A_prev + b.

    Arguments:
    A_prev - numpy array, activations from previous layer, shape is (size of previous layer, number of samples)
    W - numpy array, weight matrix, shape is (size of current layer, size of previous layer)
    b - numpy array, bias vector, shape is (size of current layer, 1)

    Returns:
    Z - numpy array, the pre-activation values
    cache - python tuple (A_prev, W, b) holding the inputs unchanged
    """
    pre_activation = np.dot(W, A_prev)
    # b is broadcast across the sample axis.
    pre_activation = pre_activation + b
    return pre_activation, (A_prev, W, b)

In [7]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Run one layer's forward step: the linear transform followed by its activation.

    Arguments:
    A_prev - data type is numpy array, activations from previous layer, shape is (size of previous layer, number of samples)
    W - data type is numpy array, weight matrix, shape is (size of current layer, size of previous layer)
    b - data type is numpy array, bias vector, shape is (size of current layer, 1)
    activation - name of the activation used in this layer; "sigmoid" selects sigmoid,
        any other value (normally "relu") selects relu

    Returns:
    A - the output of the activation function applied to Z = W . A_prev + b
        (presumably the same shape as Z; depends on the sigmoid/relu helpers)
    cache - data type is python tuple (linear_cache, activation_cache):
        linear_cache is the cache returned by linear_forward,
        activation_cache is the cache returned by the activation helper
    """
    # linear_forward needs the layer's weights and bias, not just the incoming activations.
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [None]:
def deep_model_forward(X, params):
    """
    Forward-propagate X through the whole network: relu on every layer except
    the last, sigmoid on the last.

    Arguments:
    X - data type is numpy array, input data, shape is (input size, number of samples)
    params - output of initialize_parameters_deep function

    Returns:
    A - the activation produced by the last layer
    caches - data type is python list holding the cache returned by
        linear_activation_forward for each layer, in layer order
    """
    # params stores one "W" and one "b" entry per layer.
    num_layers = len(params) // 2
    activation = X
    caches = []

    # Hidden layers 1 .. num_layers - 1.
    for layer in range(1, num_layers):
        weights = params["W" + str(layer)]
        bias = params["b" + str(layer)]
        activation, layer_cache = linear_activation_forward(activation, weights, bias, "relu")
        caches.append(layer_cache)

    # Output layer.
    last = str(num_layers)
    output, output_cache = linear_activation_forward(activation, params["W" + last], params["b" + last], "sigmoid")
    caches.append(output_cache)
    return output, caches

### Compute cost

In [8]:
def compute_cost(A, Y):
    """
    Compute the mean binary cross-entropy between predictions and labels.

    Arguments:
    A - data type is numpy array, probability vector corresponding to the labels, shape is (1, number of samples)
    Y - data type is numpy array, true "label" vector, shape is (1, number of samples)

    Returns:
    cost - cross-entropy cost, a zero-dimensional numpy value
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1): a saturated prediction of exactly
    # 0 or 1 would give log(0) = -inf, and 0 * -inf turns the whole cost into nan.
    A = np.asarray(A)
    float_dtype = A.dtype if np.issubdtype(A.dtype, np.floating) else np.float64
    eps = np.finfo(float_dtype).eps
    A = np.clip(A, eps, 1 - eps)

    cost = (1. / m) * (-np.dot(Y, np.log(A).T) - np.dot(1 - Y, np.log(1 - A).T))
    cost = np.squeeze(cost)
    return cost