### Deep Neural Network

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def init_params_deep(layer_widths):
    """Create the initial weights and biases for a fully connected network.

    Args:
        layer_widths: Sequence of layer sizes, input layer first and output
            layer last. Must contain at least two entries.

    Returns:
        dict with keys 'w1', 'b1', ..., one pair per connection between
        consecutive layers. 'w<k>' has shape (layer_widths[k], layer_widths[k-1])
        and 'b<k>' is a zero column of shape (layer_widths[k], 1).

    Note:
        The global NumPy random seed is reset to 3 on every call, so the
        returned weights are reproducible.
    """
    np.random.seed(3)
    n_layers = len(layer_widths)
    assert(n_layers >= 2)
    last = n_layers - 1
    params = {}
    fan_pairs = zip(layer_widths, layer_widths[1:])
    for idx, (fan_in, fan_out) in enumerate(fan_pairs, start=1):
        # Hidden layers are scaled by sqrt(2/fan_in); the final layer uses
        # sqrt(1/fan_in) instead.
        numerator = 1 if idx == last else 2
        params['w' + str(idx)] = np.random.randn(fan_out, fan_in)*np.sqrt(numerator/fan_in)
        params['b' + str(idx)] = np.zeros((fan_out, 1))
    return params

In [None]:
def linear_forward(a_prev, w, b):
    """Apply the affine part of a layer: ``w . a_prev + b``.

    Args:
        a_prev: Activations feeding this layer.
        w: Weight matrix of this layer.
        b: Bias added to the product (broadcast by NumPy).

    Returns:
        (z, linear_cache) where ``linear_cache`` is the tuple
        ``(a_prev, w, b)`` kept for the backward pass.
    """
    pre_activation = w.dot(a_prev) + b
    return pre_activation, (a_prev, w, b)

In [None]:
def activation_forward(z, activation):
    """Apply the layer non-linearity to the pre-activations ``z``.

    Args:
        z: Pre-activation array. Rows are units and columns are examples
            (the rest of this file averages gradients over ``shape[1]``).
        activation: Either 'softmax' or 'relu'.

    Returns:
        (a, activation_cache) where ``a`` is the activated output and
        ``activation_cache`` is ``z`` itself, kept for the backward pass.

    Raises:
        AssertionError: If ``activation`` is not one of the supported names.
    """
    assert activation in ('softmax', 'relu'), "activation must be 'softmax' or 'relu'"
    if activation == 'softmax':
        # Normalise each example (column) independently. Summing over the
        # whole matrix would make the outputs of a multi-example batch sum to
        # 1 in total instead of per example.
        # Subtracting the column max leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        a = exp_z / np.sum(exp_z, axis=0, keepdims=True)
    else:
        a = np.maximum(z, 0)
    return a, z

In [None]:
def forward(a_prev, w, b, activation):
    """Run one full layer: affine step followed by its activation.

    Args:
        a_prev: Activations coming from the previous layer.
        w: Weight matrix of this layer.
        b: Bias of this layer.
        activation: Activation name forwarded to ``activation_forward``.

    Returns:
        (a, cache) where ``cache`` is ``(linear_cache, activation_cache)``,
        the pair of values the two sub-steps saved for backpropagation.
    """
    pre_activation, lin_cache = linear_forward(a_prev, w, b)
    output, act_cache = activation_forward(pre_activation, activation)
    return output, (lin_cache, act_cache)

In [None]:
def forward_deep(X, params):
    """Propagate ``X`` through every layer described by ``params``.

    All layers except the last use 'relu'; the last layer uses 'softmax'.

    Args:
        X: Network input.
        params: dict holding 'w<k>' / 'b<k>' pairs; the number of layers is
            taken to be ``len(params) // 2``.

    Returns:
        (a, caches) where ``a`` is the output of the final layer and
        ``caches`` is a list with one per-layer cache, in forward order.
    """
    depth = len(params) // 2
    caches = []
    signal = X

    # Hidden layers: 1 .. depth-1.
    for idx in range(1, depth):
        weights = params['w' + str(idx)]
        bias = params['b' + str(idx)]
        signal, hidden_cache = forward(signal, weights, bias, 'relu')
        caches.append(hidden_cache)

    # Output layer.
    out_weights = params['w' + str(depth)]
    out_bias = params['b' + str(depth)]
    output, output_cache = forward(signal, out_weights, out_bias, 'softmax')
    caches.append(output_cache)
    return output, caches

In [None]:
def activation_backward(da, activation_cache, activation):
    """Backpropagate a gradient through a layer's activation function.

    Args:
        da: Gradient arriving from the layer above. For 'softmax' this is
            returned unchanged as ``dz``; ``backward_deep`` in this file
            passes ``a - y`` here, which is already the gradient with respect
            to the pre-activation.
        activation_cache: The pre-activation ``z`` saved during the forward
            pass (only read for 'relu').
        activation: Either 'softmax' or 'relu'.

    Returns:
        dz: Gradient with respect to the pre-activation. For 'relu' this is a
        new array; ``da`` is not modified.

    Raises:
        AssertionError: If ``activation`` is not one of the supported names.
    """
    assert activation in ('softmax', 'relu'), "activation must be 'softmax' or 'relu'"
    if activation == 'softmax':
        return da

    z = np.asarray(activation_cache)
    dz = np.array(da, copy=True)
    # ReLU has derivative 1 where z > 0 and 0 elsewhere, so the incoming
    # gradient passes through active units and is blocked for inactive ones.
    dz[z <= 0] = 0
    return dz

In [None]:
def linear_backward(dz, linear_cache):
    """Backpropagate through the affine step of one layer.

    Args:
        dz: Gradient with respect to this layer's pre-activation.
        linear_cache: The ``(a_prev, w, b)`` tuple saved by the forward pass.

    Returns:
        (da_prev, dw, db): gradient with respect to the layer input, and the
        weight and bias gradients. ``dw`` and ``db`` are divided by
        ``a_prev.shape[1]``.
    """
    layer_input, weights, _ = linear_cache
    scale = 1/layer_input.shape[1]
    grad_w = scale*dz.dot(layer_input.T)
    grad_b = scale*np.sum(dz, axis=1, keepdims=True)
    grad_input = weights.T.dot(dz)
    return grad_input, grad_w, grad_b

In [None]:
def backward(da, cache, activation):
    """Backpropagate through one full layer (activation, then affine step).

    Args:
        da: Gradient arriving from the layer above.
        cache: ``(linear_cache, activation_cache)`` saved by ``forward``.
        activation: Activation name forwarded to ``activation_backward``.

    Returns:
        (da_prev, dw, db) as produced by ``linear_backward``.
    """
    lin_cache, act_cache = cache
    grad_pre_activation = activation_backward(da, act_cache, activation)
    grad_input, grad_w, grad_b = linear_backward(grad_pre_activation, lin_cache)
    return grad_input, grad_w, grad_b

In [None]:
def backward_deep(a, y, caches):
    """Backpropagate through the whole network.

    Args:
        a: Output of the final (softmax) layer from ``forward_deep``.
        y: Targets with the same shape as ``a`` (presumably one-hot labels;
            confirm against the caller).
        caches: Per-layer caches returned by ``forward_deep``, in forward
            order.

    Returns:
        dict of gradients keyed 'dw<k>' and 'db<k>' for layers k = 1..L, plus
        'da<k>' for k = 0..L-1, the gradient with respect to the input of
        layer k+1.
    """
    gradients = {}
    L = len(caches)

    # Output layer: the gradient at the softmax pre-activation is taken as
    # (a - y) and passed straight through by the 'softmax' branch.
    dz = a - y
    da_prev, dw, db = backward(dz, caches[L-1], 'softmax')
    gradients['da' + str(L-1)] = da_prev
    gradients['dw' + str(L)] = dw
    gradients['db' + str(L)] = db

    # Hidden layers, last to first. Each step consumes the gradient produced
    # by the layer above it (reading `a_prev` before it was assigned here
    # raised UnboundLocalError).
    for layer in reversed(range(L-1)):
        da_prev, dw, db = backward(da_prev, caches[layer], 'relu')
        gradients['da' + str(layer)] = da_prev
        gradients['dw' + str(layer+1)] = dw
        gradients['db' + str(layer+1)] = db
    return gradients

In [None]:
def dnn_model(X, y, alpha=0.001, epochs=5000, verbose=False):
    """Train a five-hidden-layer ReLU/softmax network with gradient descent.

    Every epoch runs one forward and one backward pass over all of ``X`` and
    then takes a single gradient-descent step on every weight and bias.

    Args:
        X: Inputs; ``X.shape[0]`` is the input width and ``X.shape[1]`` the
            number of examples.
        y: Targets; ``y.shape[0]`` is the output width (presumably one-hot
            columns aligned with ``X``; confirm against the caller).
        alpha: Learning rate.
        epochs: Number of full-batch updates to perform.
        verbose: When true, print the cross-entropy cost every 100 epochs.

    Returns:
        dict of trained parameters ('w1', 'b1', ..., as built by
        ``init_params_deep``).
    """
    m = X.shape[1]
    input_dim = X.shape[0]
    output_dim = y.shape[0]
    layer_widths = [input_dim, 100, 100, 100, 100, 100, output_dim]
    params = init_params_deep(layer_widths)
    n_layers = len(layer_widths) - 1
    for epoch in range(epochs):
        a, caches = forward_deep(X, params)
        # backward_deep takes (a, y, caches); passing params as well raised
        # a TypeError.
        gradients = backward_deep(a, y, caches)

        # Gradient-descent update of every layer.
        for layer in range(1, n_layers + 1):
            w_key = 'w' + str(layer)
            b_key = 'b' + str(layer)
            params[w_key] = params[w_key] - alpha*gradients['d' + w_key]
            params[b_key] = params[b_key] - alpha*gradients['d' + b_key]

        if verbose and epoch % 100 == 0:
            # Cross-entropy of the predictions made before this epoch's
            # update; the small constant guards against log(0).
            cost = -np.sum(y*np.log(a + 1e-12))/m
            print('epoch ' + str(epoch) + ': cost = ' + str(cost))
    return params