### Objective
- Use non-linear units like ReLU to improve your model
- Build a deeper neural network (with more than 1 hidden layer)
- Implement an easy-to-use neural network class


In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v2 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline
# Notebook-wide matplotlib defaults: figure size for plots, and how images
# are interpolated and colour-mapped when displayed.
plt.rcParams.update({
    'figure.figsize': (5.0, 4.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

%load_ext autoreload
%autoreload 2

# Seed NumPy's global random generator so random draws are repeatable.
np.random.seed(1)

  from ._conv import register_converters as _register_converters


In [6]:
def L_initialize_parameters(layer_dims):
    """Create the weight matrices and bias vectors for every layer.

    Args:
        layer_dims: sequence of layer sizes, input layer first. Entry ``l``
            is the number of units in layer ``l``.

    Returns:
        dict mapping ``"W<l>"`` to an array of shape
        ``(layer_dims[l], layer_dims[l-1])`` filled with small random values,
        and ``"b<l>"`` to a zero array of shape ``(layer_dims[l], 1)``, for
        ``l = 1 .. len(layer_dims) - 1``.

    Note:
        Reseeds NumPy's global random generator with 3 on every call, so the
        returned weights are identical across calls with the same dims.
    """
    np.random.seed(3)
    params = {}

    for layer in range(1, len(layer_dims)):
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]
        # Scale by 0.01 to keep the initial weights small.
        params["W" + str(layer)] = np.random.randn(fan_out, fan_in) * 0.01
        # np.zeros takes the shape as a single tuple argument.
        params["b" + str(layer)] = np.zeros((fan_out, 1))

    return params

In [5]:
def _activation_value(result):
    """Return the activation array from an activation helper's result.

    NOTE(review): some versions of dnn_utils_v2 return ``(A, cache)`` from
    ``relu``/``sigmoid`` instead of a bare array; both forms are accepted
    here. Confirm against the local module.
    """
    return result[0] if isinstance(result, tuple) else result


def L_model_forward(X, params):
    """Run forward propagation: ReLU on every hidden layer, sigmoid on the last.

    Args:
        X: input activations. Each layer computes ``np.dot(W, A_prev) + b``,
            so X must have one row per input unit of layer 1.
        params: dict with keys ``"W1", "b1", ..., "WL", "bL"``.

    Returns:
        ``(AL, caches)`` where ``AL`` is the output of the last layer and
        ``caches`` holds one ``(A_prev, W, b, Z)`` tuple per layer, in layer
        order (the layout ``backward_propagate`` unpacks).
    """
    # Every layer contributes one "W" and one "b" entry.
    num_layers = len(params) // 2
    caches = []
    A = X
    for i in range(1, num_layers + 1):
        A_prev = A
        W = params["W" + str(i)]
        b = params["b" + str(i)]
        # W has one row per unit of this layer, so it multiplies from the left.
        Z = np.dot(W, A_prev) + b
        activation = sigmoid if i == num_layers else relu
        A = _activation_value(activation(Z))
        # Store the layer's *input* activation; the backward pass needs it
        # to compute dW.
        caches.append((A_prev, W, b, Z))
    return A, caches

In [2]:
def compute_cost(AL, Y):
    """Compute the cross-entropy cost of predictions ``AL`` against labels ``Y``.

    The per-element log-likelihood is summed along axis 1 and scaled by
    ``-1 / Y.shape[1]``; the result is squeezed, so a single-row ``Y``
    yields a 0-d array.
    """
    n_examples = Y.shape[1]
    log_likelihood = Y * np.log(AL) + (1 - Y) * np.log(1 - AL)
    row_totals = np.sum(log_likelihood, axis=1)
    return np.squeeze((-1. / n_examples) * row_totals)

In [3]:
def backward_propagate(cache, dAL, activation):
    """Backpropagate through one LINEAR -> ACTIVATION layer.

    Args:
        cache: ``(A_prev, W, b, Z)`` tuple saved for this layer during the
            forward pass.
        dAL: gradient of the cost with respect to this layer's activation.
        activation: ``"relu"`` or ``"sigmoid"``.

    Returns:
        ``(dA_prev, dW, db)``: gradients with respect to the previous
        layer's activation, this layer's weights, and this layer's bias.

    Raises:
        ValueError: if ``activation`` is neither ``"relu"`` nor ``"sigmoid"``.
    """
    A_prev, W, b, Z = cache
    # Number of examples: one column per example.
    m = A_prev.shape[1]

    if activation == "relu":
        dZ = relu_backward(dAL, Z)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dAL, Z)
    else:
        raise ValueError("activation must be 'relu' or 'sigmoid', got %r" % (activation,))

    dW = (1. / m) * np.dot(dZ, A_prev.T)
    db = (1. / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

def L_model_backward(AL, Y, cache):
    """Backpropagate through the whole network.

    Args:
        AL: output of the forward pass (sigmoid activations of the last layer).
        Y: true labels, broadcast-compatible with ``AL``.
        cache: list with one ``(A_prev, W, b, Z)`` tuple per layer, in layer
            order, with the output layer last.

    Returns:
        dict ``grads`` with, for each layer ``l`` in ``1..L``:
        ``grads["dW<l>"]`` and ``grads["db<l>"]`` (gradients of that layer's
        parameters) and ``grads["dA<l>"]`` (gradient with respect to the
        activation that is fed *into* layer ``l``).

    Raises:
        ValueError: if ``cache`` is empty.
    """
    L = len(cache)
    if L == 0:
        raise ValueError("cache must contain one entry per layer")

    grads = {}

    # Derivative of the cross-entropy cost with respect to AL; the leading
    # minus sign comes from the cost being the *negative* log-likelihood.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid) uses the last cache entry.
    dA_prev, dW, db = backward_propagate(cache[L - 1], dAL, "sigmoid")
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = dA_prev, dW, db

    # Hidden layers (relu), from layer L-1 down to layer 1. cache[l] belongs
    # to layer l+1, whose incoming gradient was stored under "dA<l+2>".
    for l in reversed(range(L - 1)):
        dA_prev, dW, db = backward_propagate(cache[l], grads["dA" + str(l + 2)], "relu")
        grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] = dA_prev, dW, db

    return grads

In [1]:
def update_parameters(parameters, grads, learning_rate = 0.01):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        parameters: dict with keys ``"W1", "b1", ..., "WL", "bL"``. The dict
            is updated in place (its entries are rebound to new arrays).
        grads: dict with matching keys ``"dW1", "db1", ..., "dWL", "dbL"``.
        learning_rate: step size of the update.

    Returns:
        The same ``parameters`` dict, after the update.
    """
    # Every layer contributes one "W" and one "b" entry.
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        w_key, b_key = "W" + str(layer), "b" + str(layer)
        parameters[w_key] = parameters[w_key] - learning_rate * grads["d" + w_key]
        parameters[b_key] = parameters[b_key] - learning_rate * grads["d" + b_key]

    return parameters

In [1]:
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost = False):
    """Train an L-layer network with batch gradient descent and plot the cost.

    Args:
        X: training inputs, passed unchanged to ``L_model_forward``.
        Y: training labels, passed to ``compute_cost`` and ``L_model_backward``.
        layers_dims: layer sizes, input layer first.
        learning_rate: step size for ``update_parameters``.
        num_iterations: number of gradient-descent iterations.
        print_cost: if True, print and record the cost every 100 iterations.

    Returns:
        The parameters dict after the final update.

    Note:
        Reseeds NumPy's global random generator with 1, and always shows a
        matplotlib figure of the recorded costs (empty when ``print_cost``
        is False, since costs are only recorded in that mode).
    """
    np.random.seed(1)
    costs = []
    # The initialiser defined in this file is L_initialize_parameters.
    parameters = L_initialize_parameters(layers_dims)

    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    # Costs are sampled once every 100 iterations.
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

In [8]:
# Train the model for 2500 iterations, printing the cost along the way.
# NOTE(review): train_x, train_y and layers_dims are not defined in any
# visible cell (this is the NameError reported below) -- load the dataset
# and define layers_dims before running this statement.
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)

NameError: name 'train_x' is not defined