### Objective
- Use non-linear units like ReLU to improve your model
- Build a deeper neural network (with more than 1 hidden layer)
- Implement an easy-to-use neural network class


In [10]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
# from testCases_v2 import *
from dnn_app_utils_v2 import load_data
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward


%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
def L_initialize_parameters(layer_dims):
    np.random.seed(3)
    params = {}
    for l in range(1, len(layer_dims)):
        params["W"+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        params["b"+str(l)] = np.zeros((layer_dims[l], 1))
    return params

In [40]:




def L_model_forward(X, params):
    A = X 
    L = int(len(params)/2) # Length of layers
#     print("Length of params "+str(params))
    caches = []
    for i in range(1, L):
        W = params["W"+str(i)]
        b = params["b"+str(i)]
        print("W :"+str(W.shape))
        print("A :"+str(A.shape))
        Z = np.dot(W, A) + b
        print("Z :"+str(Z.shape))
        A = np.maximum(0,Z) # RELU

        print("A^ :"+str(A.shape))

        caches.append([A,W,b,Z])
    Z = np.dot(params["W"+str(L)], A) + params["b"+str(L)]
    AL = 1/(1+np.exp(-Z))

    return AL, caches

In [4]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = (-1./m)*np.sum((Y*np.log(AL) + (1-Y)*np.log(1-AL)), axis = 1)
    cost = np.squeeze(cost)
    return cost

In [5]:
def backward_propagate(cache, dAL, activation):
    A_prev, W, b, Z = cache
    if activation == "relu":
        dZ = relu_backward(dA, Z)
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, Z)
    dW = (1. / m) * np.dot(dZ, A_prev.T) 
    db = (1. / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

def L_model_backward(AL, Y, cache):
    L = len(cache)
    dAL = np.divide(Y, AL) - np.divide((1-Y), (1-AL))
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = backward_propagate(cache, dAL, "sigmoid")
    for l in reversed(range(L-1)):
        a, b, c = backward_propagate(cache, grads["dA" + str(l + 2)], "relu")
        grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] = a, b, c 
    return grads

In [6]:
def update_parameters(parameters, grads, learning_rate = 0.01):
    L = len(parameters)
    for i in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]
    
    return parameters

In [24]:
def L_layer_modela(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost = False):
    np.random.seed(1)
    costs = []
    print(len(layers_dims))
    parameters = L_initialize_parameters(layers_dims)
    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        
        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)
            
    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    
    return parameters


In [12]:
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
# Reshape the training and test examples 
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_x's shape: (12288, 209)
test_x's shape: (12288, 50)


In [13]:
layers_dims = [12288, 20, 7, 5, 1] #  5-layer model

In [41]:
print(len(layers_dims))
parameters = L_layer_modela(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)

5
5
Length of params {'W1': array([[ 0.01788628,  0.0043651 ,  0.00096497, ...,  0.00742033,
         0.00777721, -0.02044101],
       [-0.02034741, -0.01277108, -0.00845047, ..., -0.01592858,
         0.01189758,  0.0136909 ],
       [ 0.00736324,  0.01040032, -0.00610759, ..., -0.00719972,
         0.01342522, -0.00194119],
       ...,
       [ 0.00152689,  0.0117185 , -0.01256988, ..., -0.01793973,
         0.00977007,  0.00740467],
       [ 0.00301225,  0.01519223,  0.00774002, ..., -0.00081801,
        -0.00483844,  0.01257785],
       [ 0.01000491,  0.0052482 , -0.0007646 , ...,  0.00668237,
         0.00346636, -0.00618991]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[-4.21271010e-03,  1.35979614e-03,  3.84183394e-03,
         5.84163765e-04, 

ValueError: not enough values to unpack (expected 4, got 3)