In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases import *
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
from public_tests import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

### Initialization

We will write two helper functions to initialize the parameters for your model:
* The first function will be used to initialize parameters for a two layer model.
* The second one generalizes this initialization process to L layers

In [2]:
# initialize the parameters of the 2-layer neural network
"""
args:
    n_x         : size of the input layer
    n_h         : size of the hidden layer
    n_y         : size of the output layer
returns:
    parameters  : python dict containing parameters (W1, b1, W2, b2)
"""

def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(1)
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [3]:
parameters = initialize_parameters(3, 2, 1)

print("W1 = " + str (parameters["W1"]))
print("b1 = " + str (parameters["b1"]))
print("W2 = " + str (parameters["W2"]))
print("b2 = " + str (parameters["b2"]))

W1 = [[ 0.01624345 -0.00611756 -0.00528172]
 [-0.01072969  0.00865408 -0.02301539]]
b1 = [[0.]
 [0.]]
W2 = [[ 0.01744812 -0.00761207]]
b2 = [[0.]]


The initialization for a deeper L-layer neural netowkr is more complicated because there are more weight matrices and bias vectors.

Here's an implementation for L = 1 (one layer neural network). It should inspire you to implement the general case.

if L == 1:

    parameters["W" + str(L)] = np.random.randn(layer_dims[1], layer_dims[0]) * 0.01
    
    parameters["b" + str(L)] = np.zeros((layer_dims[1], 1))

In [4]:
def initialize_parameters_deep(layer_dims):
    """
    Args:
        layer_dims      : python array (list) containing the dimensions of each layer
    returns:
        parameters 
    """
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims) # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[1], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    
    return parameters

In [5]:
parameters = initialize_parameters_deep([5, 4, 3])

print("W1 = " + str (parameters["W1"]))
print("b1 = " + str (parameters["b1"]))
print("W2 = " + str (parameters["W2"]))
print("b2 = " + str (parameters["b2"]))

W1 = [[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]
 [-0.01244123 -0.00626417 -0.00803766 -0.02419083]]
b2 = [[0.]
 [0.]
 [0.]]


### Forward Propagation Module

Now that we have initialized our parameters, we can do the forward propagation module. We'll start by implementing some basic functions that we can use again later when implementing the model. Now, we'll complete three functions in this order:
* LINEAR
* LINEAR -> ACTIVATION where ACTIVATION will be either ReLU or Sigmoid
* [LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID (whole model)

In [6]:
def linear_forward(A, W, b):
    """
    args:
        A       : activations from previous layer (size of prev layer, m examples)
        W       : weights matrix (size of current layer, size of prev layer)
        b       : bias vector (size of current layer, 1)
    returns:
        Z       : input of activation function (pre-activation parameter)
        cache   : a tuple containing args; stored for back-prop
    """
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    
    return Z, cache

In [7]:
t_A, t_W, t_b = linear_forward_test_case()
t_Z, t_linear_cache = linear_forward(t_A, t_W, t_b)
print("Z = " + str(t_Z))

Z = [[ 3.26295337 -1.23429987]]


#### Linear Activation Forward

In this notebook, we'll use two activation function:
* sigmoid   # A, activation_cache = sigmoid(Z)
* ReLU      # A, activation_cache = relu(Z)

For added convenience, we're going to group two function (linear and Activation) into one function (LINEAR -> ACTIVATION). Hence, we'll implement a function that does the LINEAR forward step, followed by an ACTIVATION forward step

In [8]:
def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)

    return A, cache

In [9]:
t_A_prev, t_W, t_b = linear_activation_forward_test_case()

t_A, t_linear_activation_cache = linear_activation_forward(t_A_prev, t_W, t_b, activation="sigmoid")
print("With sigmoid: A = " + str(t_A))

t_A, t_linear_activation_cache = linear_activation_forward(t_A_prev, t_W, t_b, activation="relu")
print("With ReLU: A = " + str(t_A))

With sigmoid: A = [[0.96890023 0.11013289]]
With ReLU: A = [[3.43896131 0.        ]]


#### L-Layer Model

For even more convenience when implementing the L-layer Neural Net, we will need a function that replicates the previous one (linear_activation_forward with RELU) L-1 times, then follows that with one linear_activation_forwardwith SIGMOID.

[LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID* model

In [10]:
def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2        # number of layers in the neural network

    # implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    # the for loop starts at 1 because layer 0 is the input
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)

    # implement LINEAR -> SIGMOID. Add "cache" to the "caches" list
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], "sigmoid")
    caches.append(cache)

    return AL, caches

In [11]:
t_X, t_parameters = L_model_forward_test_case_2hidden()
t_AL, t_caches = L_model_forward(t_X, t_parameters)

print("AL = " + str(t_AL))

AL = [[0.03921668 0.70498921 0.19734387 0.04728177]]


### Cost Function

we can now implement forward and backward propagation. We need to compute the cost, in order to check whether our model is actually learning.

In [12]:
def compute_cost(AL, Y):
    m = Y.shape[1]

    cost = -(np.sum(Y*np.log(AL)+(1-Y)*np.log(1-AL)))/m
    cost = np.squeeze(cost) # to make sure our cost's shape is what we expect (e,.g. this turns [[18]] into [18])

    return cost

In [13]:
t_Y, t_AL = compute_cost_test_case()
t_cost = compute_cost(t_AL, t_Y)

print("Cost: " + str(t_cost))

Cost: 0.2797765635793423


#### Backward Propagation Module

just as we did for the forward propagation module, we'll implement helper functions for backpropagation. Remember that backpropagation is used to calculate the gradient of the loss function with respect to the parameters.

Similarly to forward propagation, we're going to build the backward propagation in 3 steps:
<ol>
<li> LINEAR backward </li>
<li> LINEAR -> ACTIVATION backward where ACTIVATION computes the derivative of either the ReLU or sigmoid activation </li>
<li> [LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID backward (whole model)</li>
</ol>

In [16]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = (np.dot(dZ, A_prev.T))/m
    db = (np.sum(dZ, axis=1, keepdims=True))/m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [18]:
t_dZ, t_linear_cache = linear_backward_test_case()
t_dA_prev, t_dW, t_db = linear_backward(t_dZ, t_linear_cache)

print("dA_prev: " + str(t_dA_prev))
print("dW: " + str(t_dW))
print("db: " + str(t_db))

dA_prev: [[-1.15171336  0.06718465 -0.3204696   2.09812712]
 [ 0.60345879 -3.72508701  5.81700741 -3.84326836]
 [-0.4319552  -1.30987417  1.72354705  0.05070578]
 [-0.38981415  0.60811244 -1.25938424  1.47191593]
 [-2.52214926  2.67882552 -0.67947465  1.48119548]]
dW: [[ 0.07313866 -0.0976715  -0.87585828  0.73763362  0.00785716]
 [ 0.85508818  0.37530413 -0.59912655  0.71278189 -0.58931808]
 [ 0.97913304 -0.24376494 -0.08839671  0.55151192 -0.10290907]]
db: [[-0.14713786]
 [-0.11313155]
 [-0.13209101]]


#### Linear Activation Backward

Next, we'll create a function that merges the two helper functions: linear_backward and the backward step for the activation linear_activation_backward

To help us implement linear_activation_backward, two backward functions have been provided:
* sigmoid_backward: implements the backward propagation for SIGMOID unit:

dZ = sigmoid_backward(dA, activation_cache)
* relu_backward: implements the backward propagation for RELU unit:

dZ = relu_backward(dA, activation_cache)

if g(.) is the activation function, sigmoid_backward and relu_backward compute

In [19]:
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [20]:
t_dAL, t_linear_activation_cache = linear_activation_backward_test_case()

t_dA_prev, t_dW, t_db = linear_activation_backward(t_dAL, t_linear_activation_cache, activation = "sigmoid")
t_dA_prev, t_dW, t_db = linear_activation_backward(t_dAL, t_linear_activation_cache, activation = "relu")