In [51]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v3 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

# Render matplotlib figures inside the notebook output area.
%matplotlib inline
# Default figure size and image rendering options applied to every later plot.
plt.rcParams['figure.figsize'] = (5.0,4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Reload edited modules automatically before each cell runs.
# NOTE(review): re-running this cell prints an "already loaded" notice from
# %load_ext; %reload_ext is the variant the notice itself suggests.
%load_ext autoreload
%autoreload 2

# Seed NumPy's global random generator so the random draws below are repeatable.
np.random.seed(1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the weights and biases of a network with one hidden layer.

    Arguments:
    n_x -- number of columns of W1 (size of the input)
    n_h -- number of rows of W1 / columns of W2 (size of the hidden layer)
    n_y -- number of rows of W2 (size of the output layer)

    Returns:
    dict with keys "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h) and
    "b2" (n_y, 1). Weights are standard-normal draws scaled by 0.01,
    biases are all zeros.
    """
    scale = 0.01

    # W1 is drawn before W2 so the global NumPy random stream is consumed
    # in a fixed order and a seeded run always yields the same values.
    hidden_weights = np.random.randn(n_h, n_x) * scale
    output_weights = np.random.randn(n_y, n_h) * scale

    return {
        "W1": hidden_weights,
        "b1": np.zeros((n_h, 1)),
        "W2": output_weights,
        "b2": np.zeros((n_y, 1)),
    }

In [53]:
# Build a small 3-2-1 network and print every parameter array for inspection.
parameters = initialize_parameters(3, 2, 1)
for label, key in (("W1 = ", "W1"), ("b1 = ", "b1"), ("W2 = ", "W2"), ("b2 = ", "b2")):
    print(label + str(parameters[key]))

W1 = [[ 0.01624345 -0.00611756 -0.00528172]
 [-0.01072969  0.00865408 -0.02301539]]
b1 = [[ 0.]
 [ 0.]]
W2 = [[ 0.01744812 -0.00761207]]
b2 = [[ 0.]]


In [54]:
def initialize_parameters_deep(layer_dims):
    """Create weights and biases for a fully connected network of any depth.

    Arguments:
    layer_dims -- sequence of layer sizes; layer_dims[0] is the input size
                  and layer_dims[i] the number of units in layer i

    Returns:
    parameters -- a NEW dict holding, for every i in 1..len(layer_dims)-1,
                  "W<i>" of shape (layer_dims[i], layer_dims[i-1]) drawn from
                  a standard normal scaled by 0.01, and "b<i>" of shape
                  (layer_dims[i], 1) filled with zeros. The dict is empty
                  when layer_dims has fewer than two entries.
    """
    # The seed is reset on every call, so repeated calls with the same
    # layer_dims return identical weights.
    np.random.seed(3)

    # Start from a fresh local dict. Without it the function would write into
    # whatever global `parameters` happens to exist (keeping stale keys from
    # earlier cells) or fail with NameError on a fresh kernel.
    parameters = {}

    L = len(layer_dims)
    for i in range(1, L):
        parameters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1]) * 0.01
        parameters['b' + str(i)] = np.zeros((layer_dims[i], 1))
    return parameters
        

In [55]:
# Initialize a 5-4-3 network and print every parameter array for inspection.
parameters = initialize_parameters_deep([5, 4, 3])
for label, key in (('W1 =', "W1"), ('b1 =', "b1"), ('W2 =', "W2"), ('b2 =', "b2")):
    print(label + str(parameters[key]))

W1 =[[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
b1 =[[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]]
W2 =[[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]]
b2 =[[ 0.]
 [ 0.]
 [ 0.]]


In [56]:
def linear_forward(A, W, b):
    """Apply the affine part of a layer: Z = W . A + b.

    Arguments:
    A -- input to the layer (right operand of np.dot)
    W -- weights (left operand of np.dot)
    b -- bias, added to the product using NumPy broadcasting

    Returns:
    Z -- the result of np.dot(W, A) + b
    cache -- the tuple (A, W, b), kept for the backward pass
    """
    weighted_input = np.dot(W, A)
    Z = weighted_input + b
    return Z, (A, W, b)

In [57]:
# Fetch fixture inputs (helper presumably supplied by the wildcard import
# from testCases_v3) and run the affine step on them.
A,W,b = linear_forward_test_case()
Z , cache = linear_forward(A,W,b)

# Print the pre-activation values and the cached (A, W, b) tuple for inspection.
print("Z ="+ str(Z))
print("cache ="+ str(cache))

Z =[[ 3.26295337 -1.23429987]]
cache =(array([[ 1.62434536, -0.61175641],
       [-0.52817175, -1.07296862],
       [ 0.86540763, -2.3015387 ]]), array([[ 1.74481176, -0.7612069 ,  0.3190391 ]]), array([[-0.24937038]]))


In [58]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run one layer forward: the affine step followed by an activation.

    Arguments:
    A_prev -- input to this layer
    W -- weights of this layer
    b -- bias of this layer
    activation -- name of the activation to apply, "sigmoid" or "relu"

    Returns:
    A -- the first value returned by the chosen activation function
    cache -- tuple (linear_cache, activation_cache); linear_cache comes from
             linear_forward and activation_cache is the second value returned
             by the activation function (presumably Z -- confirm in dnn_utils_v2)

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Resolve the activation first so an unsupported name fails immediately
    # with a clear message instead of an UnboundLocalError further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got " + repr(activation)
        )

    # The affine step is identical for both activations, so it is done once.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    cache = (linear_cache, activation_cache)
    return A, cache
        

In [59]:
# Fetch fixture inputs (helper presumably supplied by the wildcard import
# from testCases_v3) and push them through one layer with each activation.
A_prev , W , b = linear_activation_forward_test_case()
A , linear_activation_cache = linear_activation_forward(A_prev,W,b,"sigmoid")
print("Sigmoid A ="+str(A))
# A and linear_activation_cache are rebound here, so after this cell they
# hold the relu results.
A , linear_activation_cache = linear_activation_forward(A_prev , W, b ,"relu")
print("Relu A ="+str(A))

Sigmoid A =[[ 0.96890023  0.11013289]]
Relu A =[[ 3.43896131  0.        ]]


In [60]:
def L_model_forward(X, parameters):
    """Run the full forward pass: relu layers followed by one sigmoid layer.

    Arguments:
    X -- input fed to the first layer
    parameters -- dict with "W1", "b1", ..., "W<L>", "b<L>"; the number of
                  layers L is taken as len(parameters) // 2

    Returns:
    AL -- output of the final (sigmoid) layer
    caches -- list with one cache per layer, in layer order, as returned by
              linear_activation_forward
    """
    depth = len(parameters) // 2
    caches = []
    activations = X

    # Layers 1 .. depth-1 use relu.
    for layer in range(1, depth):
        suffix = str(layer)
        activations, layer_cache = linear_activation_forward(
            activations, parameters["W" + suffix], parameters["b" + suffix], "relu"
        )
        caches.append(layer_cache)

    # The last layer uses sigmoid.
    last = str(depth)
    AL, layer_cache = linear_activation_forward(
        activations, parameters["W" + last], parameters["b" + last], "sigmoid"
    )
    caches.append(layer_cache)

    return AL, caches

In [61]:
# Fetch a fixture input and parameter dict (helper presumably supplied by the
# wildcard import from testCases_v3). This rebinds the global `parameters`.
X , parameters = L_model_forward_test_case()
# Number of dict entries; L_model_forward treats half of this as the layer count.
print(len(parameters))
AL , caches = L_model_forward(X , parameters)
print("AL ="+ str(AL))
# NOTE(review): the label has no separator before the number, so the output
# reads "length of cache list2".
print("length of cache list" + str(len(caches)))

4
AL =[[ 0.17007265  0.2524272 ]]
length of cache list2


In [62]:
def compute_cost(AL, Y):
    """Compute the mean binary cross-entropy between predictions and labels.

    Arguments:
    AL -- array of predicted probabilities
    Y -- array of labels, broadcast-compatible with AL; Y.shape[1] is used as
         the number of examples to average over

    Returns:
    cost -- -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)), passed through np.squeeze

    Notes:
    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithm, so a
    prediction of exactly 0 or 1 yields a finite cost instead of nan/inf.
    Predictions already inside that interval are not modified.
    """
    m = Y.shape[1]

    # log(0) is -inf and 0 * -inf is nan; clipping keeps every term finite.
    eps = 1e-15
    AL = np.clip(AL, eps, 1 - eps)

    log_likelihood = Y * np.log(AL) + (1 - Y) * np.log(1 - AL)
    # Dividing the float sum by m avoids relying on `-1/m` being true division.
    cost = -np.sum(log_likelihood) / m
    cost = np.squeeze(cost)

    return cost

In [63]:
# Fetch fixture labels and predictions (helper presumably supplied by the
# wildcard import from testCases_v3). The fixture returns (Y, AL) while
# compute_cost takes (AL, Y), hence the swapped order in the call below.
Y , AL = compute_cost_test_case()
print("cost ="+ str(compute_cost(AL,Y)))

cost =0.414931599615


In [64]:
def linear_backward(dZ, cache):
    """Backward pass through the affine part of one layer.

    Arguments:
    dZ -- gradient with respect to the layer's affine output
    cache -- tuple (A_prev, W, b) as stored by linear_forward

    Returns:
    dA_prev -- np.dot(W.T, dZ), gradient with respect to the layer input
    dW -- (1/m) * np.dot(dZ, A_prev.T), gradient with respect to W
    db -- (1/m) * sum of dZ along axis 1 (dimension kept), gradient w.r.t. b

    m is the number of columns of A_prev.
    """
    A_prev, W, _ = cache  # the bias itself is not needed for the gradients
    inv_m = 1 / A_prev.shape[1]

    grad_input = np.dot(W.T, dZ)
    grad_weights = inv_m * np.dot(dZ, A_prev.T)
    grad_bias = inv_m * np.sum(dZ, axis=1, keepdims=True)

    return grad_input, grad_weights, grad_bias

In [65]:
# Fetch a fixture gradient and linear cache (helper presumably supplied by the
# wildcard import from testCases_v3) and run the affine backward step.
dZ , linear_cache = linear_backward_test_case()
dA_prev , dW , db = linear_backward(dZ , linear_cache)
# Print the three gradients for inspection.
print("dA_prev ="+str(dA_prev))
print("dW = "+str(dW))
print("db = "+str(db))

dA_prev =[[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]]
dW = [[-0.10076895  1.40685096  1.64992505]]
db = [[ 0.50629448]]


In [66]:
def linear_activation_backward(dA, cache, activation):
    """Backward pass through one layer: activation first, then the affine step.

    Arguments:
    dA -- gradient with respect to this layer's activation output
    cache -- tuple (linear_cache, activation_cache) stored during the forward pass
    activation -- name of the activation used in this layer, "sigmoid" or "relu"

    Returns:
    dA_prev -- gradient with respect to the layer input
    dW -- gradient with respect to the layer weights
    db -- gradient with respect to the layer bias

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    linear_cache, activation_cache = cache

    # Resolve the activation first so an unsupported name fails immediately
    # with a clear message instead of an UnboundLocalError at the return.
    if activation == "relu":
        activation_backward = relu_backward
    elif activation == "sigmoid":
        activation_backward = sigmoid_backward
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got " + repr(activation)
        )

    # Both activations share the same two-step structure.
    dZ = activation_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [67]:
# Run the layer-level backward step on the same fixture once per activation
# and print the resulting gradients; after the loop the globals hold the
# relu results.
AL, linear_activation_cache = linear_activation_backward_test_case()
for activation_name in ("sigmoid", "relu"):
    dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation_name)
    print("dA_prev = " + str(dA_prev))
    print("dW = " + str(dW))
    print("db  = " + str(db))

dA_prev = [[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]
dW = [[ 0.10266786  0.09778551 -0.01968084]]
db  = [[-0.05729622]]
dA_prev = [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]
dW = [[ 0.44513824  0.37371418 -0.10478989]]
db  = [[-0.20837892]]


In [68]:
def L_model_backward(AL, Y, caches):
    """Run the full backward pass for relu layers followed by a sigmoid layer.

    Arguments:
    AL -- output of the forward pass (predicted probabilities)
    Y -- labels; reshaped to AL.shape before use
    caches -- list of per-layer caches from the forward pass, in layer order;
              the last entry belongs to the sigmoid layer

    Returns:
    grads -- dict with, for every layer k in 1..len(caches):
             "dW<k>" and "db<k>" -- gradients of layer k's weights and bias
             "dA<k>" -- the dA_prev value returned by layer k's backward step,
                        i.e. the gradient with respect to layer k's INPUT

    Notes:
    AL is clipped to [1e-15, 1 - 1e-15] before computing the initial gradient
    so that a prediction of exactly 0 or 1 does not cause a division by zero.
    Predictions already inside that interval are not modified.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL; the clip keeps
    # both denominators away from zero.
    eps = 1e-15
    AL = np.clip(AL, eps, 1 - eps)
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid).
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, caches[L - 1], "sigmoid")

    # Hidden layers (relu), from layer L-1 down to layer 1. Layer l+1 receives
    # the gradient stored under "dA<l+2>" by the layer above it.
    for l in reversed(range(L - 1)):
        cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

In [74]:
# Fetch fixture predictions, labels and caches (helper presumably supplied by
# the wildcard import from testCases_v3) and run the full backward pass.
AL , Y_assess , caches = L_model_backward_test_case()
grads = L_model_backward(AL , Y_assess , caches)
# "dA1" is the dA_prev value produced by layer 1's backward step.
print(str(grads["dA1"]))

[[ 0.          0.52257901]
 [ 0.         -0.3269206 ]
 [ 0.         -0.32070404]
 [ 0.         -0.74079187]]


In [75]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    parameters -- dict with "W1", "b1", ..., "W<L>", "b<L>"; the number of
                  layers L is taken as len(parameters) // 2
    grads -- dict with the matching "dW<k>" and "db<k>" entries
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- the same dict object that was passed in; its entries are
                  updated with the in-place operator `-=`
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        # `-=` modifies the stored arrays in place, so the caller's arrays change too.
        parameters["W" + suffix] -= learning_rate * grads["dW" + suffix]
        parameters["b" + suffix] -= learning_rate * grads["db" + suffix]

    return parameters

In [77]:
# Apply one update step with learning rate 0.1 to the fixture parameters and
# print every resulting array for inspection.
parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads, 0.1)
for label, key in (("W1  = ", "W1"), ("b1  = ", "b1"), ("W2  = ", "W2"), ("b2  = ", "b2")):
    print(label + str(parameters[key]))

W1  = [[-0.59562069 -0.09991781 -2.14584584  1.82662008]
 [-1.76569676 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284052  2.20374577]]
b1  = [[-0.04659241]
 [-1.28888275]
 [ 0.53405496]]
W2  = [[-0.55569196  0.0354055   1.32964895]]
b2  = [[-0.84610769]]
