In [1]:
       # Fitting Data of XOR with a 4-layer Neural Network

In [2]:
import numpy as np

In [3]:
X = np.array([[0, 0, 1, 1],[0, 1, 0, 1]])
Y = np.array([[0,1,1,0]])

In [4]:
layer_dims = [2, 4, 3, 3, 1]

In [5]:
def initialize_parameters_deep(layer_dims):
    y = len(layer_dims)
    L = y - 1
    parameters = {}
    
    for l in range(L):
        parameters["W" + str(l+1)] = np.random.rand(layer_dims[l+1],layer_dims[l])
        parameters["b" + str(l+1)] = np.zeros((layer_dims[l+1],1))
     
    return parameters    

In [6]:
parameters = initialize_parameters_deep(layer_dims)

In [7]:
print(parameters)

{'W1': array([[0.91213289, 0.68107272],
       [0.20965769, 0.00558201],
       [0.61977661, 0.85385243],
       [0.02141096, 0.78165356]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[0.52603232, 0.48061803, 0.02855018, 0.10841869],
       [0.54930832, 0.90060257, 0.26752807, 0.39335893],
       [0.05105417, 0.22291466, 0.62026301, 0.58003125]]), 'b2': array([[0.],
       [0.],
       [0.]]), 'W3': array([[0.3558408 , 0.19587885, 0.01464743],
       [0.09251784, 0.2772128 , 0.94503587],
       [0.66074648, 0.91956748, 0.79349694]]), 'b3': array([[0.],
       [0.],
       [0.]]), 'W4': array([[0.52105509, 0.96358723, 0.08792131]]), 'b4': array([[0.]])}


In [8]:
def linear_forward(A_prev, W, b):
    Z = np.dot(W,A_prev) + b
    
    linear_cache = (A_prev, W, b)
    return Z, linear_cache

In [9]:
def sigmoid(Z):
    
    A = 1/(1+np.exp(-Z))
    cache = Z
    
    return A, cache

In [10]:
def relu(Z):
    
    A = np.maximum(0,Z)
    
    cache = Z 
    return A, cache

In [11]:
Z1, linear_cache = linear_forward(X, parameters["W1"], parameters["b1"]) 

In [12]:
print("Z1 = " + str(Z1))
print("linear_cache = "+ str(linear_cache))

Z1 = [[0.         0.68107272 0.91213289 1.59320561]
 [0.         0.00558201 0.20965769 0.2152397 ]
 [0.         0.85385243 0.61977661 1.47362904]
 [0.         0.78165356 0.02141096 0.80306452]]
linear_cache = (array([[0, 0, 1, 1],
       [0, 1, 0, 1]]), array([[0.91213289, 0.68107272],
       [0.20965769, 0.00558201],
       [0.61977661, 0.85385243],
       [0.02141096, 0.78165356]]), array([[0.],
       [0.],
       [0.],
       [0.]]))


In [13]:
def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)
    
    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        A, activation_cache = relu(Z)
    
    cache = (linear_cache, activation_cache)
    
    return A, cache      

In [14]:
A, cache = linear_activation_forward(X, parameters["W1"], parameters["b1"], "sigmoid")

In [15]:
print(A,cache)

[[0.5        0.66397808 0.71343642 0.83106663]
 [0.5        0.5013955  0.55222327 0.55360314]
 [0.5        0.70137465 0.65016774 0.81360836]
 [0.5        0.68603638 0.50535253 0.69062963]] ((array([[0, 0, 1, 1],
       [0, 1, 0, 1]]), array([[0.91213289, 0.68107272],
       [0.20965769, 0.00558201],
       [0.61977661, 0.85385243],
       [0.02141096, 0.78165356]]), array([[0.],
       [0.],
       [0.],
       [0.]])), array([[0.        , 0.68107272, 0.91213289, 1.59320561],
       [0.        , 0.00558201, 0.20965769, 0.2152397 ],
       [0.        , 0.85385243, 0.61977661, 1.47362904],
       [0.        , 0.78165356, 0.02141096, 0.80306452]]))


In [16]:
print(len(parameters)//2)

4


In [17]:
def L_forward(X, parameters):
    L = len(parameters) // 2
    A_prev = X
    caches = []
    for l in range(1, L):
        A_prev, cache = linear_activation_forward(A_prev, parameters["W" + str(l)],parameters["b" + str(l)], "relu")
        caches.append(cache)
                                                                                             
    AL, cache = linear_activation_forward(A_prev, parameters["W" + str(L)],parameters["b" + str(L)], "sigmoid")                                                                               
    caches.append(cache)
    return AL, caches      

In [18]:
AL, caches = L_forward(X, parameters)

In [19]:
print(AL)

[[0.5        0.82849221 0.74515469 0.9338823 ]]


In [20]:
caches[0]

((array([[0, 0, 1, 1],
         [0, 1, 0, 1]]),
  array([[0.91213289, 0.68107272],
         [0.20965769, 0.00558201],
         [0.61977661, 0.85385243],
         [0.02141096, 0.78165356]]),
  array([[0.],
         [0.],
         [0.],
         [0.]])),
 array([[0.        , 0.68107272, 0.91213289, 1.59320561],
        [0.        , 0.00558201, 0.20965769, 0.2152397 ],
        [0.        , 0.85385243, 0.61977661, 1.47362904],
        [0.        , 0.78165356, 0.02141096, 0.80306452]]))

In [21]:
caches[1]

((array([[0.        , 0.68107272, 0.91213289, 1.59320561],
         [0.        , 0.00558201, 0.20965769, 0.2152397 ],
         [0.        , 0.85385243, 0.61977661, 1.47362904],
         [0.        , 0.78165356, 0.02141096, 0.80306452]]),
  array([[0.52603232, 0.48061803, 0.02855018, 0.10841869],
         [0.54930832, 0.90060257, 0.26752807, 0.39335893],
         [0.05105417, 0.22291466, 0.62026301, 0.58003125]]),
  array([[0.],
         [0.],
         [0.]])),
 array([[0.        , 0.47007257, 0.60059273, 1.0706653 ],
        [0.        , 0.91504598, 0.86409027, 1.77913625],
        [0.        , 1.01901248, 0.49014749, 1.50915997]]))

In [22]:
caches[2]

((array([[0.        , 0.47007257, 0.60059273, 1.0706653 ],
         [0.        , 0.91504598, 0.86409027, 1.77913625],
         [0.        , 1.01901248, 0.49014749, 1.50915997]]),
  array([[0.3558408 , 0.19587885, 0.01464743],
         [0.09251784, 0.2772128 , 0.94503587],
         [0.66074648, 0.91956748, 0.79349694]]),
  array([[0.],
         [0.],
         [0.]])),
 array([[0.        , 0.36143507, 0.39015181, 0.75158687],
        [0.        , 1.2601559 , 0.75830938, 2.01846529],
        [0.        , 1.96062861, 1.58035937, 3.54098798]]))

In [23]:
caches[3]

((array([[0.        , 0.36143507, 0.39015181, 0.75158687],
         [0.        , 1.2601559 , 0.75830938, 2.01846529],
         [0.        , 1.96062861, 1.58035937, 3.54098798]]),
  array([[0.52105509, 0.96358723, 0.08792131]]),
  array([[0.]])),
 array([[0.        , 1.57497876, 1.0729351 , 2.64791386]]))

In [24]:
len(caches)

4

In [25]:
def compute_cost(AL, Y):
    m = AL.shape[1]
    cost = (1/m)* (np.dot(Y, np.log(AL).T) + np.dot((1-Y), np.log(1-AL).T))
    cost = np.squeeze(cost)
    return cost
    
    

In [26]:
cost = compute_cost(AL, Y)

In [27]:
print(cost)

-0.97294429919082


In [28]:
def linear_backward(dZ, linear_cache):
    m = Y.shape[1]
    A_prev, W, b = linear_cache
    
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis = 1, keepdims = True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

In [29]:
def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    
    return dZ

In [30]:
def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    
    assert (dZ.shape == Z.shape)
    return dZ    

In [31]:
def linear_activation_backward(dA, cache, activation):
    linear_cache,activation_cache = cache
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [32]:
def L_model_backward(AL, Y, caches):
    m = AL.shape[1]
    L = len(caches)
    grads = {}
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads    

In [33]:
grads = L_model_backward(AL, Y, caches)

In [34]:
print(grads)

{'dA3': array([[ 0.26052755, -0.08936501, -0.13278845,  0.48660413],
       [ 0.48179361, -0.16526271, -0.24556569,  0.89987705],
       [ 0.04396066, -0.01507919, -0.02240633,  0.08210816]]), 'dW4': array([[0.1351191 , 0.36890771, 0.64196393]]), 'db4': array([[0.2518823]]), 'dA2': array([[ 0.        , -0.05705299, -0.08477566,  0.31066096],
       [ 0.        , -0.07718399, -0.11468854,  0.42027689],
       [ 0.        , -0.16945345, -0.25179275,  0.92269617]]), 'dW3': array([[0.09980758, 0.16730519, 0.14455337],
       [0.18457418, 0.3093975 , 0.26732256],
       [0.01684124, 0.02823059, 0.02439151]]), 'db3': array([[0.06611267],
       [0.12226216],
       [0.01115566]]), 'dA1': array([[ 0.        , -0.08106083, -0.12044917,  0.44138678],
       [ 0.        , -0.13470645, -0.2001618 ,  0.7334942 ],
       [ 0.        , -0.12738346, -0.18928049,  0.69361959],
       [ 0.        , -0.13483492, -0.20035269,  0.73419373]]), 'dW2': array([[0.09469072, 0.01219356, 0.08913555, 0.0507675 ],

In [35]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) //2
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - (learning_rate * grads["dW" + str(l+1)])
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - (learning_rate * grads["db" + str(l+1)])
    return parameters
                                                              

In [36]:
parameters = update_parameters(parameters, grads, 0.01)

In [37]:
print(parameters)

{'W1': array([[0.91133054, 0.68017191],
       [0.20832436, 0.00408504],
       [0.61851576, 0.85243684],
       [0.02007636, 0.78015516]]), 'b1': array([[-0.00059969],
       [-0.00099656],
       [-0.00094239],
       [-0.00099752]]), 'W2': array([[0.52508541, 0.48049609, 0.02765882, 0.10791102],
       [0.54802729, 0.90043761, 0.26632221, 0.39267212],
       [0.04824175, 0.2225525 , 0.61761559, 0.5785234 ]]), 'b2': array([[-0.00042208],
       [-0.00057101],
       [-0.00125362]]), 'W3': array([[0.35484273, 0.19420579, 0.0132019 ],
       [0.0906721 , 0.27411883, 0.94236264],
       [0.66057806, 0.91928517, 0.79325303]]), 'b3': array([[-0.00066113],
       [-0.00122262],
       [-0.00011156]]), 'W4': array([[0.5197039 , 0.95989815, 0.08150167]]), 'b4': array([[-0.00251882]])}


In [38]:
def deep_model(X, Y, layer_dims, num_iterations, learning_rate):
    parameters = initialize_parameters_deep(layer_dims)
    costs = []
    
    for i in range(num_iterations):
        AL, caches = L_forward(X, parameters)
        
        cost = compute_cost(AL, Y)
        
        grads = L_model_backward(AL, Y, caches)
        
        parameters = update_parameters(parameters, grads, learning_rate)
        
        if i % 100 == 0:
            costs.append(cost)
    costs = np.squeeze(costs)        
    print("costs are = " + str(costs))        
        
    return parameters    
    
        

In [39]:
parameters = deep_model(X, Y, layer_dims, 2000, 0.1)

costs are = [-1.25543838 -0.19408824 -0.07980705 -0.04790789 -0.03371379 -0.02590314
 -0.0209523  -0.01756866 -0.0150987  -0.01323469 -0.01179204 -0.01060331
 -0.00964039 -0.00883409 -0.00814998 -0.00756421 -0.00705673 -0.00661175
 -0.00621983 -0.00587192]


In [40]:
AL, caches = L_forward(X, parameters)
print(AL)

[[0.01085098 0.99980113 0.99978185 0.01085098]]


In [41]:
for i in range(AL.shape[1]):
    if AL[0][i] > 0.5:
        AL[0][i] = 1
    else:
        AL[0][i] = 0
        

In [42]:
print(AL,Y)

[[0. 1. 1. 0.]] [[0 1 1 0]]
