In [124]:
import numpy as np

In [125]:
def initialize_weights_biases(n_x, n_h, n_y):
    """
    Arguments:
    n_x -- neurons in input layer
    n_h -- neurons in hidden layer
    n_y -- neurons in output layer
    
    Returns:
    weights --
    biases --
    """
    
    weights = {}
    weights["W1"] = np.random.randn(n_h, n_x) * 0.001
    weights["W2"] = np.random.randn(n_y, n_h) * 0.001
    
    biases = {}
    biases["b1"] = np.zeros((n_h, 1))
    biases["b2"] = np.zeros((n_y, 1))
  
    return weights, biases

In [126]:
def ReLU(Z, threshold = 0):
    A = np.multiply(Z, (Z > threshold))
    return A

In [127]:
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    return A

In [128]:
def forwardpropagate(X, weights, biases):
    
    """
    Arguments:
    X -- (m x n_x) dimensional input matrix
    weights --
        weights["W1"] -- (n_h x n_x) matrix
        weights["W2"] -- (n_y x n_h) matrix
    biases --
        biases["b1"] -- (n_h x 1) matrix
        biases["b2"] -- (n_y x 1) matrix
        
    Returns:
    activations --
        activations["A0"] -- (n_x x m) matrix
        activations["A1"] -- (n_h x m) matrix
        activations["A2"] -- (n_y x m) matrix
    """
    
   
    
    W1, W2 = weights["W1"], weights["W2"]
    b1, b2 = biases["b1"], biases["b2"]
    m, n_x = X.shape
    n_h = b1.shape[0]
    n_y = b2.shape[0]
    
    A0 = X.T
    
    Z1 = np.dot(W1, A0) + b1
    A1 = ReLU(Z1)
    
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    
    activations = {}
    assert(A0.shape == (n_x, m))
    activations["A0"] = A0
    assert(A1.shape == (n_h, m))
    activations["A1"] = A1
    assert(A2.shape == (n_y, m))
    activations["A2"] = A2
    
    return activations
    

In [151]:
def backpropagate(weights, biases, activations, Y):
    W1, W2  = weights["W1"], weights["W2"]
    b1, b2 = biases["b1"], biases["b2"]
    A0, A1, A2 = activations["A0"], activations["A1"], activations["A2"]
    m = Y.shape[1]
    
    dZ2 = A2 - Y.T
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    assert(W2.shape == dW2.shape)
    db2 = (1 / m) * np.sum(dZ2, axis = 1, keepdims = True)
    assert(b2.shape == db2.shape)
    
    dZ1 = np.dot(np.dot(W2.T, dZ2), (1 - np.power(A1, 2)))
    dW1 = (1 / m) * np.dot(dZ1, A0.T)
    assert(W1.shape == dW1.shape)
    db1 = (1 / m) * np.sum(dZ1, axis = 1, keepdims = True)
    assert(b1.shape == db1.shape)
    
    
    weight_gradients = {}
    weight_gradients["dW1"] = dW1
    weight_gradients["dW2"] = dW2
    
    bias_gradients = {}
    bias_gradients["db1"] = db1
    bias_gradients["db2"] = db2
    
    return weight_gradients, bias_gradients

In [153]:
# n_x = X.shape[1]
# n_h = 3
# n_y = Y.shape[1]
# weights, biases = initialize_weights_biases(n_x, n_h, n_y)
# activations = forwardpropagate(X, weights, biases)
# backpropagate(weights, biases, activations, Y)

({'dW1': array([[-0.04857314, -0.06476419],
         [ 0.06369571,  0.08492762],
         [-0.00036558, -0.00048744]]),
  'dW2': array([[ 0.        , -0.01179775, -0.00378254]])},
 {'db1': array([[-0.01619106],
         [ 0.02123192],
         [-0.00012186]]), 'db2': array([[-4.5000027]])})

In [130]:
X = np.array([[1, 2],
             [3, 4],
             [5, 6]])
print(X)
Y = np.array([[1],
             [2],
             [3]])
print(Y)

[[1 2]
 [3 4]
 [5 6]]
[[1]
 [2]
 [3]]


### Logistic Cost Function (Unregularized) ###


$$J(W, X, Y) = - \frac{1}{m} \sum\limits_{k=1}^K\sum\limits_{i=1}^m Y_k^{(i)}log(h_{W}(x^{(i)})_k) + (1 - Y_k^{(i)} )log(1- h_W(x^{(i)})_k) $$

In [131]:
def compute_cost_logistic(yhat, Y):
    assert(yhat.shape == Y.shape)
    
    m = Y.shape[0]
    
    cost = (-1 / m) * np.sum(np.dot(Y.T, np.log(yhat)) + np.dot((1 - Y).T, np.log(1-yhat)))
    return cost
    

In [175]:
def model_nn(X, Y, n_h, learning_rate = 0.005, num_iterations = 20):
    
    n_x = X.shape[1]
    n_y = Y.shape[1]
    
    weights, biases = initialize_weights_biases(n_x, n_h, n_y)
    
    for i in range(num_iterations):
        activations = forwardpropagate(X, weights, biases)
        yhat = activations["A2"].T
        cost = compute_cost_logistic(yhat, Y)
        weight_gradients, bias_gradients = backpropagate(weights, biases, activations, Y)
        print("Cost (iteration " + str(i) + ") = " + str(cost))
        dW1 = weight_gradients["dW1"]
        dW2 = weight_gradients["dW2"]
        db1 = bias_gradients["db1"]
        db2 = bias_gradients["db2"]
        
        weights["W1"] -= learning_rate * dW1
        weights["W2"] -= learning_rate * dW2
        biases["b1"] -=  learning_rate * db1
        biases["b2"] -= learning_rate * db2
        
    
#     print("Weights")
#     print(weights["W1"])
#     print(weights["W2"])
    
#     print("Biases")
#     print("b1")
#     print(biases["b1"])
#     print("b2")
#     print(biases["b2"])
    
#     print("Activations")
#     print("A0")
#     print(activations["A0"])
#     print("A1")
#     print(activations["A1"])
#     print("A2")
#     print(activations["A2"])

In [176]:
model_nn(X, Y, 3)

Cost (iteration 0) = 0.693150642041
Cost (iteration 1) = 0.659458543846
Cost (iteration 2) = 0.626021932101
Cost (iteration 3) = 0.59283505831
Cost (iteration 4) = 0.559895403259
Cost (iteration 5) = 0.527200368029
Cost (iteration 6) = 0.49474709565
Cost (iteration 7) = 0.462532358894
Cost (iteration 8) = 0.430552401538
Cost (iteration 9) = 0.398802709838
Cost (iteration 10) = 0.36727768157
Cost (iteration 11) = 0.335970146078
Cost (iteration 12) = 0.30487066861
Cost (iteration 13) = 0.273966543163
Cost (iteration 14) = 0.243240336482
Cost (iteration 15) = 0.212667786531
Cost (iteration 16) = 0.182214774498
Cost (iteration 17) = 0.151832970386
Cost (iteration 18) = 0.12145358535
Cost (iteration 19) = 0.090978432248
