In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def initParams(X, y, hidden_units1, hidden_units2, hidden_units3):
    """Create random initial weights and biases for a network with three hidden layers.

    Every weight matrix and bias vector is drawn from ``np.random.rand`` and
    shifted by 0.5, i.e. uniformly from [-0.5, 0.5).

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is used as the number of input features.
    y : array
        Targets; ``y.shape[0]`` is used as the number of output units.
    hidden_units1, hidden_units2, hidden_units3 : int
        Number of units in the first, second and third hidden layer.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, w3, b3, w4, b4)`` where ``w_k`` has shape
        ``(units_k, units_{k-1})`` and ``b_k`` has shape ``(units_k, 1)``.
    """
    n_i = X.shape[0]
    n_y = y.shape[0]

    # Layer 1: input -> first hidden layer (the original used the undefined name `n`).
    w1 = np.random.rand(hidden_units1, n_i) - 0.5
    b1 = np.random.rand(hidden_units1, 1) - 0.5

    w2 = np.random.rand(hidden_units2, hidden_units1) - 0.5
    b2 = np.random.rand(hidden_units2, 1) - 0.5

    w3 = np.random.rand(hidden_units3, hidden_units2) - 0.5
    b3 = np.random.rand(hidden_units3, 1) - 0.5

    # Layer 4: third hidden layer -> output (the original used the undefined
    # name `hidden_units`).
    w4 = np.random.rand(n_y, hidden_units3) - 0.5
    b4 = np.random.rand(n_y, 1) - 0.5

    return w1, b1, w2, b2, w3, b3, w4, b4

In [5]:
# NOTE(review): no cell visible above this one defines X or y; executing this
# call in that state raises NameError. Define/load the data before this cell.
params = initParams( X, y, hidden_units1 = 3, hidden_units2 = 4, hidden_units3 = 3)

NameError: name 'X' is not defined

In [None]:
import numpy as np

def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return the derivative of the logistic function at x: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def tanh(x):
    """Return the element-wise hyperbolic tangent of x (thin wrapper over np.tanh)."""
    activation = np.tanh(x)
    return activation

def tanh_derivative(x):
    """Return the derivative of tanh at x, i.e. 1 - tanh(x)^2."""
    activation = np.tanh(x)
    return 1 - activation ** 2


def relu(x):
    """Return the element-wise rectified linear unit: max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return 1 where x is strictly positive and 0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def softmax(x):
    """Return the softmax of x, normalised along axis 0.

    Parameters
    ----------
    x : array-like
        Scores. For a 2-D input each column is normalised independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries sum to 1 along axis 0.

    Notes
    -----
    The maximum is subtracted per column (along the same axis that is
    normalised). Subtracting the single global maximum, as before, lets a
    column that lies far below that maximum underflow to all zeros and produce
    0/0 = NaN.
    """
    x = np.asarray(x, dtype=float)
    # Shift each column so its largest entry is 0; exp() then never overflows
    # and every column keeps at least one entry equal to exp(0) = 1.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

In [5]:
def forwardPropagation(X, params):
    """Run one forward pass through the four-layer network.

    The three hidden layers use tanh; the output layer uses a sigmoid so the
    result can be fed to the binary cross-entropy in ``computeCost`` and
    matches the ``A4 - y`` output error used in ``backPropagation``.

    Parameters
    ----------
    X : array
        Inputs; multiplied on the left by ``w1``.
    params : sequence
        ``(w1, b1, w2, b2, w3, b3, w4, b4)``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2, Z3, A3, Z4, A4)`` - pre-activations and activations
        of every layer, in the order ``backPropagation`` indexes them.
    """
    w1, b1, w2, b2, w3, b3, w4, b4 = params[:8]

    Z1 = np.dot(w1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(w2, A1) + b2
    A2 = np.tanh(Z2)
    Z3 = np.dot(w3, A2) + b3
    A3 = np.tanh(Z3)
    Z4 = np.dot(w4, A3) + b4
    # Sigmoid output: values in (0, 1), as required by the log terms of the
    # cross-entropy cost. A tanh output can be negative.
    A4 = 1 / (1 + np.exp(-Z4))

    # Only the output is clipped, to keep log(A4) and log(1 - A4) finite.
    # Hidden tanh activations are legitimately negative and must not be
    # clipped, otherwise the cached values no longer match the forward pass.
    epsilon = 1e-15
    A4 = np.clip(A4, epsilon, 1 - epsilon)

    return Z1, A1, Z2, A2, Z3, A3, Z4, A4

In [6]:
def computeCost(y, A2):
    """Return the binary cross-entropy between targets ``y`` and predictions ``A2``.

    The sum over all entries is divided by ``y.shape[1]``. ``A2`` must lie
    strictly between 0 and 1 for the logarithms to be finite.
    """
    n_examples = y.shape[1]
    log_likelihood = y * np.log(A2) + (1 - y) * np.log(1 - A2)
    return -(1 / n_examples) * np.sum(log_likelihood)

In [7]:
def backPropagation(X, y, cache, params):
    """Compute the gradients of the cost with respect to every weight and bias.

    Parameters
    ----------
    X : array
        Inputs used in the forward pass.
    y : array
        Targets; ``y.shape[1]`` is used as the number of examples ``m``.
    cache : sequence
        ``(Z1, A1, Z2, A2, Z3, A3, Z4, A4)``; only the activations at indices
        1, 3, 5 and 7 are read.
    params : sequence
        ``(w1, b1, w2, b2, w3, b3, w4, b4)``; only ``w2``, ``w3`` and ``w4``
        are read.

    Returns
    -------
    tuple
        ``(dw1, db1, dw2, db2, dw3, db3, dw4, db4)``. Each ``dw_k`` has the
        shape of ``w_k`` and each ``db_k`` has shape ``(units_k, 1)``.
    """
    m = y.shape[1]
    w2, w3, w4 = params[2], params[4], params[6]
    A1, A2, A3, A4 = cache[1], cache[3], cache[5], cache[7]

    # Output error, as written for a sigmoid output with cross-entropy cost.
    dZ4 = A4 - y
    dw4 = (1 / m) * dZ4.dot(A3.T)
    # Bias gradients are summed over examples only (axis 1) so that every unit
    # gets its own gradient. Summing over all elements collapses them into one
    # scalar shared by the whole layer.
    db4 = (1 / m) * np.sum(dZ4, axis=1, keepdims=True)

    # (1 - A**2) is the derivative of tanh expressed through its output.
    dZ3 = (w4.T).dot(dZ4) * (1 - A3 ** 2)
    dw3 = (1 / m) * dZ3.dot(A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    dZ2 = (w3.T).dot(dZ3) * (1 - A2 ** 2)
    dw2 = (1 / m) * dZ2.dot(A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    dZ1 = (w2.T).dot(dZ2) * (1 - A1 ** 2)
    dw1 = (1 / m) * dZ1.dot(X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dw1, db1, dw2, db2, dw3, db3, dw4, db4

In [8]:
def updateParams(params, grads, learning_rate = 0.01):
    """Apply one gradient-descent step to all eight parameters.

    ``params`` is ``(w1, b1, w2, b2, w3, b3, w4, b4)`` and ``grads`` holds the
    matching gradients in the same order. Each parameter is replaced by
    ``param - learning_rate * grad``; the inputs are not modified.

    Returns the updated parameters as a tuple in the same order.
    """
    n_params = 8  # four weight matrices and four bias vectors
    return tuple(
        params[k] - learning_rate * grads[k]
        for k in range(n_params)
    )

In [9]:
def train(X, y, hidden_units = 40, epochs = 40000, learning_rate = 0.01):
    """Train the four-layer network with batch gradient descent.

    Parameters
    ----------
    X : array
        Training inputs, passed unchanged to ``forwardPropagation``.
    y : array
        Training targets.
    hidden_units : int or sequence of three ints
        Width of the hidden layers. A single int is used for all three hidden
        layers; a sequence gives the width of each layer individually.
    epochs : int
        Number of gradient-descent iterations.
    learning_rate : float
        Step size passed to ``updateParams``.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, w3, b3, w4, b4, cost_history)`` where
        ``cost_history`` is the list of per-epoch costs.

    Raises
    ------
    ValueError
        If ``hidden_units`` is a sequence whose length is not three.
    """
    # initParams needs one width per hidden layer; the original passed a
    # single value for three required arguments.
    if np.ndim(hidden_units) == 0:
        layer_sizes = (hidden_units,) * 3
    else:
        layer_sizes = tuple(hidden_units)
        if len(layer_sizes) != 3:
            raise ValueError("hidden_units must be an int or a sequence of three ints")

    params = initParams(X, y, *layer_sizes)
    cost_history = []
    # Report roughly 20 times per run; max() avoids a modulo by zero when
    # epochs < 20.
    n_print = max(1, epochs // 20)

    for i in range(epochs):
        cache = forwardPropagation(X, params)
        # The network output is the last activation in the cache, not the
        # second hidden layer's activation at index 3.
        output = cache[-1]
        cost = computeCost(y, output)
        grads = backPropagation(X, y, cache, params)
        params = updateParams(params, grads, learning_rate)
        cost_history.append(cost)

        if (i + 1) % n_print == 0:
            # NOTE(review): get_predictions / get_accuracy are not defined in
            # the visible part of this notebook - presumably defined elsewhere.
            predictions = get_predictions(output)
            accu = get_accuracy(predictions, y)
            print("Iteration", i+1, " | Cost: ", cost, " | Accuracy: ", accu,"\n")

    w1, b1, w2, b2, w3, b3, w4, b4 = params[:8]

    return w1, b1, w2, b2, w3, b3, w4, b4, cost_history