In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
def relu(z):
    """
    Rectified linear unit, applied element-wise.

    Arguments:
    z -- a scalar or a numpy array of any shape

    Returns:
    The element-wise maximum of 0 and z (same shape as z)
    """
    # np.maximum broadcasts over arrays; the builtin max(0, z) raises
    # "truth value of an array is ambiguous" for multi-element arrays.
    return np.maximum(0, z)

def sigmoid(z):
    """
    Logistic sigmoid 1 / (1 + exp(-z)), evaluated with numpy.

    Arguments:
    z -- a scalar or a numpy array

    Returns:
    The sigmoid of z, element-wise for array input
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

In [3]:
def layer_sizes(X, Y, n_h=4):
    """
    Derive the layer sizes of the network from the shapes of the data.

    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    n_h -- size of the hidden layer (defaults to 4, the value that was
           previously hard-coded)

    Returns:
    (n_x, n_h, n_y) -- sizes of the input, hidden and output layers
    """

    n_x = X.shape[0]  # size of input layer
    n_y = Y.shape[0]  # size of output layer

    return (n_x, n_h, n_y)

In [4]:
def init_params(n_x, n_h, n_y):
    """
    Build the initial weights and biases of the 2 layer neural network.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    Dictionary with keys:
        'W1' -- random weights of shape (n_h, n_x), scaled by 0.01
        'b1' -- zeros of shape (n_h, 1)
        'W2' -- random weights of shape (n_y, n_h), scaled by 0.01
        'b2' -- zeros of shape (n_y, 1)
    """

    # W1 is drawn before W2 so results stay reproducible under a fixed seed.
    hidden_weights = np.random.randn(n_h, n_x) * 0.01
    output_weights = np.random.randn(n_y, n_h) * 0.01

    return {
        'W1': hidden_weights,
        'b1': np.zeros((n_h, 1)),
        'W2': output_weights,
        'b2': np.zeros((n_y, 1)),
    }

In [5]:
def forward_propagation(X, parameters):
  """
  Run one forward pass through the 2 layer network.

  Arguments:
  X -- input data of size (n_x, m)
  parameters -- python dictionary containing the parameters 'W1', 'b1', 'W2' and 'b2'
                (output of the initialization function)

  Returns:
  cache -- dictionary with the intermediate values 'Z1', 'A1', 'Z2' and 'A2',
           where 'A2' is the sigmoid output of the last layer
  """
  # Pull the weights and biases out of the dictionary in one go
  W1, b1, W2, b2 = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

  # Hidden layer: affine transform followed by tanh
  hidden_linear = np.matmul(W1, X) + b1
  hidden_activation = np.tanh(hidden_linear)

  # Output layer: affine transform followed by sigmoid (probabilities)
  output_linear = np.matmul(W2, hidden_activation) + b2
  output_activation = sigmoid(output_linear)

  return {
    'Z1': hidden_linear,
    'A1': hidden_activation,
    'Z2': output_linear,
    'A2': output_activation
  }

In [6]:
def calc_cost(A2, Y, parameters):
  """
  Computes the cross-entropy cost

  Arguments:
  A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
  Y -- "true" labels vector of shape (1, number of examples)
  parameters -- python dictionary containing the parameters W1, b1, W2 and b2
                (not used by the computation; kept so existing callers keep working)

  Returns:
  cost -- the cross-entropy cost averaged over all examples
  """
  # Get the total number of examples
  m = Y.shape[1]

  # Keep the probabilities strictly inside (0, 1). A saturated sigmoid can
  # return exactly 0.0 or 1.0, and log(0) * 0 evaluates to nan, which would
  # turn the whole cost into nan even for a perfect prediction.
  eps = np.finfo(float).eps
  A2 = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)

  # Compute the Cross-entropy cost
  logprob = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), (1 - Y))

  # Squeeze the Numpy array (removes the extra dimensions)
  cost = np.squeeze(-(1/m) * np.sum(logprob))

  return cost

In [7]:
def backward_prop(parameters, cache, X, Y):
  """
  Computes the gradients of the cross-entropy cost with respect to the
  weights and biases of both layers.

  Arguments:
  parameters -- python dictionary containing our parameters (only 'W2' is read here)
  cache -- a dictionary containing "Z1", "A1", "Z2" and "A2" (only 'A1' and 'A2' are read here)
  X -- input data of shape (n_x, number of examples)
  Y -- "true" labels vector of shape (1, number of examples)

  Returns:
  gradients -- python dictionary with the gradients 'dW1', 'db1', 'dW2' and 'db2'
  """

  m = X.shape[1]

  # Only the second layer weights are needed to push the error back
  W2 = parameters['W2']

  # Retrieve the respective activations from 'cache' dictionary
  A1 = cache['A1']
  A2 = cache['A2']

  # Calculate respective derivatives
  dZ2 = A2 - Y # Derivative of Final layer output is final Activation - Target value (Predicted - Original)
  dW2 = (1/m) * np.matmul(dZ2, A1.T) # Derivative of Second layer weights is multiplication of dZ2 and A1.T, averaged over all samples
  db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
  dZ1 = np.matmul(W2.T, dZ2) * (1 - np.power(A1, 2)) # (1 - A1^2) is the derivative of the tanh hidden activation
  dW1 = (1/m) * np.matmul(dZ1, X.T)
  db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

  gradients = {
    'dW1': dW1,
    'db1': db1,
    'dW2': dW2,
    'db2': db2,
  }

  # The dictionary was previously built but never returned, so callers got None
  return gradients

In [8]:
def update_paramters(parameters, gradients, learning_rate = 1):
  """
  Updates parameters using the gradient descent update rule
  (parameter = parameter - learning_rate * gradient)

  Arguments:
  parameters -- python dictionary containing the parameters 'W1', 'b1', 'W2' and 'b2'
  gradients -- python dictionary containing the gradients 'dW1', 'db1', 'dW2' and 'db2'
  learning_rate -- step size of the gradient descent update (defaults to 1)

  Returns:
  A new python dictionary containing the updated 'W1', 'b1', 'W2' and 'b2'
  """

  # Get the Parameters from the Dictionary.
  # 'W1' used to be read from the misspelled name 'paramters', which is the
  # local result variable of this function and therefore not yet bound here.
  W1 = parameters['W1']
  b1 = parameters['b1']
  W2 = parameters['W2']
  b2 = parameters['b2']

  # Get the gradients from the dictionary
  dW1 = gradients['dW1']
  db1 = gradients['db1']
  dW2 = gradients['dW2']
  db2 = gradients['db2']

  # Run Gradient Descent for all weights and biases
  W1 = W1 - learning_rate * dW1
  b1 = b1 - learning_rate * db1
  W2 = W2 - learning_rate * dW2
  b2 = b2 - learning_rate * db2

  # Pack the updated parameters into a fresh dictionary (the input is left untouched)
  updated_parameters = {
    'W1': W1,
    'b1': b1,
    'W2': W2,
    'b2': b2,
  }

  return updated_parameters

In [11]:
def fit_model(X, Y, n_h, epochs=10000, print_cost = False, learning_rate=1):
  """
  Trains the 2 layer neural network with gradient descent.

  Arguments:
  X -- dataset of shape (n_x, number of examples)
  Y -- labels of shape (n_y, number of examples)
  n_h -- size of the hidden layer
  epochs -- Number of iterations in gradient descent loop
  print_cost -- if True, print the cost every 1000 iterations
  learning_rate -- step size forwarded to update_paramters (defaults to 1,
                   the default of update_paramters)

  Returns:
  parameters -- python dictionary containing the learned 'W1', 'b1', 'W2' and 'b2'
  """

  # Input and output sizes come from the data; the hidden size is the caller's n_h
  n_x, _, n_y = layer_sizes(X, Y)

  # Initialize parameters (the initializer defined in this notebook is init_params)
  parameters = init_params(n_x, n_h, n_y)

  # Run the loop for training
  for epoch in range(epochs):
    cache = forward_propagation(X, parameters=parameters)

    grads = backward_prop(parameters, cache, X, Y)

    # The cost is only used for reporting, so compute it only when it is
    # printed. It is evaluated before the update so that it matches the
    # parameters that produced this epoch's forward pass.
    if print_cost and epoch % 1000 == 0:
      cost = calc_cost(cache['A2'], Y, parameters)
      print("Cost after iteration %i: %f" %(epoch, cost))

    parameters = update_paramters(parameters, grads, learning_rate=learning_rate)

  return parameters

In [12]:
def predict(parameters, X):
  """
  Predicts a class for each example in X with the given parameters.

  Arguments:
  parameters -- python dictionary containing the network parameters
  X -- input data of size (n_x, m)

  Returns:
  predictions -- boolean array, True where the output activation 'A2'
                 of the forward pass is greater than 0.5
  """

  output_activation = forward_propagation(X, parameters)['A2']

  # Threshold the output activation at 0.5
  return output_activation > 0.5