In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import h5py
import scipy 
from PIL import Image 
from scipy import ndimage 
import re 

In [None]:
#This function takes the sizes of the input, hidden, and output layers to determine the shapes of the parameters.
#The weights are initialized randomly (scaled by 0.01), and the bias vectors are set to 0
def initialize_parameters(n_x, n_h, n_y):
    """Build the initial weights and biases of a two-layer network.

    Weights are drawn from a standard normal distribution (NumPy's global
    random state) and scaled by 0.01; biases start at zero.

    Args:
        n_x: size of the input layer.
        n_h: size of the hidden layer.
        n_y: size of the output layer.

    Returns:
        dict with "W1" of shape (n_h, n_x), "b1" of shape (n_h, 1),
        "W2" of shape (n_y, n_h) and "b2" of shape (n_y, 1).
    """
    scale = 0.01

    # W1 is drawn before W2 so a given seed always yields the same weights.
    hidden_weights = scale * np.random.randn(n_h, n_x)
    output_weights = scale * np.random.randn(n_y, n_h)

    return {
        "W1": hidden_weights,
        "b1": np.zeros((n_h, 1)),
        "W2": output_weights,
        "b2": np.zeros((n_y, 1)),
    }

In [None]:
# this function multiplies the weight matrix by the input activations and then adds the bias vector
def linear_forward(A, W, b):
    """Compute the linear (pre-activation) part of a layer: Z = W A + b.

    Args:
        A: activations feeding this layer.
        W: weight matrix of this layer.
        b: bias vector of this layer (broadcast against W A).

    Returns:
        (Z, cache) where cache is the tuple (A, W, b), kept for the
        backward pass.
    """
    pre_activation = W @ A + b
    return pre_activation, (A, W, b)

In [None]:
#this function puts the vector values through the sigmoid activation function
#so the neural network does not collapse into a purely linear model
def sigmoid(Z):
    """Apply the logistic function 1 / (1 + exp(-Z)) element-wise.

    Args:
        Z: pre-activation values.

    Returns:
        (A, cache) where A is the sigmoid of Z and cache is Z itself,
        kept for the backward pass.
    """
    exp_neg = np.exp(np.negative(Z))
    activation = 1 / (1 + exp_neg)
    return activation, Z

In [None]:
#this function puts the vector values through the relu activation function
#so the neural network does not collapse into a purely linear model
def relu(Z):
    """Apply the rectified linear unit max(Z, 0) element-wise.

    Args:
        Z: pre-activation values.

    Returns:
        (A, cache) where A is Z with negative entries replaced by 0 and
        cache is Z itself, kept for the backward pass.
    """
    rectified = np.maximum(Z, 0)
    return rectified, Z

In [None]:
#this function does one forward pass through a layer (that is, multiplies the input by the weights,
# adds the bias, and then applies the activation function to the result)
def linear_activation_forward(A_prev, W, b, activation):
    """Run one layer forward: linear step followed by its activation.

    Args:
        A_prev: activations from the previous layer (or the input data).
        W: weight matrix of this layer.
        b: bias vector of this layer.
        activation: name of the activation, either "sigmoid" or "relu".

    Returns:
        (A, cache) where A is the layer output and cache is
        (linear_cache, activation_cache) for the backward pass.

    Raises:
        ValueError: if `activation` is not "sigmoid" or "relu".
    """
    # Validate first so an unsupported name produces a clear error instead of
    # an UnboundLocalError when the cache tuple is assembled below.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            "Unsupported activation {!r}; expected 'sigmoid' or 'relu'".format(activation)
        )

    # The linear step is identical for both activations.
    Z_lin, linear_cache = linear_forward(A_prev, W, b)

    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z_lin)
    else:
        A, activation_cache = relu(Z_lin)

    cache = (linear_cache, activation_cache)

    return A, cache

In [None]:
#this function computes the loss of our prediction 
def compute_loss(A, Y):
    """Compute the mean binary cross-entropy between predictions and labels.

    Args:
        A: predicted probabilities, same shape as Y.
        Y: true labels (0 or 1); Y.shape[1] is taken as the number of examples.

    Returns:
        The loss, passed through np.squeeze so it is scalar-like.
    """
    m = Y.shape[1]

    # Keep predictions strictly inside (0, 1): a saturated sigmoid can return
    # exactly 0.0 or 1.0, and log(0) would turn the loss into inf or nan.
    eps = 1e-15
    A_safe = np.clip(np.asarray(A, dtype=np.float64), eps, 1.0 - eps)

    per_example = np.multiply(Y, np.log(A_safe)) + np.multiply(1 - Y, np.log(1 - A_safe))
    loss = -1 * (1 / m) * np.sum(per_example)

    return np.squeeze(loss)

In [None]:
#this function computes the gradients dW and db for the layer
#and also computes the gradient with respect to the layer's input,
#which is used for the previous layer's backward pass
def linear_backward(dZ, cache):
    """Back-propagate through the linear part of a layer.

    Args:
        dZ: gradient of the loss with respect to this layer's linear output.
        cache: tuple (A_prev, W, b) stored by the forward pass.

    Returns:
        (dA_prev, dW, db): gradients with respect to the layer input, the
        weights and the bias. No 1/m averaging is applied here.
    """
    A_prev, W, _ = cache
    n_examples = A_prev.shape[1]

    grad_input = W.T @ dZ
    grad_weights = dZ @ A_prev.T
    # Multiplying by a column of ones sums dZ over the example axis.
    grad_bias = dZ @ np.ones((n_examples, 1))

    return grad_input, grad_weights, grad_bias

In [None]:
#computes the gradient for the relu activation function for the vector 
def relu_backward(dA, cache):
    """Back-propagate a gradient through the ReLU activation.

    Args:
        dA: gradient of the loss with respect to the ReLU output.
        cache: the pre-activation values Z stored by the forward pass.

    Returns:
        dZ: dA where Z > 0 and zero elsewhere.
    """
    Z = cache

    # The gradient passes through only where the unit was active (Z > 0).
    # np.multiply allocates a new array, so dA is never modified and no
    # defensive copy is needed.
    dZ = np.multiply(dA, (Z > 0).astype(int))

    return dZ

In [None]:
#computes the gradient for the sigmoid activation function for the vector 
def sigmoid_backward(dA, cache):
    """Back-propagate a gradient through the sigmoid activation.

    Args:
        dA: gradient of the loss with respect to the sigmoid output.
        cache: the pre-activation values Z stored by the forward pass.

    Returns:
        dZ: dA scaled element-wise by the sigmoid derivative s * (1 - s),
        where s = sigmoid(Z).
    """
    # Recompute the activation from the cached Z; its cache output is unused.
    activations, _ = sigmoid(cache)
    local_slope = activations * (1 - activations)

    return np.multiply(dA, local_slope)

In [None]:
#this function performs one backward pass through a layer (activation gradient, then linear gradient)
def linear_activation_backward(dA, cache, activation):
    """Run one layer backward: activation gradient followed by linear gradient.

    Args:
        dA: gradient of the loss with respect to this layer's output.
        cache: tuple (linear_cache, activation_cache) stored by the forward pass.
        activation: name of the activation, either "relu" or "sigmoid".

    Returns:
        (dA_prev, dW, db): gradients with respect to the layer input, the
        weights and the bias.

    Raises:
        ValueError: if `activation` is not "relu" or "sigmoid".
    """
    # Validate first so an unsupported name produces a clear error instead of
    # an UnboundLocalError on the return statement.
    if activation not in ("relu", "sigmoid"):
        raise ValueError(
            "Unsupported activation {!r}; expected 'relu' or 'sigmoid'".format(activation)
        )

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [None]:
#this function updates the parameters based on the gradients and the learning rate 
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every parameter.

    Args:
        parameters: dict of parameter arrays; updated in place.
        grads: dict holding, for each parameter key k, its gradient under
            the key 'd' + k.
        learning_rate: step size.

    Returns:
        The same `parameters` dict, with each entry replaced by
        parameter - learning_rate * gradient.
    """
    for name in parameters:
        step = learning_rate * grads['d' + name]
        # Rebind to a new array rather than subtracting in place, so arrays
        # still referenced elsewhere keep their old values.
        parameters[name] = parameters[name] - step

    return parameters

In [None]:
# Notebook-wide setup: matplotlib defaults used when plotting the loss graph,
# automatic module reloading, and a fixed NumPy seed.
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height) in inches.
plt.rcParams['figure.figsize'] = (5.0, 4.0) 
# Defaults for image display: nearest-neighbour interpolation and a grayscale colormap.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the autoreload extension; mode 2 reloads imported modules before code is executed.
%load_ext autoreload
%autoreload 2

# Seed NumPy's global random generator so random draws repeat across runs.
np.random.seed(1)

In [None]:
#this function loads the data that is stored in the file and divides them by feature and response 
#and also divides some for training and some for testing 
def load_data(train_file, test_file):
    """Load the training and test sets from two HDF5 files.

    Reads datasets 'train_set_x' and 'train_set_y' from `train_file`, and
    'test_set_x', 'test_set_y' and 'list_classes' from `test_file`.

    Args:
        train_file: path to the HDF5 file holding the training data.
        test_file: path to the HDF5 file holding the test data and class list.

    Returns:
        (train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes), where both label arrays are reshaped to
        a single row of shape (1, number_of_examples).
    """
    # Each dataset is copied into memory with [:] / np.array, so the file can
    # be closed as soon as its `with` block ends instead of leaking the handle.
    with h5py.File(train_file, mode="r") as train_dataset:
        train_set_x_orig = np.array(train_dataset['train_set_x'][:])
        train_set_y_orig = np.array(train_dataset['train_set_y'][:])

    with h5py.File(test_file, mode="r") as test_dataset:
        test_set_x_orig = np.array(test_dataset['test_set_x'][:])
        test_set_y_orig = np.array(test_dataset['test_set_y'][:])
        classes = np.array(test_dataset["list_classes"][:])

    # Labels become row vectors: one column per example.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [None]:
#this function performs the training for the model (puts the forward and backward propagation functions
#together) and also plots the loss graph
def two_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_loss=False):
    """Train a two-layer network (linear-relu-linear-sigmoid) with gradient descent.

    Args:
        X: input data; X.shape[1] is taken as the number of examples.
        Y: true labels, matching the network output.
        layers_dims: tuple (n_x, n_h, n_y) with the input, hidden and output sizes.
        learning_rate: gradient-descent step size.
        num_iterations: number of training iterations.
        print_loss: if True, print the loss every 100 iterations.

    Returns:
        dict of trained parameters "W1", "b1", "W2", "b2".

    Side effects:
        Re-seeds NumPy's global random generator with 1 and shows a matplotlib
        plot of the loss sampled every 100 iterations.
    """
    np.random.seed(1)
    losses = []
    m = X.shape[1]
    (n_x, n_h, n_y) = layers_dims
    record_every = 100

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(0, num_iterations):
        # Forward pass: hidden relu layer, then sigmoid output layer.
        A1, cache1 = linear_activation_forward(X, parameters["W1"], parameters["b1"], 'relu')
        A2, cache2 = linear_activation_forward(A1, parameters["W2"], parameters["b2"], 'sigmoid')

        loss = compute_loss(A2, Y)

        # Derivative of the mean cross-entropy with respect to A2. The 1/m
        # factor is applied here because linear_backward does not average.
        dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2)) / m

        # Backward pass; the gradient with respect to the input X is not needed.
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'sigmoid')
        _, dW1, db1 = linear_activation_backward(dA1, cache1, 'relu')

        grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

        parameters = update_parameters(parameters, grads, learning_rate)

        # Always record the loss so the plot has data even when printing is
        # off; printing alone is controlled by print_loss.
        if i % record_every == 0:
            losses.append(loss)
            if print_loss:
                print("Loss after iteration {}: {}".format(i, np.squeeze(loss)))

    plt.plot(np.squeeze(losses))
    plt.ylabel('loss')
    # One plotted point per 100 iterations.
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

In [None]:
#this function does a forward pass of the neural net (for testing purposes)
def two_layer_forward(X, parameters):
    """Run a forward pass through the two-layer network.

    Args:
        X: input data.
        parameters: dict with the entries "W1", "b1", "W2" and "b2".

    Returns:
        (A2, caches) where A2 is the sigmoid output of the second layer and
        caches is the list [hidden_layer_cache, output_layer_cache].
    """
    hidden_out, hidden_cache = linear_activation_forward(
        X, parameters['W1'], parameters['b1'], 'relu'
    )
    output, output_cache = linear_activation_forward(
        hidden_out, parameters['W2'], parameters['b2'], 'sigmoid'
    )

    return output, [hidden_cache, output_cache]

In [None]:
#this function is used to test the trained neural network: it gets the outputs
#and calculates the accuracy of the neural net on the given data
def predict(X, y, parameters):
    """Predict binary labels for X and print the accuracy against y.

    Args:
        X: input data; X.shape[1] is taken as the number of examples.
        y: true labels to compare the predictions with.
        parameters: dict of trained parameters "W1", "b1", "W2", "b2".

    Returns:
        p: float array of shape (1, number_of_examples) holding 1.0 where the
        predicted probability is >= 0.5 and 0.0 otherwise.
    """
    m = X.shape[1]

    probas, _ = two_layer_forward(X, parameters)

    # Threshold the first output row in one vectorised step; the result is a
    # float array, matching an np.zeros-initialised prediction vector.
    p = (probas[:1] >= 0.5).astype(float)

    # Count the matches first and divide once, which avoids the rounding
    # drift of summing many 1/m terms (e.g. 0.9999999999999998).
    accuracy = np.sum(p == y) / m
    print("Accuracy: "  + str(accuracy))

    return p