In [1]:
# Import libraries
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Set random seed
# Seeds NumPy's global random generator so that the np.random.randn-based
# weight initialisation used further down produces the same values on every
# fresh top-to-bottom run of the notebook.
# NOTE(review): train_test_split is called below without random_state; it
# presumably falls back to this same global generator, which would make the
# train/test split repeatable as well -- confirm against the scikit-learn docs.
np.random.seed(401293)

In [3]:
# Load data
# Fetch the dataset bundle and pull out the feature matrix and target vector.
data = load_breast_cancer()
X = data["data"]

# Turn the target vector into a single column so that, once transposed at the
# call site, it becomes the (1, m) row the network code expects.
y = data["target"].reshape(-1, 1)

# Hold out part of the data for testing (library-default split proportions).
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Sanity check: one shape per line, features first, then labels.
print(X_train.shape, y_train.shape, sep="\n")

(426, 30)
(426, 1)


In [4]:
# Initialization 
def initialize_parameters(n_x, L, layers_dims):
    """Build the starting weights and biases for every layer of the network.

    Parameters
    ----------
    n_x : int
        Number of input features; becomes the column count of ``W1``.
    L : int
        Number of layers to initialise. Entries ``layers_dims[0]`` through
        ``layers_dims[L-1]`` are read.
    layers_dims : sequence of int
        Number of units in each layer, in order.

    Returns
    -------
    dict
        Keys ``"W1"``..``"WL"`` map to arrays of shape ``(units, fan_in)``
        drawn from a standard normal and scaled by 0.01; keys
        ``"b1"``..``"bL"`` map to zero arrays of shape ``(units, 1)``.
    """
    parameters = {}

    # Width of the layer feeding the one being initialised; starts at the input.
    fan_in = n_x

    for layer in range(1, L + 1):
        n_units = layers_dims[layer - 1]
        suffix = str(layer)

        # Small random weights, zero biases.
        parameters["W" + suffix] = np.random.randn(n_units, fan_in) * 0.01
        parameters["b" + suffix] = np.zeros((n_units, 1))

        # This layer's width is the next layer's fan-in.
        fan_in = n_units

    return parameters

In [5]:
# Forward propagation
def forward_propagation(X, L, parameters):
    """Run the inputs through all ``L`` sigmoid layers.

    Parameters
    ----------
    X : ndarray
        Input matrix; it is stored as ``"A0"`` and multiplied on the left by
        ``W1``, so its first dimension must match ``W1``'s column count.
    L : int
        Number of layers to evaluate.
    parameters : dict
        Must contain ``"W1"``..``"WL"`` and ``"b1"``..``"bL"``.

    Returns
    -------
    tuple
        ``(A, Z)`` where ``A`` maps ``"A0"``..``"AL"`` to activations and
        ``Z`` maps ``"Z1"``..``"ZL"`` to the pre-activation values.
    """
    activations = {"A0": X}
    pre_activations = {}
    current = X

    for layer in range(1, L + 1):
        suffix = str(layer)
        weights = parameters["W" + suffix]
        bias = parameters["b" + suffix]

        # Affine step followed by the element-wise sigmoid.
        linear_out = np.dot(weights, current) + bias
        pre_activations["Z" + suffix] = linear_out

        current = sigmoid(linear_out)
        activations["A" + suffix] = current

    return (activations, pre_activations)

In [6]:
# Compute cost
def compute_cost(AL, Y, m, eps=1e-15):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Parameters
    ----------
    AL : ndarray
        Predicted probabilities from the output layer.
    Y : ndarray
        Ground-truth labels (0 or 1), broadcastable against ``AL``.
    m : int
        Number of examples; the summed loss is divided by this value.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before the logarithm
        is taken. Without this, a sigmoid output that saturates to exactly
        0.0 or 1.0 yields ``log(0) = -inf`` and ``0 * -inf = nan``, which
        poisons the reported cost.

    Returns
    -------
    float
        The averaged cross-entropy cost (always finite for finite inputs).
    """
    # Keep probabilities strictly inside (0, 1) so both logs are finite.
    AL_safe = np.clip(AL, eps, 1.0 - eps)

    log_likelihood = Y * np.log(AL_safe) + (1 - Y) * np.log(1.0 - AL_safe)
    cost = (-1 / m) * np.sum(log_likelihood)

    return cost

In [7]:
# Backward propagation
def backward_propogation(L, m, A, AL, Y, Z, parameters):
    """Compute cross-entropy gradients for every layer of the sigmoid network.

    Parameters
    ----------
    L : int
        Number of layers.
    m : int
        Number of examples; gradients are averaged over this value.
    A : dict
        Activations ``"A0"``..``"AL"`` from the forward pass.
    AL : ndarray
        Output-layer activations. They are assumed to be the sigmoid of
        ``Z["Z" + str(L)]``, which is how ``forward_propagation`` in this
        file produces them.
    Y : ndarray
        Ground-truth labels, broadcastable against ``AL``.
    Z : dict
        Pre-activations ``"Z1"``..``"ZL"``; only the hidden-layer entries
        (``"Z1"``..``"Z(L-1)"``) are read.
    parameters : dict
        Weights ``"W1"``..``"WL"`` (biases are not needed here).

    Returns
    -------
    dict
        ``"dA" + str(L)`` plus ``"dZl"``, ``"dWl"`` and ``"dbl"`` for every
        layer ``l`` in ``1..L``.

    Notes
    -----
    For a sigmoid output with cross-entropy loss, ``dA * sigmoid'(Z)``
    simplifies algebraically to ``AL - Y``. Using the simplified form avoids
    dividing by ``AL`` or ``1 - AL``, which produced ``inf * 0 = nan``
    gradients whenever the output saturated to exactly 0 or 1.
    """
    grads = {}
    out = str(L)

    # dA is still reported for callers that inspect it; clip the denominator
    # so a saturated output cannot trigger a division by zero.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1.0 - eps)
    grads["dA" + out] = -(np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Numerically stable output-layer error (see Notes).
    grads["dZ" + out] = AL - Y
    grads["dW" + out] = 1/m * np.dot(grads["dZ" + out], A["A" + str(L-1)].T)
    grads["db" + out] = 1/m * np.sum(grads["dZ" + out], axis = 1, keepdims = True)

    # Now send the error backwards through the hidden layers
    for l in reversed(range(1, L)):

        # Error arriving from the layer above, scaled by this layer's slope
        upstream = np.dot(parameters["W" + str(l+1)].T, grads["dZ" + str(l+1)])
        local_slope = sigmoid_prime(Z["Z" + str(l)])

        # Gradient calculations
        grads["dZ" + str(l)] = upstream * local_slope
        grads["dW" + str(l)] = 1/m * np.dot(grads["dZ" + str(l)], A["A" + str(l-1)].T)
        grads["db" + str(l)] = 1/m * np.sum(grads["dZ" + str(l)], axis = 1, keepdims = True)

    return grads

In [8]:
# Update parameters    
def update_parameters(L, parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Parameters
    ----------
    L : int
        Number of layers to update.
    parameters : dict
        Holds ``"W1"``..``"WL"`` and ``"b1"``..``"bL"``. The dict itself is
        modified: each key is rebound to a newly computed array.
    grads : dict
        Holds ``"dW1"``..``"dWL"`` and ``"db1"``..``"dbL"``.
    learning_rate : float
        Step size multiplying each gradient.

    Returns
    -------
    dict
        The same ``parameters`` object that was passed in.
    """
    for layer in range(1, L + 1):
        suffix = str(layer)

        # Weights first, then biases: value <- value - learning_rate * gradient
        for prefix in ("W", "b"):
            key = prefix + suffix
            step = learning_rate * grads["d" + key]
            parameters[key] = parameters[key] - step

    return parameters

In [9]:
# Activation function
def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``.

    Parameters
    ----------
    Z : ndarray or scalar
        Pre-activation value(s).

    Returns
    -------
    ndarray or scalar
        Value(s) between 0 and 1, with the same shape as ``Z``.
    """
    denominator = 1.0 + np.exp(-Z)
    return 1.0 / denominator

In [10]:
# Derivative of activation function w.r.t Z
def sigmoid_prime(Z):
    """Derivative of the sigmoid with respect to ``Z``, evaluated element-wise.

    Parameters
    ----------
    Z : ndarray or scalar
        Pre-activation value(s).

    Returns
    -------
    ndarray or scalar
        ``sigmoid(Z) * (1 - sigmoid(Z))``, with the same shape as ``Z``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    activation = sigmoid(Z)
    complement = 1.0 - activation
    return activation * complement

In [11]:
# Calculate no. of successes in prediction
def calculate_binary_accuracy(AL, Y):
    """Fraction of examples whose thresholded prediction matches the label.

    Parameters
    ----------
    AL : ndarray
        Predicted probabilities; values strictly greater than 0.5 count as
        class 1, everything else as class 0.
    Y : ndarray
        Ground-truth labels. ``Y.shape[1]`` is taken as the number of
        examples, so a 2-D array with one column per example is expected.

    Returns
    -------
    float
        Number of matching entries divided by ``Y.shape[1]``.
    """
    # Threshold the probabilities into hard 0/1 labels.
    predicted_labels = (AL > 0.5).astype(int)

    n_examples = Y.shape[1]

    # Count the positions where prediction and label agree.
    n_correct = (predicted_labels == Y).astype(int).sum()

    return n_correct / n_examples

In [12]:
# Calculate accuracy on test set
def calculate_test_accuracy(X_test, Y_test, L, parameters):
    """Score the current parameters on a held-out set.

    Parameters
    ----------
    X_test : ndarray
        Held-out inputs, laid out the same way as the training inputs given
        to ``forward_propagation``.
    Y_test : ndarray
        Held-out labels, laid out as ``calculate_binary_accuracy`` expects.
    L : int
        Number of layers in the network.
    parameters : dict
        Weights and biases to evaluate.

    Returns
    -------
    float
        Accuracy of the thresholded output-layer activations on the test set.
    """
    # Only the activations are needed here; the pre-activations are discarded.
    activations, _ = forward_propagation(X_test, L, parameters)
    output_probabilities = activations["A" + str(L)]

    return calculate_binary_accuracy(output_probabilities, Y_test)

In [13]:
# Model implementation 
def nn_model(X, Y, X_test, Y_test, layers_dims, learning_rate, num_iterations):
    """Train an all-sigmoid feed-forward network with batch gradient descent.

    Parameters
    ----------
    X : ndarray
        Training inputs with one column per example: ``X.shape[0]`` is used
        as the feature count and ``X.shape[1]`` as the example count.
    Y : ndarray
        Training labels, one column per example.
    X_test, Y_test : ndarray
        Held-out inputs and labels in the same layout as ``X`` and ``Y``.
    layers_dims : sequence of int
        Units per layer; its length is the number of layers.
    learning_rate : float
        Gradient-descent step size.
    num_iterations : int
        Number of full-batch updates to perform. ``0`` is allowed and returns
        the metrics of the freshly initialised network.

    Returns
    -------
    tuple
        ``(cost, predictions, parameters, training_accuracy, testing_accuracy)``.
        All four metrics are evaluated with the returned ``parameters``, i.e.
        after the last update, so they describe the model that is handed back.

    Notes
    -----
    Cost and accuracies are only needed for the progress line and the return
    value, so they are computed every 1000th iteration and once at the end
    rather than on every step. This removes a test-set forward pass from each
    of the other iterations.
    """
    # Get NN architecture hyper-parameters
    n_x = X.shape[0]
    m = X.shape[1]
    L = len(layers_dims)

    # Set parameters
    parameters = initialize_parameters(n_x, L, layers_dims)

    # Execute the model
    for iteration in range(1, num_iterations + 1):

        # Send data through the net
        A, Z = forward_propagation(X, L, parameters)
        AL = A["A" + str(L)]

        # Print progress every 1000 steps (metrics of the pre-update parameters)
        if iteration % 1000 == 0:

            cost = compute_cost(AL, Y, m)
            training_accuracy = calculate_binary_accuracy(AL, Y)
            testing_accuracy = calculate_test_accuracy(X_test, Y_test, L, parameters)

            placeholder = "Iteration: {}, Cost: {}, Training accuracy: {}, Testing accuracy: {}"
            print(placeholder.format(iteration, cost, training_accuracy, testing_accuracy), end = '\r')

        # Compute gradients and take one descent step
        grads = backward_propogation(L, m, A, AL, Y, Z, parameters)
        parameters = update_parameters(L, parameters, grads, learning_rate)

    # Final evaluation with the trained parameters so that every returned
    # metric matches the returned weights (also covers num_iterations == 0).
    A, _ = forward_propagation(X, L, parameters)
    AL = A["A" + str(L)]
    cost = compute_cost(AL, Y, m)
    training_accuracy = calculate_binary_accuracy(AL, Y)
    testing_accuracy = calculate_test_accuracy(X_test, Y_test, L, parameters)

    # Generate hard predictions on the training set
    predictions = (AL > 0.5).astype(int)

    return (cost, predictions, parameters, training_accuracy, testing_accuracy)

In [14]:
# Test architectures
# Each list gives the number of units per layer; the last entry is the output layer.
arch1 = [30, 1]
arch2 = [15, 1]
arch3 = [15, 2, 1]

# Train one model per architecture. The data is transposed so that examples
# run along the columns, which is the layout nn_model reads its sizes from.
# The order of the calls is kept (3, 1, 2) because each run draws its initial
# weights from the shared NumPy random stream.
test3 = nn_model(
    X_train.T, y_train.T, X_test.T, y_test.T,
    layers_dims=arch3, learning_rate=0.0005, num_iterations=200000,
)
test1 = nn_model(
    X_train.T, y_train.T, X_test.T, y_test.T,
    layers_dims=arch1, learning_rate=0.0005, num_iterations=200000,
)
test2 = nn_model(
    X_train.T, y_train.T, X_test.T, y_test.T,
    layers_dims=arch2, learning_rate=0.0005, num_iterations=200000,
)

Iteration: 139000, Cost: 0.24298976842648395, Training accuracy: 0.9342723004694836, Testing accuracy: 0.9230769230769231

KeyboardInterrupt: 