In [9]:
'''
Single hidden layer neural network

A[1] = tanh(W[1].T dot X + B[1])
A[2] = sigmoid(W[2].T dot A[1] + B[2])

where X: (n_x, n_examples), W[1]: (n_x, n_hidden), B[1]: (n_hidden, 1)
and A[1]: (n_hidden, n_examples), W[2]: (n_hidden, n_y), B[2]: (n_y, 1)

Because n_examples can vary between training and testing, the biases are broadcast across the example axis

Further inquiries:
Understand relation between input topologies and network architecture
'''

'\nSingle hidden layer neural network\n\nA[1] = relu(W[1].T dot X + B[1])\nA[2] = sigmoid(W[2].T dot A[1] + B[2])\n\nwhere X[1]: (n_x, n_examples), W[1]: (n_x, n_hidden), B[1]: (n_hidden, n_examples)\nand A[1]: (n_hidden, n_examples), W[2]: (n_hidden, n_y), B[2]: (n_y, n_examples)\n\nConsidering the n_examples can vary from training to testing, the biases will be broadcasted\n\nFurther inquiries:\nUnderstand relation between input topologies and network architecture\n'

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage

In [12]:
def load_dataset(train_path='datasets/train_catvnoncat.h5',
                 test_path='datasets/test_catvnoncat.h5'):
    """Load the cat / non-cat image data from two HDF5 files.

    Args:
        train_path: HDF5 file containing the "train_set_x" and "train_set_y"
            datasets. Defaults to the path used by this notebook.
        test_path: HDF5 file containing the "test_set_x", "test_set_y" and
            "list_classes" datasets. Defaults to the path used by this notebook.

    Returns:
        Tuple (train_x, train_y, test_x, test_y, classes) of NumPy arrays.
        Both label arrays are reshaped to a single row, (1, n_examples), so
        that each column holds the label of one example.
    """
    # Copy the datasets into memory so they stay usable after the files close.
    with h5py.File(train_path, "r") as train_file:
        train_x = np.array(train_file["train_set_x"][:])
        train_y = np.array(train_file["train_set_y"][:])

    with h5py.File(test_path, "r") as test_file:
        test_x = np.array(test_file["test_set_x"][:])
        test_y = np.array(test_file["test_set_y"][:])
        classes = np.array(test_file["list_classes"][:])

    # Labels become row vectors: one column per example.
    train_y = train_y.reshape((1, train_y.shape[0]))
    test_y = test_y.reshape((1, test_y.shape[0]))

    return train_x, train_y, test_x, test_y, classes

# Pull the raw image tensors and labels into the notebook namespace.
train_x_original, train_y_original, test_x_original, test_y_original, classes = load_dataset()

# The leading axis of each image array indexes the examples.
n_train = len(train_x_original)
n_test = len(test_x_original)

# Axis 1 is reused for both height and width below, i.e. images are treated as square.
num_pixels = train_x_original.shape[1]
img_shape = (num_pixels, num_pixels, train_x_original.shape[3])  # height, width, channels

# Collapse every image into one column: rows are pixel values, columns are examples.
train_x_flatten = train_x_original.reshape((n_train, -1)).transpose()
test_x_flatten = test_x_original.reshape((n_test, -1)).transpose()

print('Training input dimensions: ' + str(train_x_flatten.shape))
print('Testing input dimensions: ' + str(test_x_flatten.shape))

# Rescale by 255 (maps to the 0-1 range assuming 8-bit pixel values).
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255

Training input dimensions: (12288, 209)
Testing input dimensions: (12288, 50)


In [13]:
# Network architecture: input features, hidden units, output units.
# n_x matches the 12288 rows of the flattened input printed above.
n_x, n_hidden, n_y = 12288, 4, 1
layer_sizes = [n_x, n_hidden, n_y]

# Number of weight layers (the input layer carries no parameters).
n_layers = len(layer_sizes) - 1

In [14]:
def tanh(z):
    """Apply the hyperbolic tangent element-wise to z."""
    activation = np.tanh(z)
    return activation

def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    The direct formula overflows in exp(-z) for large negative z. Here the
    exponential is only ever taken of a non-positive number, and the two
    algebraically equivalent forms are selected by the sign of z.

    Args:
        z: scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as z (a NumPy scalar for scalar input).
    """
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays unchanged.
    return result[()]

In [15]:

def init_params(layer_sizes):
    """Create the initial weights and biases for a fully connected network.

    Args:
        layer_sizes: sequence of layer widths, input layer first.

    Returns:
        dict with, for each layer l >= 1, 'W<l>' of shape
        (layer_sizes[l-1], layer_sizes[l]) drawn from a standard normal and
        scaled by sqrt(2 / fan_in) (He et al. initialization), and 'b<l>' of
        shape (layer_sizes[l], 1) filled with zeros.
    """
    params = {}
    consecutive_widths = zip(layer_sizes[:-1], layer_sizes[1:])
    for layer, (fan_in, fan_out) in enumerate(consecutive_widths, start=1):
        he_scale = np.sqrt(2 / fan_in)
        params['W{layer}'.format(layer=layer)] = np.random.randn(fan_in, fan_out) * he_scale
        params['b{layer}'.format(layer=layer)] = np.zeros((fan_out, 1))
    return params

def forward_prop(params, X):
    """Run a forward pass and record every layer's activation.

    Each layer computes Z = W.T dot A_prev + b; hidden layers apply tanh and
    the final layer applies sigmoid. The depth is taken from `params` itself
    (one 'W<l>' and one 'b<l>' entry per layer) rather than from a
    module-level variable, so the function works for whatever network the
    parameters describe.

    Args:
        params: dict with 'W<l>' of shape (n_prev, n_l) and 'b<l>' of shape
            (n_l, 1) for l = 1..L.
        X: input matrix with one example per column.

    Returns:
        (params, cache) where cache maps 'A0' to X and 'A<l>' to the
        activation of layer l.
    """
    num_layers = len(params) // 2  # each layer contributes one W and one b
    cache = {'A0': X}
    activation = X
    for l in range(1, num_layers + 1):
        W = params['W{layer}'.format(layer=l)]
        b = params['b{layer}'.format(layer=l)]
        # b has a single column and is broadcast across the examples
        Z = np.dot(W.T, activation) + b
        activation = tanh(Z) if l != num_layers else sigmoid(Z)
        cache['A{layer}'.format(layer=l)] = activation
    return params, cache

def compute_cost(Ak, Y):
    """Mean binary cross-entropy between predictions Ak and labels Y.

    Predictions are clipped away from exactly 0 and 1 before taking
    logarithms; otherwise a saturated sigmoid output produces log(0) and the
    cost becomes inf or nan.

    Args:
        Ak: predicted probabilities, one example per column.
        Y: 0/1 labels with the same layout as Ak.

    Returns:
        The cost summed over all entries and divided by Y.shape[1], squeezed
        to zero dimensions.
    """
    eps = 1e-15  # smallest distance from 0 and 1 allowed before taking logs
    probs = np.clip(Ak, eps, 1 - eps)
    log_likelihood = np.multiply(np.log(probs), Y) + np.multiply(1 - Y, np.log(1 - probs))
    cost = -np.sum(log_likelihood) / Y.shape[1]
    return np.squeeze(cost)

def backward_prop(params, cache, X, Y):
    """Back-propagate the cross-entropy cost through the network.

    Weights are stored as (n_prev, n_l) and used as W.T in the forward pass,
    so the gradient of W<l> is A<l-1> dot dZ<l>.T (same shape as W<l>) and the
    error is pushed to the previous layer with W<l> dot dZ<l>. Hidden layers
    use the tanh derivative 1 - A**2; the sigmoid output combined with the
    cross-entropy cost gives dZ = A - Y for the last layer.

    Args:
        params: dict with 'W<l>' and 'b<l>' for l = 1..L.
        cache: dict with activations 'A0'..'A<L>' from the forward pass.
        X: input matrix; only its column count (number of examples) is used.
        Y: labels, one example per column.

    Returns:
        dict with 'dW<l>' and 'db<l>' for every layer, each shaped like the
        parameter it belongs to.
    """
    n = X.shape[1]
    num_layers = len(params) // 2  # each layer contributes one W and one b
    grads = {}

    # Output layer: sigmoid + cross-entropy collapses to A - Y.
    dZ = cache['A{0}'.format(num_layers)] - Y
    for l in range(num_layers, 0, -1):
        A_prev = cache['A{0}'.format(l - 1)]
        grads['dW{0}'.format(l)] = np.dot(A_prev, dZ.T) / n
        grads['db{0}'.format(l)] = np.sum(dZ, axis=1, keepdims=True) / n
        if l > 1:
            # Propagate through W<l>, then through the tanh of layer l-1.
            dZ = np.multiply(np.dot(params['W{0}'.format(l)], dZ),
                             1 - np.power(A_prev, 2))
    return grads

def optimize(params, grads, learning_rate):
    """Apply one gradient-descent step to every entry of params.

    For each parameter name p, the gradient is looked up under 'd' + p and
    subtracted in place, scaled by learning_rate.

    Returns:
        The same params dict that was passed in, after the update.
    """
    for name in params:
        step = learning_rate * grads['d' + name]
        params[name] -= step
    return params

In [17]:
# Method output validation

# Draw a fresh set of parameters and show each one for a visual sanity check.
parameters = init_params(layer_sizes=layer_sizes)

for prefix, key in (("W1 = ", "W1"), ("B1 = ", "b1"), ("W2 = ", "W2"), ("B2 = ", "b2")):
    print(prefix + str(parameters[key]))



W1 = [[-0.00326185  0.0111998   0.00158111  0.00868104]
 [ 0.01414519  0.01397113 -0.02165932 -0.01543981]
 [ 0.02089953  0.00764427  0.00861871  0.01288176]
 ...
 [-0.01498252  0.00119515  0.00295725  0.00606371]
 [-0.00745684  0.01134148 -0.00652726  0.02587092]
 [-0.01536979 -0.02027315  0.01602498  0.01084437]]
B1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[ 0.35132324]
 [-0.60740874]
 [ 0.44115692]
 [-1.11506244]]
B2 = [[0.]]


In [18]:
def predict(params, X):
    """Classify every column of X as 0 or 1 with the trained network.

    The output layer is identified from `params` (one 'W<l>' and one 'b<l>'
    entry per layer) instead of a module-level layer count, and the 0.5
    threshold is applied to the whole row at once.

    Args:
        params: dict of network parameters as produced by init_params / model.
        X: input matrix with one example per column.

    Returns:
        Float array of shape (1, X.shape[1]) holding 1.0 where the first
        output unit's activation exceeds 0.5 and 0.0 elsewhere.
    """
    n = X.shape[1]
    _, cache = forward_prop(params, X)
    output_key = 'A{0}'.format(len(params) // 2)
    first_unit = cache[output_key][0, :]
    return np.where(first_unit > 0.5, 1.0, 0.0).reshape(1, n)
    
def model(epoch_num, learning_rate, X, Y):
    """Train the network with batch gradient descent and return its parameters.

    The architecture comes from the module-level `layer_sizes`. Every 1000th
    epoch the cost of the current forward pass (i.e. before that epoch's
    parameter update) is printed.

    Args:
        epoch_num: number of gradient-descent iterations.
        learning_rate: step size passed to optimize.
        X: training inputs, one example per column.
        Y: training labels, one example per column.

    Returns:
        dict of trained parameters.
    """
    params = init_params(layer_sizes)
    # The final activation key follows from the number of (W, b) pairs.
    output_key = 'A{0}'.format(len(params) // 2)
    for epoch in range(epoch_num):
        params, cache = forward_prop(params, X)
        if epoch % 1000 == 0:
            # The cost is only needed for reporting, so compute it on demand.
            cost = compute_cost(cache[output_key], Y)
            print('Cost after iteration {iteration}: {cost}'.format(iteration=epoch, cost=float(cost)))
        grads = backward_prop(params, cache, X, Y)
        params = optimize(params, grads, learning_rate)
    return params

# Train on the normalized training set; `m` holds the learned parameter dict.
m = model(epoch_num=5000, learning_rate=0.005, X=train_x, Y=train_y_original)
