In [116]:
# Package imports
import sklearn
from sklearn import datasets, linear_model
import sklearn.datasets
from sklearn import preprocessing
# http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from decimal import Decimal
#from mnist import MNIST
import datetime


In [117]:
# initialise the weights for the network based on the input layers, the number of hidden layers, the number of output layers
#reference for above https://www.coursera.org/learn/deep-neural-network/lecture/RwqYe/weight-initialization-for-deep-networks
def initialise_input_weights(n_inputs, n_hidden_inputs):
 hidden_layer_weights = list()
 for i in range(n_hidden_inputs):
  weight = np.random.randn(n_inputs)*np.sqrt(1/(n_inputs)**(n_hidden_inputs-1))
  hidden_layer_weights.append(weight)
   
 input_weights = np.array([hidden_layer_weights], dtype=np.float64)
 input_weights = np.reshape(input_weights, (n_inputs, n_hidden_inputs))
 return input_weights; 

def initialise_output_weights(n_hidden_inputs,n_outputs):
 output_layer_weights = list()
 for i in range(n_hidden_inputs):
  weight = np.random.randn(n_outputs)*np.sqrt(1/(n_outputs)**(n_hidden_inputs-1))
  output_layer_weights.append(weight) 
   
 output_weights = np.array(output_layer_weights, dtype=np.float64)
 #if n_outputs == 1:
  #output_weights = np.array([output_layer_weights])
 #elif n_outputs > 1:
  #output_weights = np.array([output_layer_weights])  
  #output_weights = np.reshape(output_weights, (n_hidden_inputs,n_outputs))   

 return output_weights;

#initialise the bias for the network based on the number of hidden layers and the output layer bias
def initialise_bias(n_hidden_layer):
 hidden_layer_bias = list()    
 for i in range(n_hidden_layer):
  bias = np.random.random(1)[0]
  hidden_layer_bias.append(bias)
 
 output_layer_bias = [np.random.random(1)[0]]
 network_bias = np.array([hidden_layer_bias,output_layer_bias],dtype=np.float64)
 return network_bias;


In [118]:
# Testing weight intialisation
n_hidden_layers = 1
n_hidden_inputs = 2
n_i_inputs = 2
n_outputs = 1

input_weights = initialise_input_weights(n_i_inputs, n_hidden_inputs)
print("Input Weights: ",input_weights)
weights = initialise_output_weights(n_hidden_inputs, n_outputs)
print("Output Weights: ",weights)

bias = initialise_bias(n_hidden_layers)
print("Bias:", bias)

Input Weights:  [[-0.92626096  0.14734553]
 [ 1.5627489   0.62405363]]
Output Weights:  [[ 0.35740898]
 [-0.03535481]]
Bias: [[ 0.93763224]
 [ 0.62218154]]


In [136]:
# Forward Propagation

# Sigmoid function
# g(z) = 1/ 1 + e^-z
def sigmoid(z):
 g = 1/(1 + np.exp(-z))
 return g;

def forward_activation(inputs, input_weights, output_weights, bias):
    hidden_output_activation = get_hidden_activation(inputs, input_weights, bias[0])
    output_activation = get_output_activation(hidden_output_activation, output_weights, bias[1])
    
    print("hidden_output_activation", hidden_output_activation)
    print("output_activation", output_activation)
    return hidden_output_activation, output_activation

def get_hidden_activation(inputs, input_weights, bias):
    hidden_net = 0
    #hidden_output_list = list()
    for i in range(len(input_weights)):
        hidden_net += (input_weights[i] * inputs[i]) + bias * 1
        
    sigmoid_out = sigmoid(hidden_net)
    #hidden_output_list.append(sigmoid_out)
    
    #hidden_output_activation = np.array(hidden_output_list, dtype=np.float64)
    hidden_output_activation = np.array(sigmoid_out, dtype=np.float64)
    return hidden_output_activation;

def get_output_activation(hidden_output_activation, output_weights, bias):
    out_net = 0
    output_list = list()
    for i in range(len(output_weights)):
        out_net += (output_weights[i] * hidden_output_activation[i]) + bias * 1
    
    sigmoid_out = sigmoid(out_net)
    output_activation = np.array(sigmoid_out, dtype=np.float64)
    return output_activation;
        
def get_total_error(targets, output_activation):
    total_error = 0
    
    for i in range(len(output_activation)):
        error = targets - output_activation[i]
        total_error += 1/2 * np.power(error, 2)
    return total_error;


In [125]:
# Setup Test data required for training

# Use pandas to read the CSV file as a dataframe
df = pd.read_csv("moons400.csv")
# The y values are those labelled 'Class': extract their values
y = df['Class'].values
# using sklearn.model_selection.train_test_split to split up data into train and test sets split 70/30
train_X, test_X, train_y, test_y = train_test_split(df, y, test_size=0.30)

del train_X['Class']    # drop the 'Class' column from the Train and test dataframe
del test_X['Class']

train_X = train_X.as_matrix() # convert the remaining train columns to a numpy array
test_X = test_X.as_matrix() # convert the remaining test columns to a numpy array

#print(train_y)

# Some examples of working with the data, to look at rows/columns
print ("len(X):", len(train_X))            # outer array: one per sample
print ("len(X[0]):", len(train_X[0]))      # each inner array is the attributes of one sample
print ("len(X[:,0]):", len(train_X[:,0]))  # select column 0 from array

inputs = preprocessing.normalize(train_X) # normalise the input data
# np.shape returns all dimensions of the input array
(nsamples, nattribs) = np.shape(inputs)
print ("X: nsamples =", nsamples, ", nattribs =", nattribs)
# the actual labeled target values
targets = train_y

# initialise neural network structure
n_i_inputs = nattribs # number of attributes of inputs
n_hidden_inputs = 2 # number of hidden input nodes
n_hidden_layers = 1 # number of hidden layers
n_outputs = 1 # number of output nodes

# initialise weights
input_weights = initialise_input_weights(n_i_inputs, n_hidden_inputs)
output_weights = initialise_output_weights(n_hidden_inputs, n_outputs)
bias = initialise_bias(n_hidden_layers)

print("input_weights", input_weights)
print("output_weights", output_weights)
print("bias ",bias[0], bias[1])


len(X): 280
len(X[0]): 2
len(X[:,0]): 280
X: nsamples = 280 , nattribs = 2
input_weights [[ 0.13899265  0.99674632]
 [-0.455725   -0.22486462]]
output_weights [[ 0.78006857]
 [ 0.95751527]]
bias  [ 0.35750022] [ 0.13229714]


In [138]:
# Test Forward Propagation
hidden_output_activation, output_activation = forward_activation(inputs, input_weights, output_weights, bias)
total_error = get_total_error(output_activation, targets)

#print("Net inputs", hidden_net)
#print("Hidden output_activation", hidden_output_activation)  
#print("Net outputs", out_net)
#print("Output activation", output_activation)  

#print("targets ", targets) 
print("Total Error ",total_error)
#epoch_list = list()
#epoch_list.append(total_error)
#print(epoch_list)

hidden_output_activation [ 0.6120145   0.81587719]
output_activation [ 0.82101251]
Total Error  [ 49.42686453]


In [139]:
# Back Propagation

# The partial derivitive of the total error with respect to the output a1_3/output_activation 
# ∂Etotal/∂outo1 = 2 * 1/2(target- output_activation)^2-1 * -1 + 0 
# outo1= output_activation, Etotal = sse_1
# pd = Partial Derivitive, wrt= with respect to
def calc_pd_total_error_wrt_output_activation(target, output):
    pd_total_error_list = list()
    for i in range(len(target)):
        error = target[i] - output
        pd_total_error = 2 * 1/2 * np.power((error), 2-1) * -1 + 0
        pd_total_error_list.append(pd_total_error)
      
    pd_total_error_wrt_output_activation = np.array(pd_total_error_list, dtype=np.float64)
    return pd_total_error_wrt_output_activation;

# The partial derivitive of the total error with respect to the output a1_3/output_activation 
# ∂outo1/∂neto1 = output_activation(1 - output_activation) 

def calc_pd_output_activation_wrt_net_input(output):
    pd_output_activation_wrt_net_input = output * (1 - output) 
    print("partial_derivitive_output with respect to net input: ",pd_output_activation_wrt_net_input)
    return pd_output_activation_wrt_net_input;


#The partial derivitive of net output with respect to weight i:
# outputs: out_h1 & out_h2 weights w11_2, w12_2
def calc_pd_net_output_wrt_weight(layer_output, output_weights):
    print("Shape of layer_output", np.shape(layer_output))
    print("Shape of Output weights", np.shape(output_weights))
    print(layer_output)
    print(output_weights)
    pd_net_output_wrt_weight =  1 * layer_output * np.power(output_weights,(1-1)) + 0 + 0
    print("Partial derivitive of net output with respect to weight: ",pd_net_output_wrt_weight)
    return pd_net_output_wrt_weight;
    

# The partial derivitive of Etotal with respect to W5
# this is for a single weight W5, same process also has to be done for W6
# ∂Etotal/∂W11_2 = ∂Etotal/∂outo1 * ∂outo1/∂neto1 * ∂neto1/∂w11_2 
def calc_pd_total_error_wrt_weight(pd_total_error_wrt_output_activation, pd_output_activation_wrt_net_input, pd_net_output_wrt_weight ):
    pd_total_error_wrt_weight = pd_total_error_wrt_output_activation * pd_output_activation_wrt_net_input * pd_net_output_wrt_weight
    print("Partial derivitive of Total Error with respect to weight: ",pd_total_error_wrt_weight)
    return pd_total_error_wrt_weight;

# Calculate the adjusted input/output weights
# ∂Etotal/∂W11_2 = δ_o1 out_h1
# Wi^ = Wi - α * ∂Etotal/∂Wi
def adjust_weight(weight, pd_total_error_wrt_weight):
    adjusted_weight = weight - alpha * pd_total_error_wrt_weight
    return adjusted_weight;


In [140]:
# Test Back Propagation
pd_total_error_wrt_output_activation = calc_pd_total_error_wrt_output_activation(targets, output_activation)

pd_output_activation_wrt_net_input = calc_pd_output_activation_wrt_net_input(output_activation)
    
pd_net_output_wrt_weight = calc_pd_net_output_wrt_weight(hidden_output_activation, output_weights)
    
pd_total_error_wrt_weight = calc_pd_total_error_wrt_weight(pd_total_error_wrt_output_activation, pd_output_activation_wrt_net_input, pd_net_output_wrt_weight) 

adjusted_output_weight = adjust_weight(output_weights, pd_total_error_wrt_weight)

print("Partial derivitive of total error with respect to the output: ",pd_total_error_wrt_output_activation)
print("Adjusted Output Weights: ",adjusted_output_weight)



partial_derivitive_output with respect to net input:  [ 0.14695097]
Shape of layer_output (2,)
Shape of Output weights (2, 1)
[ 0.6120145   0.81587719]
[[ 0.78006857]
 [ 0.95751527]]
Partial derivitive of net output with respect to weight:  [[ 0.6120145   0.81587719]
 [ 0.6120145   0.81587719]]


ValueError: operands could not be broadcast together with shapes (280,1) (2,2) 

In [141]:
# hidden layer weights update

pd_total_error_wrt_hidden_activation = calc_pd_total_error_wrt_output_activation(targets, hidden_output_activation)

pd_hidden_activation_wrt_net_input = calc_pd_output_activation_wrt_net_input(hidden_output_activation)
    
pd_hidden_net_wrt_weight = calc_pd_net_output_wrt_weight(hidden_output_activation, input_weights)
    
pd_total_error_wrt_input_weight = calc_pd_total_error_wrt_weight(pd_total_error_wrt_hidden_activation, pd_hidden_activation_wrt_net_input, pd_hidden_net_wrt_weight) 

adjusted_input_weight = adjust_weight(input_weights, pd_total_error_wrt_input_weight)

print("Adjusted Input Weights: ",adjusted_input_weight)

partial_derivitive_output with respect to net input:  [ 0.23745275  0.1502216 ]
Shape of layer_output (2,)
Shape of Output weights (2, 2)
[ 0.6120145   0.81587719]
[[ 0.13899265  0.99674632]
 [-0.455725   -0.22486462]]
Partial derivitive of net output with respect to weight:  [[ 0.6120145   0.81587719]
 [ 0.6120145   0.81587719]]


ValueError: operands could not be broadcast together with shapes (280,2) (2,2) 