In [2]:
import matplotlib
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from decimal import Decimal
from sklearn import preprocessing
from mnist import MNIST

In [3]:
# set up training rate alpha
alpha = 0.5

# initialise the weights for the network based on the input layers, the number of hidden layers, the number of output layers
def initialise_input_weights(n_inputs, n_hidden_inputs):
 hidden_layer_weights = list()
 for i in range(n_hidden_inputs):
  for j in range(n_inputs):
   weight = np.random.random(1)[0]
   hidden_layer_weights.append(weight)
 
 input_weights = np.array([hidden_layer_weights])
 input_weights = np.reshape(input_weights, (n_inputs, n_hidden_inputs))
 return input_weights; 

def initialise_output_weights(n_hidden_inputs,n_outputs):
 output_layer_weights = list()
 for i in range(n_outputs):
  for j in range(n_hidden_inputs):
   weight = np.random.random(1)[0]
   output_layer_weights.append(weight) 
  
 if n_outputs == 1:
  output_weights = np.array([output_layer_weights])
 elif n_outputs > 1:
  output_weights = np.array([output_layer_weights])  
  output_weights = np.reshape(output_weights, (n_hidden_inputs,n_outputs))
 
 return output_weights;
    
#initialise the bias for the network based on the number of hidden layers and the output layer bias
def initialise_bias(n_hidden_layer):
 hidden_layer_bias = list()    
 for i in range(n_hidden_layer):
  bias = np.random.random(1)[0]
  hidden_layer_bias.append(bias)
 
 output_layer_bias = [np.random.random(1)[0]]
 network_bias = [[hidden_layer_bias],[output_layer_bias]]
 return network_bias;


In [4]:
# Sigmoid function
# g(z) = 1/ 1 + e^-z
def sigmoid(z):
 g = 1/(1 + np.exp(-z))
 return g;

# --------------- FORWARD PROPAGATION  --------------------------
# return the sigmoid_input_act - sigmoid function sum of the input layer activation 
# a_n^2 = f(W_n1^1 x1 + W_n2^1 x2 + W_n3^1 x3 +b_n^1 )
def get_input_layer_activation(input, input_weights, input_bias):
 input_layer_activation = input * input_weights
 #print("input_layer_activation: ",input_layer_activation)
 hidden_layer_activation = np.sum(input_layer_activation, axis = 1) + input_bias * 1
 #print("Sum of Hidden Layer activation: ", hidden_layer_activation)
 sigmoid_input_activation = sigmoid(hidden_layer_activation)
 #print("Sigmoid function of Sum of Hidden Layer activation: ", sigmoid_input_activation)
 return sigmoid_input_activation;

# get output layer activation for hidden layer
# a1_3
def get_activation_output(sigmoid_input_activation, output_weights, output_bias):
 output = (sigmoid_input_activation * output_weights)
 activation_output = sigmoid(np.sum(output) + output_bias * 1 )
 #print("Activation Output: ",activation_output)   
 return activation_output;
 
def forward_propagation(input, input_weights, output_weights, input_bias, output_bias):
 sigmoid_input_activation = get_input_layer_activation(input, input_weights, input_bias)
 activation_output = get_activation_output(sigmoid_input_activation, output_weights, output_bias)
 #print("Forward Propagation Activation Output: ",activation_output)    
 return activation_output;   

# Calculate the Total Error Sum of Squared Errors = ∑ 1/2(Y-YP)^2 
#E_total = 1/2(target_01 - out_01)^2 
# sum of squared errors of prediction
def calc_error(actual_y, target):
 error = 1/2 * np.power((actual_y - target), 2)
 #print("Error Total: ",error)
 return error;

# If there is more than 1 node in the output layer, sum up the calc error
def calc_total_error(actual_y, target):
 total_error = 0
 total_error += calc_error(actual_y, target)
 return total_error;

In [5]:
# FUNCTIONS FOR BACK PROPAGATION OF OUTPUT
# calculate derivative of error at output layer

# Derivitive of error with reference to output
# deriv_wrt_out = -(target - output)
# change above to function
def deltaErr_wrt_out(target, output):
 result = -(target - output)
 print("Derivitive of error with reference to output:", result)
 return result;  

# calculate the derivation of the error output wrt the net
# derivout_wrt_net = output*(1-output)
# change above to function
def deltaOut_wrt_net(output):
 result = output * (1 - output)
 print("Derivitive of error output with reference to net output:", result)
 return result;   

# calculate derivative of error wrt to output layer weight OLW_Deriv
# Output Layer Weight Derivitive
def deltaErr_ow(deriv_wrt_out, derivout_wrt_net, activation):
 OLW_Deriv = deriv_wrt_out * derivout_wrt_net * activation
 print("Derivative of error with reference to output layer weight:", OLW_Deriv)
 return OLW_Deriv;
    
# FUNCTIONS FOR BACK PROPAGATION OF HIDDEN LAYER
# calculate derivative of error at hidden layer
# deriv_out_wrt_hL =  Weights2 * deriv_wrt_out *derivout_wrt_net
# print (deriv_out_wrt_hL)
# convert above to function
def deltaOut_hL(deriv_wrt_out, derivout_wrt_net, output_weights):
 deriv_out_wrt_hL = deriv_wrt_out * derivout_wrt_net * output_weights
 return deriv_out_wrt_hL;

# derivitive output in relation to net of hidden layer activation
# deriv_out_wrt_nethL = activation*(1-activation)
# convert above to function
def deltaOut_netHL(activation):
 activation = activation * (1 - activation)
 return activation;

# Derivitive in relation to input_weights
# deriv_wrt_wi = deriv_out_wrt_hL*deriv_out_wrt_nethL*Weights1
# convert above to function
def deltaErr_wi(deriv_out_wrt_hL, deriv_out_wrt_nethL, input_weights):
 deriv_wrt_wi = deriv_out_wrt_hL * deriv_out_wrt_nethL * input_weights
 return deriv_wrt_wi;

# convert the above to a function
def calc_adjusted_weights(W, deriv):
 W = W - (alpha * deriv)
 return W;

# delta error with reference to output
def delta_error_wrt_output(output, hidden_layer, target):
 deltaErr_out = deltaErr_wrt_out(target, output)
 deltaOut_net = deltaOut_wrt_net(output)
 deltaErrtot_ow = deltaErr_ow(deltaErr_out, deltaOut_net, hidden_layer)
 print("Delta Error with reference to output", deltaErrtot_ow)
 return deltaErrtot_ow, deltaErr_out, deltaOut_net;

 
# Delta Error with reference to input
def delta_error_wrt_input(hidden_layer, network_bias, deltaErr_out, deltaOut_net):
 deltaErrOut_hL = deltaOut_hL(deltaErr_out, deltaOut_net, network_bias[1])
 deltaErrOut_netHL = deltaOut_netHL(hidden_layer)
 deltaErrH_wi = deltaErr_wi(deltaErrOut_hL, deltaErrOut_netHL, network_bias[0])
 print("Delta Error with reference to input", deltaErrH_wi)
 return deltaErrH_wi;

In [7]:
 # Read in the the input file for the train/test datasets   
inputDataFrame = pd.read_csv("moons400.csv")  # instance variable unique to each instance
y = inputDataFrame['Class'].values   

#split up the dataset into training and test split 70/30 for training/test
train_X, test_X, train_y, test_y = train_test_split(inputDataFrame, y, test_size=0.30)
#print(train_X)
#print(train_y)

del train_X['Class']    # drop the 'Class' column from the Train and test dataframe
del test_X['Class']
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)

(nsamples, nattribs) = np.shape(train_X)
#subset_X = train_X[0:1]
subset_X = train_X
#input_y = train_y[0:1]
input_y = train_y
#print("Subset_X", subset_X)
#print("input_y", input_y)

(280, 2) (280,)
(120, 2) (120,)


In [44]:
# Test weights and bias initialisation based on network inputs, hidden layers, and outputs
# first line of moons.csv
#input_X =[[2.07106946, 0.41152931]]

#(nsamples, nattribs) = np.shape(input_X)
n_inputs = nattribs
n_hidden_layer = 1
n_hidden_inputs = 2
n_outputs = 1

# rescale the inputs using normalization 
inputs = preprocessing.normalize(subset_X)
#print("normalised inputs",inputs)

input_weights = np.array(initialise_input_weights(n_inputs, n_hidden_inputs))
output_weights = np.array(initialise_output_weights(n_hidden_inputs, n_outputs))
#network_weights = initialise_weights(n_inputs, n_hidden_inputs, n_outputs)
network_bias = np.array(initialise_bias(n_hidden_layer))

print("Length of inputs",len(inputs))
#print("actual y",input_y)
print("input Weights", input_weights)
print("output Weights", output_weights)
print("Network Bias", network_bias)

#target = input_y
#print("target",len(target))

Length of inputs 280
input Weights [[ 0.90094218  0.64800375]
 [ 0.63695274  0.75987986]]
output Weights [[ 0.82291974  0.52800769]]
Network Bias [[[ 0.95155327]]

 [[ 0.24708323]]]


In [9]:
# Training section
# use a loop to iterate through the dataset
# and present records 1 by 1
# for i in np.nditer(inputs, flags=['external_loop'], order='C'):
#for i in range(len(inputs)):
#    print (i)
#    for t in range((target[i])):## this part is not working because it loops through the whole target array each time
#        print(t)  ## not what we want

# Training the model
# returns adjusted weights
print(len(input_y))
def training_moon_set(inputs, input_y, input_weights, output_weights,network_bias):
 target = input_y
 threshold=1e-7
 #maxrounds=5000
 maxrounds=10
 iter = 0

 error = 99.0
 while abs(error) > threshold:
    for i in range(len(inputs)):
        # ----------START OF FORWARD PROPAGATION FUNCTION CALLS
        hidden_layer = get_input_layer_activation(inputs[i], input_weights, network_bias[0])
        output = forward_propagation(inputs, input_weights, output_weights, network_bias[0], network_bias[1])
        print ("output:", output)

        # call error function
        error = calc_total_error(input_y[i], output )
        # error = get_error(target[t], output)
        print("Error Total:", error)
        # -------- END OF FORWARD PROPAGATION FUNCTION CALLS

         # ------- START OF BACK PROPAGATION FOR OUTPUT LAYER
        (deltaErrtot_ow, deltaErr_out, deltaOut_net) = delta_error_wrt_output(output, hidden_layer, target[i])
        deltaErrH_wi = delta_error_wrt_input(hidden_layer, network_bias, deltaErr_out, deltaOut_net) 
        # calculate adjusted weights using function
        output_weights = calc_adjusted_weights(output_weights, deltaErrtot_ow)
        input_weights = calc_adjusted_weights(input_weights, deltaErrH_wi)
        # --------- END OF BACK PROPAGATION OF HIDDEN LAYER

    iter +=1
    
    if (iter > maxrounds):
        break

    print ("\nFinished after iteration ", iter, " error =", error, "target=", target[i], ", output=", output, "input final=", inputs[i])    

    print ("Weights1 adjusted:", input_weights)
    print ("Weights2 adjusted:", output_weights)
    
    return input_weights,output_weights; 


280


In [45]:
print(np.shape(inputs))
print(np.shape(input_y))
print(np.shape(input_weights))
print(np.shape(output_weights))
print(np.shape(network_bias))

(280, 2)
(280,)
(2, 2)
(1, 2)
(2, 1, 1)


In [12]:
# retrieve adjusted weights from the training set
input_weights,output_weights = training_moon_set(inputs, input_y, input_weights, output_weights, network_bias)

ValueError: operands could not be broadcast together with shapes (280,2) (2,2) 

In [46]:
# Same code as cell above - but with alot of the commented out code removed

# use a loop to iterate through the dataset
# and present records 1 by 1
# for i in np.nditer(inputs, flags=['external_loop'], order='C'):
#for i in range(len(inputs)):
#    print (i)
#    for t in range((target[i])):## this part is not working because it loops through the whole target array each time
#        print(t)  ## not what we want
#print("inputs",inputs)
print("Length of inputs",len(inputs))
#print("actual y",input_y)
print("input Weights", input_weights)
print("output Weights", output_weights)
print("Network Bias", network_bias)
target = input_y


tol=1e-7
#maxrounds=5000
maxrounds=1
iter = 0

error = 99.0
while abs(error) > tol:
    for i in range(len(inputs)):
    #print (i)
    # START OF FORWARD PROPAGATION FUNCTION CALLS
        hidden_layer = get_input_layer_activation(inputs[i], input_weights, network_bias[0])
    #activation_output = get_activation_output(activation_input,output_weights, network_bias[1] )

    # test get_sigmoid_output 
    #hidden_layer = get_sigmoid_output(Weights1, inputs[i], bias_1)
    #print ("hidden_layer result:", hidden_layer)

    # test get_output
    #output = get_output(hidden_layer, Weights2, bias_2)
        output = forward_propagation(inputs, input_weights, output_weights, network_bias[0], network_bias[1])
        print ("output:", output)

    # call error function
        error = calc_total_error(input_y[i], output )
     
     #error = get_error(target[t], output)
        print("Error Total:", error)

 # END OF FORWARD PROPAGATION FUNCTION CALLS

 # START OF BACK PROPAGATION FOR OUTPUT LAYER
 #test function deltaErr_wrt_out
 #deltaErr_out = deltaErr_wrt_out(target[i], output)
 #deltaOut_net = deltaOut_wrt_net(output)
 
 #deltaErrtot_ow = deltaErr_ow(deltaErr_out, deltaOut_net, hidden_layer)
        (deltaErrtot_ow, deltaErr_out, deltaOut_net) = delta_error_wrt_output(output, hidden_layer, target[i])
  
 #END OF BACK PROPAGATION FOR OUTPUT LAYER
    
 # START BACK PROPAGATION OF HIDDEN LAYER
 # calculate derivative of error at hidden layer
 # test function deltaOut_hL
 #deltaErrOut_hL = deltaOut_hL(deltaErr_out, deltaOut_net, network_bias[1])
 # print(deltaErrOut_hL)
 #deltaErrOut_netHL = deltaOut_netHL(hidden_layer)
 # print(deltaErrOut_netHL)
 
 # ************ this one needed for input weights
 #deltaErrH_wi = deltaErr_wi(deltaErrOut_hL, deltaErrOut_netHL, network_bias[0])
        deltaErrH_wi = delta_error_wrt_input(hidden_layer, network_bias, deltaErr_out, deltaOut_net) 
# print(deltaErrH_wi)

    # same calculation as above but with Weights matrix 
    # calculate adjusted weights using function
        output_weights = calc_adjusted_weights(output_weights, deltaErrtot_ow)
        input_weights = calc_adjusted_weights(input_weights, deltaErrH_wi)

 # END OF BACK PROPAGATION OF HIDDEN LAYER

    iter +=1
    
    if (iter > maxrounds):
        break

print ("\nFinished after iteration ", iter, " error =", error, "target=", target[i], ", output=", output, "input final=", inputs[i])    

print ("Weights1 adjusted:", input_weights)
print ("Weights2 adjusted:", output_weights)


Length of inputs 280
input Weights [[ 0.90094218  0.64800375]
 [ 0.63695274  0.75987986]]
output Weights [[ 0.82291974  0.52800769]]
Network Bias [[[ 0.95155327]]

 [[ 0.24708323]]]


ValueError: operands could not be broadcast together with shapes (280,2) (2,2) 

In [38]:
# reference https://pypi.python.org/pypi/python-mnist
# https://github.com/sorki/python-mnist/blob/master/mnist/loader.py
# TRAIN MNIST DATA

mndata = MNIST('./mnist')
images, labels = mndata.load_training()

# test_images, test_labels = mndata.load_testing()

processed_images = mndata.process_images_to_numpy(images)
# print(processed_images[0:2])

target = np.array(labels)
# print(target[0:2])



In [40]:
# set up 2 lists to filter the MNIST data into
# keeping only 0 and 6 to classify
# TRAIN MNIST DATA

filtered_labels = []
filtered_images = []
for i in range(len(target)):
    if target[i]==0 or target[i]==6:
        filtered_labels.append(target[i])
        filtered_images.append(processed_images[i])
        
# remap the value 6 to 1, so classification is binary

# print(filtered_labels[0:10])

for i in range(len(filtered_labels)):
    if filtered_labels[i]==6:
        filtered_labels[i]=1

# print(filtered_labels[0:10])


[0, 6, 6, 0, 6, 0, 6, 0, 6, 0]
[0, 1, 1, 0, 1, 0, 1, 0, 1, 0]


In [35]:
# convert the lists to arrays
# TRAIN MNIST DATA
filtered_labels = np.array(filtered_labels)

filtered_images = np.array(filtered_images)

In [37]:
# set up weights and biases for MNIST data, get nattribs value
# TRAIN MNIST DATA

(train_samples, train_shape) = np.shape(filtered_images)
print ("train: samples =", train_samples, ", attribs =", train_shape)

# Test weights and bias initialisation based on network inputs, hidden layers, and outputs
# first line of moons.csv
#input_X =[[2.07106946, 0.41152931]]

#(nsamples, nattribs) = np.shape(input_X)
n_inputs = train_shape
n_hidden_layer = 1
n_hidden_inputs = 2
n_outputs = 1

# normalise training data
# rescale the inputs using normalization 
mnist_train = preprocessing.normalize(filtered_images)
#print("normalised inputs",inputs)

input_weights = np.array(initialise_input_weights(n_inputs, n_hidden_inputs))
output_weights = np.array(initialise_output_weights(n_hidden_inputs, n_outputs))
#network_weights = initialise_weights(n_inputs, n_hidden_inputs, n_outputs)
network_bias = np.array(initialise_bias(n_hidden_layer))

print("Length of inputs",len(filtered_images))
#print("actual y",input_y)
print("input Weights", len(input_weights))
print("output Weights", len(output_weights))
print("Network Bias", len(network_bias))

train: samples = 11841 , attribs = 784
Length of inputs 11841
input Weights 784
output Weights 1
Network Bias 2


In [49]:
# MNIST TEST DATA
test_images, test_labels = mndata.load_testing()

test_images = mndata.process_images_to_numpy(test_images)
# print(processed_images[0:2])

test_target = np.array(test_labels)

# set up 2 lists to filter the MNIST data into
# keeping only 0 and 6 to classify
# TEST MNIST DATA

filtered_test_labels = []
filtered_test_images = []
for i in range(len(test_target)):
    if test_target[i]==0 or test_target[i]==6:
        filtered_test_labels.append(test_target[i])
        filtered_test_images.append(test_images[i])
        
# remap the value 6 to 1, so classification is binary

# print(filtered_labels[0:10])

for i in range(len(filtered_test_labels)):
    if filtered_test_labels[i]==6:
        filtered_test_labels[i]=1
        
# convert the lists to arrays
# TEST MNIST DATA
filtered_test_labels = np.array(filtered_test_labels)

filtered_test_images = np.array(filtered_test_images)  

In [50]:
print(filtered_test_labels[0:10])

[0 0 1 0 1 1 0 0 1 1]
