In [None]:
# Package imports
import matplotlib
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import pandas as pd
import numpy as np
from decimal import Decimal
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from mnist import MNIST
import datetime
# Display plots inline and change default figure size
%matplotlib inline

In [None]:
########### ---------  PART ONE THE SETUP  -------------  ################
##
## Other REFERENCES
## https://arxiv.org/pdf/1606.04838.pdf
## http://www.alivelearn.net/deeplearning/dlnd-your-first-neural-network.html
## http://neuralnetworksanddeeplearning.com/chap1.html & chap2.html
## https://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf
## https://arxiv.org/ftp/arxiv/papers/1404/1404.1559.pdf
## https://pypi.python.org/pypi/python-mnist
## https://becominghuman.ai/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c
## https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
## http://cs231n.github.io/understanding-cnn/
## Andrew Ng's lectures on Coursera
## http://www.holehouse.org/mlclass/09_Neural_Networks_Learning.html
##
####################

In [None]:
## REFERENCE from Michael Madden's LoadDataset ipynb

# Use pandas to read the CSV file as a dataframe
df = pd.read_csv("moons400.csv")

# The y values are those labelled 'Class': extract their values
y = df['Class'].values

# Every column other than 'Class' is an input attribute.
# DataFrame.as_matrix() no longer exists in current pandas; `.values` (already
# used for y above) works on old and new versions alike. 'Class' is dropped
# first so X really holds only the attribute columns.
attribute_df = df.drop('Class', axis=1)
X = attribute_df.values

# np.shape returns all dimensions of the array
(nsamples, nattribs) = np.shape(X)
print ("X: nsamples =", nsamples, ", nattribs =", nattribs)

# using sklearn.model_selection.train_test_split to split up data into train and test sets split 70/30
# The attribute-only frame is split, so no column has to be deleted from the
# resulting train/test frames afterwards and `df` itself is left untouched.
train_X, test_X, train_y, test_y = train_test_split(attribute_df, y, test_size=0.30)

print("train: ", train_X.shape, train_y.shape)
print("test: ", test_X.shape, test_y.shape)

In [None]:
# Rescale the training attributes and hold them as a float64 array.
# NOTE(review): with its default arguments sklearn's preprocessing.normalize
# rescales each sample (row) to unit norm rather than scaling each feature
# column - confirm that this is the intended preprocessing.
inputs = np.array(preprocessing.normalize(train_X), dtype=np.float64)

In [None]:
# Training targets as a float64 column vector, one row per training sample.
# The row count is inferred with -1: the previously used name `dsamples` is
# never defined anywhere in the notebook and raised a NameError.
target = np.reshape(np.array(train_y, dtype=np.float64), (-1, 1))

In [1]:
# initialise the weights for the network based on the input layers, the number of hidden layers, the number of output layers
#reference for above https://www.coursera.org/learn/deep-neural-network/lecture/RwqYe/weight-initialization-for-deep-networks
def initialise_input_weights(n_inputs, n_hidden_inputs):
    """Draw the input->hidden weight matrix.

    Each weight is a standard-normal sample scaled by sqrt(1 / n_inputs),
    i.e. by the fan-in of the hidden layer.

    The scale used to be sqrt(1 / n_inputs ** (n_hidden_inputs - 1)), which
    treated the layer index of the referenced formula as an exponent. Both
    expressions agree when n_hidden_inputs == 2 (the value used in this
    notebook); for any other width the old one was wrong.

    Parameters
    ----------
    n_inputs : int
        Number of input attributes (rows of the returned matrix).
    n_hidden_inputs : int
        Number of hidden units (columns of the returned matrix).

    Returns
    -------
    numpy.ndarray
        float64 array of shape (n_inputs, n_hidden_inputs).
    """
    scale = np.sqrt(1.0 / n_inputs)
    # One draw of the full matrix replaces the per-unit loop plus reshape.
    input_weights = np.random.randn(n_inputs, n_hidden_inputs) * scale
    return input_weights

def initialise_output_weights(n_hidden_inputs,n_outputs):
    """Draw the hidden->output weight matrix.

    Each weight is a standard-normal sample scaled by
    sqrt(1 / n_hidden_inputs), i.e. by the fan-in of the output layer.

    The scale used to be sqrt(1 / n_outputs ** (n_hidden_inputs - 1)): it was
    based on the number of outputs instead of the fan-in and used the hidden
    width as an exponent.

    Parameters
    ----------
    n_hidden_inputs : int
        Number of hidden units (rows of the returned matrix).
    n_outputs : int
        Number of output units (columns of the returned matrix).

    Returns
    -------
    numpy.ndarray
        float64 array of shape (n_hidden_inputs, n_outputs).
    """
    scale = np.sqrt(1.0 / n_hidden_inputs)
    output_weights = np.random.randn(n_hidden_inputs, n_outputs) * scale
    return np.array(output_weights, dtype=np.float64)



#initialise the bias for the network based on the number of hidden layers and the output layer bias
def initialise_bias(n_hidden_layer):
    """Return random starting biases as a nested list.

    The result has the form [[hidden_biases], [output_biases]] where
    hidden_biases holds n_hidden_layer values and output_biases holds one
    value, each drawn with np.random.random.
    """
    hidden_biases = [np.random.random(1)[0] for _ in range(n_hidden_layer)]
    output_biases = [np.random.random(1)[0]]
    return [[hidden_biases], [output_biases]]


In [None]:
# FORWARD PROPAGATION FUNCTIONS

def sigmoid(inp, weight, bias):
    """Forward pass of one layer: logistic function of (inp . weight + bias)."""
    pre_activation = np.dot(inp, weight) + bias
    return 1 / (1 + np.exp(-pre_activation))

def get_error(t, o):
    """Half squared difference between target t and output o (element-wise)."""
    residual = t - o
    return 0.5 * (residual ** 2)

# FUNCTIONS FOR BACK PROPAGATION OF OUTPUT

def sigmoid_deriv(out):
    """Derivative of the logistic function expressed through its output `out`."""
    return (1 - out) * out


# convert the above to a function
def calc_adjusted_weights(W, deriv, learning_rate=None):
    """Take one gradient-descent step: W - learning_rate * deriv.

    Parameters
    ----------
    W : array-like
        Current weights.
    deriv : array-like
        Gradient of the error with respect to W.
    learning_rate : float, optional
        Step size. When omitted, the notebook-level global `alpha` is used,
        which is what this function always did before; passing it explicitly
        removes the dependency on a variable defined in a later cell.

    Returns
    -------
    The updated weights.
    """
    if learning_rate is None:
        learning_rate = alpha
    return W - (learning_rate * deriv)


In [None]:
# Network dimensions and random starting parameters for the moons data.
train_samples, train_shape = inputs.shape
print ("train: samples =", train_samples, ", attribs =", train_shape)

# Layer sizes: one input per attribute, one hidden layer of two units, one output.
n_inputs = train_shape
n_hidden_layer = 1
n_hidden_inputs = 2
n_outputs = 1

# Draw order (input weights, output weights, biases) is kept so a seeded run
# produces the same parameters.
Weights1 = np.asarray(initialise_input_weights(n_inputs, n_hidden_inputs), dtype=np.float64)
Weights2 = initialise_output_weights(n_hidden_inputs, n_outputs).reshape((n_hidden_inputs, n_outputs))
bias = np.asarray(initialise_bias(n_hidden_layer), dtype=np.float64)


In [None]:
########### ---------  PART TWO TEST THE SIMPLE DATASET  -------------  ################

In [None]:
#### TRAIN THE MODEL #####
# Full-batch gradient descent on the training split.
#
# Fixes relative to the earlier version of this cell:
#   * the loop header had been replaced by a single `if`, so exactly one
#     update ran and the "Finished" branch was unreachable;
#   * dW1 was computed as dH.T.dot(inputs), the transpose of the gradient of
#     Weights1 (it only ran because Weights1 happens to be square here);
#   * the error was averaged over `nsamples` (the whole dataset) instead of
#     the number of training rows;
#   * the biases were never updated;
#   * the counter no longer shadows the builtin `iter`.
# Gradients are averaged over the batch so that they are the gradient of the
# same mean error (`avg_err`) that the stopping test monitors.

# exp() can overflow for large negative pre-activations; the sigmoid then
# correctly saturates to 0, so the floating-point warnings are silenced.
np.seterr(all='ignore')

alpha = 0.5         # learning rate
threshold = 1e-5    # stop once the mean error is at or below this value
maxrounds = 60000   # upper bound on the number of updates

err = []            # mean error per round (kept for plotting)
count = []          # round number matching each entry of err
error = 99.0
avg_err = 99.0
n_rounds = 0
n_train = inputs.shape[0]

print(datetime.datetime.now())

for n_rounds in range(1, maxrounds + 1):

    # START OF FORWARD PROPAGATION FUNCTION CALLS
    hidden_layer = sigmoid(inputs, Weights1, bias[0])
    output = sigmoid(hidden_layer, Weights2, bias[1])

    # A shape mismatch here would silently broadcast to an (N, N) matrix.
    if output.shape != target.shape:
        raise ValueError("target shape %s does not match output shape %s"
                         % (target.shape, output.shape))

    # call error function
    error = get_error(target, output)
    avg_err = np.sum(error) / n_train
    err.append(avg_err)
    count.append(n_rounds)
    if abs(avg_err) <= threshold:
        break

    # START OF BACK PROPAGATION FUNCTION CALLS
    # REFERENCE http://python3.codes/neural-network-python-part-1-sigmoid-function-gradient-descent-backpropagation/
    dE = (output - target) * sigmoid_deriv(output)
    dH = np.dot(dE, Weights2.T) * sigmoid_deriv(hidden_layer)

    dW2 = np.dot(hidden_layer.T, dE) / n_train   # same shape as Weights2
    dW1 = np.dot(inputs.T, dH) / n_train         # same shape as Weights1
    # Each layer shares one scalar bias, so its gradient sums over every
    # unit and sample of that layer.
    db2 = np.sum(dE) / n_train
    db1 = np.sum(dH) / n_train

    Weights2 = Weights2 - alpha * dW2
    Weights1 = Weights1 - alpha * dW1
    bias[1] = bias[1] - alpha * db2
    bias[0] = bias[0] - alpha * db1

print ("\nFinished after ", n_rounds, " error =", avg_err)
print(datetime.datetime.now())



In [None]:
#### TEST THE MODEL #####
# Evaluate the trained network on the held-out split.
#
# The test data lives in its own variables: this cell used to overwrite
# `inputs` and `target`, so re-running the training cell afterwards trained
# on the test set. The whole split goes through the network in one
# vectorised forward pass instead of a per-sample Python loop, and the mean
# error is taken over the number of test rows (it used to be divided by
# `nsamples`, the size of the full dataset, and was never reported).

test_inputs = np.array(preprocessing.normalize(test_X), dtype=np.float64)
test_target = np.reshape(np.array(test_y, dtype=np.float64), (-1, 1))

### START OF FORWARD PROPAGATION FUNCTION CALLS
hidden_layer = sigmoid(test_inputs, Weights1, bias[0])
result = sigmoid(hidden_layer, Weights2, bias[1])
### END OF FORWARD PROPAGATION FUNCTION CALLS

# per-sample error and its mean over the test split
error_res = get_error(test_target, result)
avg_err = np.sum(error_res) / test_target.shape[0]

# An output of at least 0.5 is read as class 1.
predicted = (result >= 0.5).astype(np.float64)
accuracy = np.mean(predicted == test_target)

print("Target:", test_target.ravel())
print("Results: ", result.ravel())
print("Average error:", avg_err, " accuracy:", accuracy)

In [None]:
########### ---------  PART THREE TEST THE DIFFICULT DATASET  -------------  ################

In [None]:
# LOAD MNIST DATA
# reference https://pypi.python.org/pypi/python-mnist
# https://github.com/sorki/python-mnist/blob/master/mnist/loader.py

# Loader from the python-mnist package, pointed at './mnist'
# (presumably the directory holding the raw MNIST files - verify locally).
mndata = MNIST('./mnist')
# Training images and their labels as returned by the loader.
images, labels = mndata.load_training()

# Convert the loaded images with the loader's own numpy helper.
processed_images = mndata.process_images_to_numpy(images)
# print(processed_images[0:2])

# Labels as a numpy array.
# NOTE(review): this reuses the name `target`, replacing whatever an earlier
# cell stored under it.
target = np.array(labels)
# print(target[0:2])

In [None]:
# FILTER MNIST DATA and REMAP 6 to 1
# Keep only the samples labelled 0 or 6, then relabel 6 as 1 so that the
# task becomes a binary classification.

# positions of the samples to keep, in their original order
kept_positions = [idx for idx in range(len(target))
                  if target[idx] == 0 or target[idx] == 6]

# images of the kept samples
filtered_images = [processed_images[idx] for idx in kept_positions]

# labels of the kept samples, with 6 remapped to 1
filtered_labels = [1 if target[idx] == 6 else target[idx] for idx in kept_positions]

# convert the lists to arrays
# TRAIN MNIST DATA
filtered_labels = np.array(filtered_labels)
filtered_images = np.array(filtered_images)

In [None]:
# set up weights and biases for MNIST data, get nattribs value

(train_samples, train_shape) = np.shape(filtered_images)
print ("train: samples =", train_samples, ", attribs =", train_shape)

# Layer sizes: one input per pixel attribute, one hidden layer of two units,
# one output unit.
n_inputs = train_shape
n_hidden_layer = 1
n_hidden_inputs = 2
n_outputs = 1

# normalise training data
# rescale the inputs using normalization
mnist_train = preprocessing.normalize(filtered_images)

# Random starting parameters for the MNIST network.
input_weights = np.array(initialise_input_weights(n_inputs, n_hidden_inputs))
output_weights = np.array(initialise_output_weights(n_hidden_inputs, n_outputs))
bias = np.array(initialise_bias(n_hidden_layer))

#validate everything
print("Length of inputs",len(filtered_images))
print("input Weights", len(input_weights))
print("output Weights", len(output_weights))
# `network_bias` only exists as a local inside initialise_bias(); the array
# built above is called `bias`, so the old print raised a NameError.
print("Network Bias", len(bias))

In [None]:
# http://www.holehouse.org/mlclass/09_Neural_Networks_Learning.html
# REFERENCE FOR mini batch logic https://wiseodd.github.io/techblog/2016/06/21/nn-sgd/
#TRAIN MNIST DATA
#
# Mini-batch gradient descent on the filtered MNIST training set.
#
# Fixes relative to the earlier version of this cell:
#   * it trained `Weights1`/`Weights2` (the moons network) instead of the
#     `input_weights`/`output_weights` created for MNIST, so the shapes did
#     not match the MNIST inputs;
#   * the labels were used as a 1-D array, which broadcasts against the
#     (batch, 1) network output; they are now reshaped to a column;
#   * a single `if` replaced the outer loop, so only one pass was made and
#     the "Finished" branch was unreachable;
#   * dW1 was the transpose of the gradient of the input weights;
#   * the error was averaged over `nsamples` from the moons data;
#   * the biases were never updated.
# Gradients are averaged over each mini-batch.

# exp() can overflow for large negative pre-activations; the sigmoid then
# correctly saturates to 0, so the floating-point warnings are silenced.
np.seterr(all='ignore')

alpha = 0.5             # learning rate
threshold = 1e-5        # stop once the mean epoch error is at or below this
maxrounds = 60000       # upper bound on mini-batch updates (checked once per epoch)
minibatch_size = 1000

err = []                # mean error per epoch (kept for plotting)
count = []              # number of updates done at the end of each epoch
error = 99.0
avg_err = 99.0
n_updates = 0
n_train = mnist_train.shape[0]

if n_train == 0:
    # Without this guard the loop below would never advance.
    raise ValueError("mnist_train is empty - nothing to train on")

print(datetime.datetime.now())

while n_updates < maxrounds:
    epoch_error = 0.0

    for start in range(0, n_train, minibatch_size):
        # Get pair of (X, y) of the current minibatch/chunk
        batch_inputs = mnist_train[start:start + minibatch_size]
        batch_target = np.reshape(
            np.array(filtered_labels[start:start + minibatch_size], dtype=np.float64),
            (-1, 1))
        batch_len = batch_inputs.shape[0]

        # START OF FORWARD PROPAGATION FUNCTION CALLS
        hidden_layer = sigmoid(batch_inputs, input_weights, bias[0])
        output = sigmoid(hidden_layer, output_weights, bias[1])

        # call error function
        error = get_error(batch_target, output)
        epoch_error += np.sum(error)

        # START OF BACK PROPAGATION FUNCTION CALLS
        dE = (output - batch_target) * sigmoid_deriv(output)
        dH = np.dot(dE, output_weights.T) * sigmoid_deriv(hidden_layer)

        dW2 = np.dot(hidden_layer.T, dE) / batch_len    # same shape as output_weights
        dW1 = np.dot(batch_inputs.T, dH) / batch_len    # same shape as input_weights
        # Each layer shares one scalar bias, so its gradient sums over every
        # unit and sample of that layer.
        db2 = np.sum(dE) / batch_len
        db1 = np.sum(dH) / batch_len

        output_weights = output_weights - alpha * dW2
        input_weights = input_weights - alpha * dW1
        bias[1] = bias[1] - alpha * db2
        bias[0] = bias[0] - alpha * db1

        n_updates += 1

    avg_err = epoch_error / n_train
    err.append(avg_err)
    count.append(n_updates)
    if abs(avg_err) <= threshold:
        break

print ("\nFinished after ", n_updates, " error =", avg_err)
print(datetime.datetime.now())
#print ("Weights1 adjusted:", Weights1)
#print ("Weights2 adjusted:", Weights2)


# Plot and embed in ipython notebook!
#plt.scatter(count, err)
#plt.show()

In [None]:
# SETUP MNIST TEST DATA
test_images, test_labels = mndata.load_testing()

test_images = mndata.process_images_to_numpy(test_images)

test_target = np.array(test_labels)

# set up 2 lists to filter the MNIST data into
# keeping only 0 and 6 to classify
# TEST MNIST DATA
#
# The filter runs over the *test* labels and images loaded above. It used to
# iterate over `target` and `processed_images`, i.e. training-side variables,
# so the "test" set was built from the wrong data.

filtered_test_labels = []
filtered_test_images = []
for i in range(len(test_target)):
    if test_target[i]==0 or test_target[i]==6:
        filtered_test_labels.append(test_target[i])
        filtered_test_images.append(test_images[i])

# remap the value 6 to 1, so classification is binary

for i in range(len(filtered_test_labels)):
    if filtered_test_labels[i]==6:
        filtered_test_labels[i]=1

# convert the lists to arrays
# TEST MNIST DATA
filtered_test_labels = np.array(filtered_test_labels)

filtered_test_images = np.array(filtered_test_images)

In [None]:
## TEST MNIST DATA
# Evaluate the MNIST network on the filtered MNIST test set.
#
# This cell was a copy of the moons test cell: it evaluated `test_X`/`test_y`
# with `Weights1`/`Weights2`. It now uses the filtered MNIST test
# images/labels and the MNIST weights (`input_weights`/`output_weights`), and
# runs one vectorised forward pass instead of a per-sample loop.

mnist_test = np.array(preprocessing.normalize(filtered_test_images), dtype=np.float64)
mnist_test_target = np.reshape(np.array(filtered_test_labels, dtype=np.float64), (-1, 1))

### START OF FORWARD PROPAGATION FUNCTION CALLS
hidden_layer = sigmoid(mnist_test, input_weights, bias[0])
result = sigmoid(hidden_layer, output_weights, bias[1])
### END OF FORWARD PROPAGATION FUNCTION CALLS

# per-sample error and its mean over the test set
error_res = get_error(mnist_test_target, result)
avg_err = np.sum(error_res) / mnist_test_target.shape[0]

# An output of at least 0.5 is read as class 1 (digit 6 after remapping).
predicted = (result >= 0.5).astype(np.float64)
accuracy = np.mean(predicted == mnist_test_target)

print("Target:", mnist_test_target.ravel())
print("Results: ", result.ravel())
print("Average error:", avg_err, " accuracy:", accuracy)

In [None]:
########### ---------  PART FOUR THE ENHANCEMENT  -------------  ################

In [None]:
# use relu activation in layer
def Leaky_relU(x_input, negative_slope=0.000001):
    """Leaky ReLU activation, applied element-wise.

    Positive inputs pass through unchanged; all other inputs are multiplied
    by `negative_slope`.

    The previous body, np.maximum(0.000001, x_input), clamped every input
    below 1e-6 to the constant 1e-6. That is not leaky and contradicts
    deriv_Leaky_relU, which reports a slope of 1e-6 for non-positive inputs.

    Parameters
    ----------
    x_input : scalar or array-like
        Pre-activation values.
    negative_slope : float, optional
        Slope applied to non-positive inputs (default 0.000001, the constant
        already used by deriv_Leaky_relU).

    Returns
    -------
    numpy.ndarray
        Activated values with the same shape as x_input.
    """
    x_input = np.asarray(x_input, dtype=np.float64)
    return np.where(x_input > 0, x_input, negative_slope * x_input)

# use derivative of relu in back prop layer
def deriv_Leaky_relU(y_input, negative_slope=0.000001):
    """Derivative of the leaky ReLU, applied element-wise.

    Returns 1 where y_input is positive and `negative_slope` elsewhere.

    The previous body used `if y_input <= 0`, which raises for arrays with
    more than one element (ambiguous truth value), so it could not be used on
    a layer's activations. np.where handles scalars and arrays alike.

    Parameters
    ----------
    y_input : scalar or array-like
        Values at which the derivative is evaluated.
    negative_slope : float, optional
        Derivative reported for non-positive inputs (default 0.000001).

    Returns
    -------
    numpy.ndarray
        Derivative values with the same shape as y_input.
    """
    y_input = np.asarray(y_input)
    return np.where(y_input > 0, 1.0, negative_slope)