# Neural Network Implementation

The following is a 2-layer neural network implementation that creates a model for the XOR of the first two inputs.

Source of base code: http://iamtrask.github.io/2015/07/12/basic-python-network/

In [1]:
import numpy as np

# sigmoid function
def sigmoid(x,deriv=False):
    if(deriv==True):
        return x*(1-x)
    return 1/(1+np.exp(-x))
    
# input dataset
train_data = np.array([ [0,0,1],
                        [0,1,1],
                        [1,0,1],
                        [1,1,1] ])

ERROR_THRESHOLD = 0.005
    
# output dataset            
train_target = np.array([[0],
                         [1],
                         [1],
                         [0]])

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0
syanpse_0 = 2*np.random.random((3,4)) - 1
syanpse_1 = 2*np.random.random((4,1)) - 1

# iterate many times with backpropogation
for j in xrange(60000):

    # forward propagation
    input_layer = train_data
    hidden_layer = sigmoid(np.dot(input_layer,syanpse_0))
    output_layer = sigmoid(np.dot(hidden_layer,syanpse_1))

    # how much did we miss?
    output_layer_error = train_target - output_layer

    if (j% 10000) == 0:
        print "Error:" + str(np.mean(np.abs(output_layer_error)))
    
    # end iteration once error approaches threshold
    #max_error = np.max(np.abs(output_layer_error))
    #if max_error <= ERROR_THRESHOLD: break
    
    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    output_layer_delta = output_layer_error*sigmoid(output_layer,deriv=True)
    
    hidden_layer_error = output_layer_delta.dot(syanpse_1.T)
    
    # multiply how much we missed by the 
    # slope of the sigmoid at the values in l1
    hidden_layer_delta = hidden_layer_error * sigmoid(hidden_layer,True)

    # update weights
    syanpse_1 += hidden_layer.T.dot(output_layer_delta)
    syanpse_0 += input_layer.T.dot(hidden_layer_delta)
    
def pred(input):
    hidden = sigmoid(np.dot(input,syanpse_0))
    output = sigmoid(np.dot(hidden,syanpse_1))
    return output

Error:0.496410031903
Error:0.00858452565325
Error:0.00578945986251
Error:0.00462917677677
Error:0.00395876528027
Error:0.00351012256786


In [2]:
# Test the model on different cases (Expected output should be [0,0])
pred([[1,1,0], [0,0,0]])

array([[ 0.00676682],
       [ 0.24190494]])


# Application to MINST Problem

In the following, I attempt to create a three-layer network for use in solving the MINST problem.

This does not yet work.

MINST Data from: https://www.kaggle.com/c/digit-recognizer

In [3]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn import cross_validation
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline

In [4]:
#Retrieve the test data from the filesystem
data = pd.read_csv("Kaggle Competition MINST train.csv")
target = data['label']
data = data.drop('label', axis=1)

num_attributes = len(data.columns)

# Split the training data so that I can analyze testing error
train_data, test_data, train_target, test_target = cross_validation.train_test_split(
 data, target, test_size=0.99, random_state=0)

In [5]:
test_example = np.array(data.div(255).loc[0:10])
target_example = np.array(target.to_frame().div(10).loc[0:10])
print target_example

[[ 0.1]
 [ 0. ]
 [ 0.1]
 [ 0.4]
 [ 0. ]
 [ 0. ]
 [ 0.7]
 [ 0.3]
 [ 0.5]
 [ 0.3]
 [ 0.8]]


In [6]:
# Create and train the neural network
X = test_example
y = target_example

#np.random.seed(1)

syn0 = 2*np.random.random((num_attributes, num_attributes)) - 1
syn1 = 2*np.random.random((num_attributes, num_attributes/2)) - 1
syn2 = 2*np.random.random((num_attributes/2,1)) - 1
for j in xrange(5):
    l1 = 1/(1+np.exp(-(np.dot(X,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    l3 = 1/(1+np.exp(-(np.dot(l2,syn2))))
    
    l3_delta = (y - l3)*(l3*(1-l3))
    l2_delta = l3_delta*(syn2.T) * (l2 * (1-l2))
    l1_delta = l2_delta*(syn1) * (l1 * (1-l1))
    
    #syn2 += l2.T.dot(l3_delta)
    #syn1 += l1.T.dot(l2_delta)
    #syn0 += X.T.dot(l1_delta)
    
def pred(input):
    l1 = 1/(1+np.exp(-(np.dot(input,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    return l2

ValueError: operands could not be broadcast together with shapes (11,392) (784,392) 

In [None]:


print pred(test_example)
print target_example

In [None]:
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1
for j in xrange(60000):
    l1 = 1/(1+np.exp(-(np.dot(X,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    l2_delta = (y - l2)*(l2*(1-l2))
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1-l1))
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)
print y