In [139]:
%matplotlib inline

In [140]:
import numpy as np

In [141]:
# Data - also - Input layer
training_X= np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])
training_Y = np.array([[0],[0], [1], [1]])

input_layer_features = 3
hidden_layer_nodes = 2
output_layer_nodes = 1

In [142]:


class Perceptron(object):
    def __init__(self):
#         self.ilw = np.random.rand(3,4) #setup random weights for input layer
        self.hlw = np.random.rand(input_layer_features,hidden_layer_nodes) # setup random weights for hidden layer
        self.olw = np.random.rand(hidden_layer_nodes, output_layer_nodes) # setup random weights for output layer
        
    def sigmoid(self, dot_product):
        return 1 / (1 + np.exp(-dot_product))
    
    def step(self, dot_product):
        step_transform = []
        for item in dot_product:
            if item >= .5:
                step_transform.append(1)
            elif item < .5:
                step_transform.append(0)
        return(np.array(step_transform, ndmin=2)) # need ndmin 2 to avoid (4,) we need (4,1)
    
    def sigmoid_derivative(self, dot_product):
        return dot_product * (1 - dot_product)
            
        

In [143]:
ptron = Perceptron()

In [144]:
# Step one get the dot products for the layers

# these give us the magnitude of the nodes' reaction to the input from the previous layer
# how much stimulation is this neuron receiving from the input - how lit up is it
hlr = np.dot(training_X, ptron.hlw) 
olr = np.dot(hlr, ptron.olw)

In [145]:
print("hlr \n {}\n olr \n {}".format(hlr, olr))

hlr 
 [[  7.50101915e-04   7.18988488e-01]
 [  1.28469628e+00   1.46608641e+00]
 [  4.12473766e-01   8.20038391e-01]
 [  8.72972617e-01   1.36503651e+00]]
 olr 
 [[ 0.45063523]
 [ 1.34487723]
 [ 0.65061957]
 [ 1.14489289]]


In [146]:
# step 2 - run each layer through it's activation function

# this takes the stimulation from above and gives us a classifier to help figure out if the node
# is firing the way we want it to.  Similar to logistic regression classification but different
# because we are looking at how right it is not just whether it is right or wrong.

hlr2 = ptron.sigmoid(hlr)
olr2 = ptron.sigmoid(olr) 

In [147]:
print("hlr2 \n {}\n olr2 \n {}".format(hlr2, olr2))

hlr2 
 [[ 0.50018753  0.67238424]
 [ 0.78324813  0.81246181]
 [ 0.60168089  0.69424449]
 [ 0.70536386  0.79657704]]
 olr2 
 [[ 0.61079025]
 [ 0.79329086]
 [ 0.65715007]
 [ 0.75857685]]


In [148]:
# step 3 - calculate the error
# Learning begins by calculating how far of we are from what was experienced and what was desired.
# so we calulate the error between expected results and what we calculated

error = training_Y - olr2

In [149]:
print("error \n {}".format(error))

error 
 [[-0.61079025]
 [-0.79329086]
 [ 0.34284993]
 [ 0.24142315]]


In [150]:
# step 4 get the deltas - how much do we need to learn
# we do dot products from right to left through the network
# stepping backwards through each calculation to determine a better set of weights
# until we find the set that gets the results we are looking for
# like the way the human nervous system sends impulses to the brain that then processes and 
# sends back a response

# change in the output layer is calculated by the error
# change in the hidden layer is calculated in part by the change in the output layer

# talk to Kyle about learning coefficient - multiply by another constant to tune?

# sigmoid derivative gives us the instantaneous rate of change or slope at that point on the 
# sigmoid curve as x approaches 0

do = error * ptron.sigmoid_derivative(olr2) # delta=output layer-error times derivitive of output layer

# this is trying to multiply a 4x1 by a 4x5 matrix
dh = do.dot(olr2.T) #* ptron.sigmoid_derivative(hlr2) 

# dH = dZ.dot(Wz.T) * sigmoid_(H)

In [151]:
ptron.sigmoid_derivative(hlr2)

array([[ 0.24999996,  0.22028368],
       [ 0.1697705 ,  0.15236762],
       [ 0.239661  ,  0.21226908],
       [ 0.20782568,  0.16204206]])

In [152]:
print("do \n {}\n dh\n {}".format(do,dh))

do 
 [[-0.14520043]
 [-0.13008421]
 [ 0.07724541]
 [ 0.04421376]]
 dh
 [[-0.08868701 -0.11518617 -0.09541847 -0.11014568]
 [-0.07945417 -0.10319461 -0.08548485 -0.09867887]
 [ 0.04718074  0.06127808  0.05076183  0.05859658]
 [ 0.02700533  0.03507437  0.02905507  0.03353953]]


In [150]:
# step 5 Update the weights for the next iteration or epoch
#  Wz +=  H.T.dot(dZ)                          # update output layer weights
ptron.ilw = np.dot(ilr2,olr2.T)


In [151]:
ptron.ilw

array([[ 2.2259383 ],
       [ 3.01223666],
       [ 2.63803963],
       [ 2.70841929]])

In [101]:
olr2.shape, training_Y.shape

((1, 4), (4, 1))

In [65]:
ilr2, hlr2, olr2

(array([[ 0.51594211,  0.63838857,  0.55177584,  0.6184278 ],
        [ 0.7923126 ,  0.78364446,  0.62807226,  0.75307008],
        [ 0.74173467,  0.70912423,  0.56106647,  0.71628079],
        [ 0.58606164,  0.72397688,  0.61923741,  0.66191659]]),
 array([[ 0.62349393,  0.55756877,  0.67306074,  0.64910651],
        [ 0.82083168,  0.66163391,  0.89604174,  0.93156158],
        [ 0.72930755,  0.6033624 ,  0.83683684,  0.87373495],
        [ 0.73793922,  0.61831204,  0.77576938,  0.78442603]]),
 array([1, 1, 1, 1]))

In [64]:
ptron.ilw, ptron.hlw, ptron.olw

(array([[ 0.99121407,  0.32275752,  0.03764298,  0.4432068 ],
        [ 0.28391783,  0.39589792,  0.27846427,  0.18896654],
        [ 0.06379005,  0.56837692,  0.20784839,  0.48288038]]),
 array([[ 0.27965337,  0.09262718,  0.40034355,  0.99002406],
        [ 0.53417628,  0.06843972,  0.3129677 ,  0.45601461],
        [ 0.85431021,  0.63266396,  0.39868224,  0.50902435],
        [ 0.01115239,  0.11388817,  0.90244741,  0.38720216]]),
 array([[ 0.12651864],
        [ 0.50352375],
        [ 0.26456513],
        [ 0.6554159 ]]))

In [None]:
# https://stackoverflow.com/questions/24560298/python-numpy-valueerror-operands-could-not-be-broadcast-together-with-shapes