In [133]:
%matplotlib inline

In [134]:
import numpy as np

In [135]:
# Data - also - Input layer
training_X= np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])
training_Y = np.array([[0],[1], [1], [0]])

In [136]:
# Model a class with input layer 4x3
# Hidden layer 3 x 4
# output layer 4 x 1

class Perceptron(object):
    def __init__(self):
        self.ilw = np.random.rand(3,4) #setup random weights for input layer
        self.hlw = np.random.rand(4,4) # setup random weights for hidden layer
        self.olw = np.random.rand(4,1) # setup random weights for output layer
        
    def sigmoid(self, dot_product):
        return 1 / (1 + np.exp(-dot_product))
    
    def step(self, dot_product):
        step_transform = []
        for item in dot_product:
            if item >= .5:
                step_transform.append(1)
            elif item < .5:
                step_transform.append(0)
        return(np.array(step_transform, ndmin=2)) # need ndmin 2 to avoid (4,) we need (4,1)
    
    def sigmoid_derivative(self, dot_product):
        return dot_product * (1 - dot_product)
            
        

In [137]:
ptron = Perceptron()

In [138]:
# Step one get the dot products for the layers
ilr = np.dot(training_X, ptron.ilw)
hlr = np.dot(ilr, ptron.hlw)
olr = np.dot(hlr, ptron.olw)

In [139]:
print("ilr \n{} \n hlr \n {}\n olr \n {}".format(ilr, hlr, olr))

ilr 
[[ 0.73965863  0.88358669  0.60861958  0.01903352]
 [ 1.78143152  1.73027882  2.09558792  1.61810856]
 [ 1.52581868  0.96322956  1.34975098  0.74040638]
 [ 0.99527146  1.65063595  1.35445652  0.89673569]] 
 hlr 
 [[ 1.39285     1.28318946  0.92102226  0.51593638]
 [ 3.58766873  3.61237949  2.15562882  1.99525044]
 [ 2.41541104  2.60815378  1.3621007   1.18499108]
 [ 2.56510768  2.28741517  1.71455038  1.32619574]]
 olr 
 [[ 1.21522192]
 [ 3.0552578 ]
 [ 2.00846982]
 [ 2.2620099 ]]


In [140]:
# step 2 - run each layer through it's activation function
ilr2 = ptron.sigmoid(ilr)
hlr2 = ptron.sigmoid(hlr)
olr2 = ptron.sigmoid(olr).T # this is a 1x4 matrix and we need 4x1 to subtract so transpose

In [141]:
print("ilr2 \n{} \n hlr2 \n {}\n olr2 \n {}".format(ilr2, hlr2, olr2))

ilr2 
[[ 0.6769212   0.70756492  0.64762584  0.50475824]
 [ 0.85587354  0.84944808  0.89047361  0.83453411]
 [ 0.82139371  0.72376795  0.79408891  0.67708471]
 [ 0.73012788  0.83897698  0.79485726  0.71027822]] 
 hlr2 
 [[ 0.80104684  0.7829922   0.71525035  0.62619707]
 [ 0.97308188  0.97372164  0.8961936   0.8802975 ]
 [ 0.91799495  0.9313845   0.7961009   0.76584403]
 [ 0.92858193  0.90782939  0.84742556  0.79021067]]
 olr2 
 [[ 0.7712216   0.95500898  0.88168349  0.90568146]]


In [142]:
# step 3 - calculate the error
error = training_Y - olr2

In [143]:
print("error \n {}".format(error))

error 
 [[-0.7712216  -0.95500898 -0.88168349 -0.90568146]
 [ 0.2287784   0.04499102  0.11831651  0.09431854]
 [ 0.2287784   0.04499102  0.11831651  0.09431854]
 [-0.7712216  -0.95500898 -0.88168349 -0.90568146]]


In [144]:
# step 4 get the deltas - how much do we need to learn
# we do dot products from right to left through the network
# stepping backwards through each calculation to determine a better set of weights
# until we find the set that gets the results we are looking for
# like the way the human nervous system sends impulses to the brain that then processes and 
# sends back a response

# change in the output layer is derrived from the error

do = ptron.sigmoid_derivative(olr2) * error # delta=output layer-error times derivitive of output layer

dh = do.dot(olr2.T) * ptron.sigmoid_derivative(hlr2) 

# dH = dZ.dot(Wz.T) * sigmoid_(H)

In [145]:
print("do \n {}\n dh\n {}".format(do,dh))

do 
 [[-0.13607345 -0.04103371 -0.09197521 -0.07736562]
 [ 0.0403654   0.00193312  0.01234251  0.00805693]
 [ 0.0403654   0.00193312  0.01234251  0.00805693]
 [-0.13607345 -0.04103371 -0.09197521 -0.07736562]]
 dh
 [[-0.04706092 -0.05017466 -0.06014131 -0.06912026]
 [ 0.00133996  0.00130897  0.00475908  0.0053905 ]
 [ 0.00385104  0.00326925  0.00830386  0.00917365]
 [-0.01958303 -0.02470861 -0.03817992 -0.04895284]]


In [150]:
# step 5 Update the weights for the next iteration or epoch
#  Wz +=  H.T.dot(dZ)                          # update output layer weights
ptron.ilw = np.dot(ilr2,olr2.T)


In [151]:
ptron.ilw

array([[ 2.2259383 ],
       [ 3.01223666],
       [ 2.63803963],
       [ 2.70841929]])

In [101]:
olr2.shape, training_Y.shape

((1, 4), (4, 1))

In [65]:
ilr2, hlr2, olr2

(array([[ 0.51594211,  0.63838857,  0.55177584,  0.6184278 ],
        [ 0.7923126 ,  0.78364446,  0.62807226,  0.75307008],
        [ 0.74173467,  0.70912423,  0.56106647,  0.71628079],
        [ 0.58606164,  0.72397688,  0.61923741,  0.66191659]]),
 array([[ 0.62349393,  0.55756877,  0.67306074,  0.64910651],
        [ 0.82083168,  0.66163391,  0.89604174,  0.93156158],
        [ 0.72930755,  0.6033624 ,  0.83683684,  0.87373495],
        [ 0.73793922,  0.61831204,  0.77576938,  0.78442603]]),
 array([1, 1, 1, 1]))

In [64]:
ptron.ilw, ptron.hlw, ptron.olw

(array([[ 0.99121407,  0.32275752,  0.03764298,  0.4432068 ],
        [ 0.28391783,  0.39589792,  0.27846427,  0.18896654],
        [ 0.06379005,  0.56837692,  0.20784839,  0.48288038]]),
 array([[ 0.27965337,  0.09262718,  0.40034355,  0.99002406],
        [ 0.53417628,  0.06843972,  0.3129677 ,  0.45601461],
        [ 0.85431021,  0.63266396,  0.39868224,  0.50902435],
        [ 0.01115239,  0.11388817,  0.90244741,  0.38720216]]),
 array([[ 0.12651864],
        [ 0.50352375],
        [ 0.26456513],
        [ 0.6554159 ]]))