In [110]:
%matplotlib inline

In [111]:
import numpy as np

In [112]:
# Data - also - Input layer
training_X= np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])
training_Y = np.array([[0],[1], [1], [0]])

In [113]:
# Model a class with input layer 4x3
# Hidden layer 3 x 4
# output layer 4 x 1

class Perceptron(object):
    def __init__(self):
        self.ilw = np.random.rand(3,4) #setup random weights for input layer
        self.hlw = np.random.rand(4,4) # setup random weights for hidden layer
        self.olw = np.random.rand(4,1) # setup random weights for output layer
        
    def sigmoid(self, dot_product):
        return 1 / (1 + np.exp(-dot_product))
    
    def step(self, dot_product):
        step_transform = []
        for item in dot_product:
            if item >= .5:
                step_transform.append(1)
            elif item < .5:
                step_transform.append(0)
        return(np.array(step_transform, ndmin=2)) # need ndmin 2 to avoid (4,) we need (4,1)
    
    def sigmoid_derivative(self, dot_product):
        return dot_product * (1 - dot_product)
            
        

In [114]:
ptron = Perceptron()

In [115]:
# Step one get the dot products for the layers
ilr = np.dot(training_X, ptron.ilw)
hlr = np.dot(ilr, ptron.hlw)
olr = np.dot(hlr, ptron.olw)

In [128]:
print("ilr \n{} \n hlr \n {}\n olr \n {}".format(ilr, hlr, olr))

ilr 
[[ 0.1543471   0.0179523   0.28588332  0.27368384]
 [ 1.26969298  1.42150431  1.49824161  1.5892782 ]
 [ 0.70533756  0.70391266  0.61239252  1.12747305]
 [ 0.71870252  0.73554395  1.17173241  0.735489  ]] 
 hlr 
 [[ 0.25107557  0.44553284  0.28113424  0.18419961]
 [ 2.10404922  2.88609741  2.14859481  2.05092235]
 [ 1.23958157  1.54642688  1.11235379  1.17744773]
 [ 1.11554322  1.78520337  1.31737527  1.05767424]]
 olr 
 [[ 0.6025902 ]
 [ 4.91605594]
 [ 2.71111153]
 [ 2.80753461]]


In [116]:
# step 2 - run each layer through it's activation function
ilr2 = ptron.sigmoid(ilr)
hlr2 = ptron.sigmoid(hlr)
olr2 = ptron.step(olr).T # this is a 1x4 matrix and we need 4x1 to subtract so transpose

In [129]:
print("ilr2 \n{} \n hlr2 \n {}\n olr2 \n {}".format(ilr2, hlr2, olr2))

ilr2 
[[ 0.53851035  0.50448795  0.570988    0.56799706]
 [ 0.78069019  0.80557414  0.81731207  0.83051453]
 [ 0.66937011  0.66905469  0.64848637  0.75537226]
 [ 0.67232124  0.67602067  0.76345801  0.67600864]] 
 hlr2 
 [[ 0.56244122  0.6095766   0.56982428  0.54592014]
 [ 0.89129612  0.94715489  0.89553739  0.88604078]
 [ 0.77549117  0.82439706  0.75256767  0.76448859]
 [ 0.75316109  0.85633819  0.78874469  0.74224584]]
 olr2 
 [[1]
 [1]
 [1]
 [1]]


In [117]:
# step 3 - calculate the error
error = training_Y - olr2

In [130]:
print("error \n {}".format(error))

error 
 [[-1]
 [ 0]
 [ 0]
 [-1]]


In [119]:
# step 4 get the deltas - how much do we need to learn
# we do dot products from right to left through the network
# stepping backwards through each calculation to determine a better set of weights
# until we find the set that gets the results we are looking for
# like the way the human nervous system sends impulses to the brain that then processes and 
# sends back a response

# change in the output layer is derrived from the error

do = ptron.sigmoid_derivative(olr2) * error # delta=output layer-error times derivitive of output layer

dh = do.dot(olr2.T) * ptron.sigmoid_derivative(hlr2) 

# dH = dZ.dot(Wz.T) * sigmoid_(H)

In [131]:
print("do \n {}\n dh\n {}".format(do,dh))

do 
 [[0]
 [0]
 [0]
 [0]]
 dh
 [[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


In [121]:
# step 5 Update the weights for the next iteration or epoch



array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [101]:
olr2.shape, training_Y.shape

((1, 4), (4, 1))

In [65]:
ilr2, hlr2, olr2

(array([[ 0.51594211,  0.63838857,  0.55177584,  0.6184278 ],
        [ 0.7923126 ,  0.78364446,  0.62807226,  0.75307008],
        [ 0.74173467,  0.70912423,  0.56106647,  0.71628079],
        [ 0.58606164,  0.72397688,  0.61923741,  0.66191659]]),
 array([[ 0.62349393,  0.55756877,  0.67306074,  0.64910651],
        [ 0.82083168,  0.66163391,  0.89604174,  0.93156158],
        [ 0.72930755,  0.6033624 ,  0.83683684,  0.87373495],
        [ 0.73793922,  0.61831204,  0.77576938,  0.78442603]]),
 array([1, 1, 1, 1]))

In [64]:
ptron.ilw, ptron.hlw, ptron.olw

(array([[ 0.99121407,  0.32275752,  0.03764298,  0.4432068 ],
        [ 0.28391783,  0.39589792,  0.27846427,  0.18896654],
        [ 0.06379005,  0.56837692,  0.20784839,  0.48288038]]),
 array([[ 0.27965337,  0.09262718,  0.40034355,  0.99002406],
        [ 0.53417628,  0.06843972,  0.3129677 ,  0.45601461],
        [ 0.85431021,  0.63266396,  0.39868224,  0.50902435],
        [ 0.01115239,  0.11388817,  0.90244741,  0.38720216]]),
 array([[ 0.12651864],
        [ 0.50352375],
        [ 0.26456513],
        [ 0.6554159 ]]))