## Imports

In [1]:
import numpy as np

Initialization
-------------

Set up Hyperparameters
* InputSize
* OutputSize
* HiddenLayerWidth
* HiddenLayerHeight

In [2]:
class perceptron(object):
    def __init__(self, input_size, output_size, hl_neurons, hl_layers, learningrate):
        self.learningrate = learningrate
        self.input_size = input_size
        self.output_size = output_size
        self.hl_neurons = hl_neurons
        self.hl_layers = hl_layers
        
        # Now setup the weights in this format (inputs x neurons)
        self.weights = []
        self.ths = []
        self.weights.append(np.random.rand(input_size, hl_neurons))
        self.ths.append(np.random.rand(1,hl_neurons))
        for i in xrange(1, hl_layers):
            self.weights.append(np.random.rand(hl_neurons, hl_neurons))
            self.ths.append(np.random.rand(1, hl_neurons))
        self.weights.append(np.random.rand(hl_neurons, output_size))
        self.ths.append(np.random.rand(1, output_size))
    
    def sigmoid(self, z):
        return 1/(1+np.exp(-z))
    
    def sigmoid_delta(self, z):
        return (-np.exp(-z))/((1+np.exp(-z))**2)
    
    def feed_forward(self, inputs):
        output = inputs
        for i in xrange(0, len(self.weights)):
            output = np.dot(output, self.weights[i])
            output += self.ths[i]
            output = self.sigmoid(output)
        return output
    
    def train(self, inputs, truths):
        # First get the error and delta for output layer
        A = []
        Z = []
        output = inputs
        for i in xrange(0, len(self.weights)):
            output = np.dot(output, self.weights[i]) + self.ths[i]
            Z.append(output)
            output = self.sigmoid(output)
            A.append(output)
        curdelta = (truths - output) * self.sigmoid_delta(Z[len(Z)-1])
        curWL = np.transpose(self.weights[len(self.weights)-1])
         # Now go backwards
        deltas = [curdelta]
        
        for i in reversed(xrange(0, len(self.weights)-1)):
            curdelta = np.dot(curdelta, curWL) * self.sigmoid_delta(Z[i])
            curWL = np.transpose(self.weights[i])
            deltas.insert(0, curdelta)
            
        ## Perform gradient descent
        for i in reversed(xrange(1, len(self.weights))):
            A_av = np.array([np.mean(A[i-1][:,x]) for x in xrange(0, len(A[i-1][0]))])
            print "Delta %d: %s" % (i, np.array2string(deltas[i]))
            print "A     %d: %s" % (i-1, A[i-1])
            print "A_av  %d: %s" % (i-1, A_av)
            print "W     %d: %s" % (i, self.weights[i])
            print "A * delta: %s" % (np.dot((np.matrix(A[i-1])), np.transpose(deltas[i])))
            delta_av = np.array([np.mean(deltas[i][:,x]) for x in xrange(0,len(deltas[i][0]))])
            self.weights[i] -= self.learningrate * (np.dot(deltas[i], np.matrix(A_av)))
            self.ths[i] -= self.learningrate * delta_av
        
        return truths - self.feed_forward(inputs)

In [3]:
# Create an new perceptron for testing
pp = perceptron(2, 1, 3, 1, 0.1)

Testing Input (AND)
---------
Sample truth table for AND:

A | B | **Output**
--- | --- | ---
0 | 0 | 0
0 | 1 | 0
1 | 0 | 0
1 | 1 | 1

In [4]:
input_AND = np.array([[0,0],[0,1],[1,0],[1,1]])
gtruth_AND = np.array([[0],[0],[0],[1]])

In [5]:
pp.train(input_AND, gtruth_AND)
abs(gtruth_AND - pp.feed_forward(input_AND))

Delta 1: [[ 0.07327707]
 [ 0.06277067]
 [ 0.05741011]
 [-0.00290618]]
A     0: [[ 0.61910204  0.68310824  0.58328322]
 [ 0.63324645  0.71684842  0.75333066]
 [ 0.77609223  0.846152    0.61313904]
 [ 0.78641809  0.86593939  0.77568859]]
A_av  0: [ 0.7037147   0.77801201  0.68136038]
W     1: [[ 0.9699894 ]
 [ 0.8336305 ]
 [ 0.95318225]]


ValueError: shapes (4,3) and (1,4) not aligned: 3 (dim 1) != 1 (dim 0)

In [259]:
pp.feed_forward(input_AND)

array([[ 0.75133662],
       [ 0.75906811],
       [ 0.75261174],
       [ 0.76041134]])

In [157]:
deltas[0]

array([[-0.00700885, -0.00828953, -0.01480633],
       [-0.00618889, -0.00579307, -0.00895242],
       [-0.0042668 , -0.00755741, -0.00962132],
       [ 0.0004433 ,  0.000628  ,  0.00061614]])

In [158]:
pp.ths[0]

array([[ 0.97877439,  0.75552273,  0.96811647]])

In [162]:
np.mean(deltas[:][0])

-0.0058997649885304338

In [173]:
[np.mean(deltas[0][:,x]) for x in xrange(0,len(deltas[0][0]))]

[-0.0042553115390108233, -0.0052530018209328306, -0.0081909816056476466]