In [90]:
import numpy as np

class MLP:
    '''Multilayer perceptron trained with online backpropagation.

    Parameters:
    layers -- Sequence of ints giving the number of neurons in each layer.

    weights -- Sequence of (len(layers) - 1) numpy matrices W. W[l][j][i] gives the weight
        of the synaptic connection from neuron i of layer l to neuron j of layer (l+1).
        Defaults to all-zero matrices if left unspecified (or empty).

    bias -- Sequence of (len(layers) - 1) numpy vectors. bias[l][j] is the bias added to the
        net value of neuron j of layer (l+1). Defaults to all-zero vectors if left
        unspecified (or empty).

    transfer_functions -- Sequence of (len(layers) - 1) numpy vectorized function objects
        specifying the transfer function used by the neurons of each non-input layer.
        Defaults to the hyperbolic tangent if left unspecified.

    transfer_derivatives -- Sequence of (len(layers) - 1) numpy vectorized function objects
        giving the transfer function derivatives of the neurons of each non-input layer.
        Defaults to the hyperbolic tangent derivative if left unspecified.

    learning_rate -- Learning rate for backpropagation. Defaults to 0.1.
    '''

    def __init__(self, layers, weights=None, bias=None, transfer_functions=None, transfer_derivatives=None, learning_rate=0.1):
        self.layers = layers
        self.input_dim = layers[0]
        self.output_dim = layers[-1]
        self.num_layers = len(layers)

        # A level is the part of the network made of two adjacent layers together with
        # the synaptic connections between them: (inputs of the level, outputs of the level).
        self.levels = [(layers[i], layers[i+1]) for i in range(len(layers)-1)]
        self.num_levels = self.num_layers - 1

        # Explicit None/empty test: plain truthiness raises on a numpy array argument.
        if self._is_missing(weights):
            weights = [np.zeros((n_out, n_in)) for n_in, n_out in self.levels]
        if self._is_missing(bias):
            bias = [np.zeros(n_out) for n_in, n_out in self.levels]
        self.weights = weights
        self.bias = bias

        self.learning_rate = learning_rate

        if transfer_functions is not None:
            self.transfer_functions = transfer_functions
        else:
            self.transfer_functions = [np.tanh] * self.num_levels

        if transfer_derivatives is not None:
            self.transfer_derivatives = transfer_derivatives
        else:
            self.transfer_derivatives = [lambda x: 1.0 - np.tanh(x)**2] * self.num_levels


    @staticmethod
    def _is_missing(seq):
        '''Return True if an optional per-level sequence was not supplied (None or empty).'''
        if seq is None:
            return True
        try:
            return len(seq) == 0
        except TypeError:
            # Objects without a length (e.g. generators) are treated as supplied.
            return False


    def validate(self):
        '''Check consistency of network parameters.

        Verifies that weights, bias, transfer_functions and transfer_derivatives each hold
        one entry per level, and that every weight matrix / bias vector has the shape
        implied by `layers`. Returns None when everything is consistent.

        Raises:
        ValueError -- describing the first inconsistency found.
        '''
        per_level = (
            ('weights', self.weights),
            ('bias', self.bias),
            ('transfer_functions', self.transfer_functions),
            ('transfer_derivatives', self.transfer_derivatives),
        )
        for name, seq in per_level:
            if len(seq) != self.num_levels:
                raise ValueError('{0} has {1} entries, expected {2}'.format(name, len(seq), self.num_levels))

        for l, (n_in, n_out) in enumerate(self.levels):
            if np.shape(self.weights[l]) != (n_out, n_in):
                raise ValueError('weights[{0}] has shape {1}, expected {2}'.format(
                    l, np.shape(self.weights[l]), (n_out, n_in)))
            if np.shape(self.bias[l]) != (n_out,):
                raise ValueError('bias[{0}] has shape {1}, expected {2}'.format(
                    l, np.shape(self.bias[l]), (n_out,)))


    def seed_weights(self, lower=-1.0, upper=1.0, seed=None):
        '''Initialize weights to random uniformly distributed reals in the half-open interval [lower, upper).
        Optionally seed the (global numpy) random number generator.
        '''

        if seed is not None:
            np.random.seed(seed)

        # rand() is uniform on [0, 1); shift by `lower` so the result really spans
        # [lower, upper) even when the interval is not symmetric around zero.
        self.weights = [lower + np.random.rand(n_out, n_in) * (upper - lower) for n_in, n_out in self.levels]


    def seed_bias(self, lower=-1.0, upper=1.0, seed=None):
        '''Initialize biases to random uniformly distributed reals in the half-open interval [lower, upper).
        Optionally seed the (global numpy) random number generator.
        '''

        if seed is not None:
            np.random.seed(seed)

        self.bias = [lower + np.random.rand(n_out) * (upper - lower) for n_in, n_out in self.levels]


    def feed(self, input_vector):
        '''Propagate input_vector through every level and return the output of the last layer.'''
        output = input_vector
        for i, W in enumerate(self.weights):
            output = self.transfer_functions[i](np.dot(W, output) + self.bias[i])
        return output


    def backpropagate(self, training_input, training_output):
        '''Adjust weights and biases using backpropagation on one training vector.

        training_input -- Input vector fed to the first layer.
        training_output -- Desired output vector of the last layer.

        Works for any number of levels, including a network with a single level
        (no hidden layer).
        '''

        # Forward pass. For each level remember the vector it received as input and the
        # net values (weighted sum of synaptic inputs + bias) of its output neurons.
        level_inputs = []
        net_values = []
        signal = training_input
        for i, W in enumerate(self.weights):
            level_inputs.append(signal)
            net = np.dot(W, signal) + self.bias[i]
            signal = self.transfer_functions[i](net)
            net_values.append(net)

        # Backward pass. All deltas are computed from the weights as they were before
        # this update; the weights are only replaced once every delta is known.
        deltas = [None] * self.num_levels
        for j in reversed(range(self.num_levels)):
            if j == self.num_levels - 1:
                # Output level: error is the difference to the desired output.
                error_signal = training_output - signal
            else:
                # Other levels: error propagated back through the next level's weights.
                error_signal = np.dot(deltas[j+1], self.weights[j+1])
            deltas[j] = error_signal * self.transfer_derivatives[j](net_values[j])

        # Finally adjust weights and bias.
        self.weights = [self.weights[j] + self.learning_rate * np.outer(deltas[j], level_inputs[j])
                        for j in range(self.num_levels)]
        self.bias = [self.bias[j] + self.learning_rate * deltas[j] for j in range(self.num_levels)]


    def __repr__(self):
        return "MLP({0}, {1})".format(self.layers, self.weights)

In [266]:
# XOR problem: train a 2-2-1 network with online backpropagation.

XOR_SEED = 0  # Fixed seed so the run is reproducible after a kernel restart.

mlp = MLP([2,2,1], learning_rate=0.5)
# seed_weights seeds numpy's global RNG; seed_bias and randint below continue that stream.
mlp.seed_weights(seed=XOR_SEED)
mlp.seed_bias()

X = [[0,0],[0,1],[1,0],[1,1]]
Y = [0,1,1,0]

n = len(X)

for i in range(100000):
    # Pick one training pattern at random and do a single online update.
    ix = np.random.randint(0,n)
    mlp.backpropagate(X[ix], Y[ix])
    if i % 100 == 0:
        # Summed squared error over all training patterns.
        error = sum(np.linalg.norm(Y[k] - mlp.feed(X[k]))**2 for k in range(n))
        print('Error: {0}'.format(error))

Error: 1.3248422655473633
Error: 1.2548549263567006
Error: 0.7899329947588596
Error: 0.6044538534090328
Error: 0.049905028353135136
Error: 0.015536590608611124
Error: 0.0419431356692855
Error: 0.020579757427081173
Error: 0.016197880020680986
Error: 0.008228889893424745
Error: 0.013613276628201316
Error: 0.00965271353008835
Error: 0.005478222409751278
Error: 0.011779095182343739
Error: 0.008504553042425728
Error: 0.004728532615813926
Error: 0.003128448967049315
Error: 0.008215190637484725
Error: 0.00469184506284092
Error: 0.0015498315282948508
Error: 0.002075191323672216
Error: 0.004414695532926566
Error: 0.005963027784733496
Error: 0.0024577779227828002
Error: 0.0009100711507629298
Error: 0.0024087909114779774
Error: 0.00326687361297179
Error: 0.0023566938261070543
Error: 0.0012229292772830513
Error: 0.0061360939147182235
Error: 0.0019281396788281581
Error: 0.000652214561922494
Error: 0.000542729634785704
Error: 0.0007290042004292475
Error: 0.0036407530708024776
Error: 0.00259114237104

In [248]:
# Show each training input next to the output the trained network produces for it.
for pattern in X:
    print(pattern, mlp.feed(pattern))

[0, 0, 0] [-0.0407064]
[0, 0, 1] [ 0.99600742]
[0, 1, 0] [ 0.9961289]
[0, 1, 1] [ 0.00126819]
[1, 0, 0] [ 0.99851061]
[1, 0, 1] [ 0.00364614]
[1, 1, 0] [ 0.00673528]
[1, 1, 1] [ 0.00227173]


In [262]:
# Trying out the 8-1-8 problem, here scaled down to a 2-1-2 network:
# the net is trained to reproduce its input through a single hidden neuron.

ENCODER_SEED = 0  # Fixed seed so the run is reproducible after a kernel restart.

# Alternative configuration (identity hidden neuron, sine output neurons):
# mlp = MLP([2,1,2], transfer_functions=[lambda x: x, np.sin], transfer_derivatives=[lambda x: 1, np.cos], learning_rate=0.2)
mlp = MLP([2,1,2], learning_rate=0.2)
# seed_weights seeds numpy's global RNG; seed_bias continues the same stream.
mlp.seed_weights(seed=ENCODER_SEED)
mlp.seed_bias()

def bitlist(n):
    '''Return all 2**n lists of n bits (0/1 ints).

    The first bit varies fastest, e.g. bitlist(2) == [[0, 0], [1, 0], [0, 1], [1, 1]].
    bitlist(0) returns [[]].

    Raises:
    ValueError -- if n is negative.
    '''
    if n < 0:
        raise ValueError('n must be non-negative, got {0}'.format(n))

    lists = [[]]
    for _ in range(n):
        # Extend every list built so far by a 0, then by a 1.
        lists = [l + [0] for l in lists] + [l + [1] for l in lists]
    return lists

# One training pattern per possible bit vector; the width is taken from the network
# so the data always matches the input layer.
X = bitlist(mlp.input_dim)
n = len(X)

for i in range(1000000):
    # Auto-association: the target is the input itself.
    ix = np.random.randint(0,n)
    mlp.backpropagate(X[ix], X[ix])
    if i % 100 == 0:
        # Sum of Euclidean distances between each pattern and its reconstruction.
        error = sum(np.linalg.norm(X[k] - mlp.feed(X[k])) for k in range(n))
        print('Error: {0}'.format(error))

Error: 5.293328395582459
Error: 1.8189176804704172
Error: 2.083375016733529
Error: 1.9125466801192825
Error: 1.7635408102921912
Error: 1.7668991490028707
Error: 1.7435693877963696
Error: 1.7542483750013456
Error: 1.8117431645948776
Error: 1.6872596273831122
Error: 1.7992550166336996
Error: 1.6727864098867145
Error: 1.7643326347024468
Error: 1.7551308691180032
Error: 1.864622040392152
Error: 1.5767206961471902
Error: 1.636518657604497
Error: 1.9334816390591067
Error: 1.7182103428375826
Error: 1.7015245767792309
Error: 1.923504896009838
Error: 1.7533096123240512
Error: 1.7974706236423852
Error: 1.943987496357591
Error: 1.8769198265038691
Error: 1.9218835197377406
Error: 1.8338294985672916
Error: 1.8033467067094784
Error: 2.1044990283966953
Error: 1.8320333219552016
Error: 1.675642427237854
Error: 1.9208064457642013
Error: 1.901311655171107
Error: 1.6261552206032182
Error: 1.7340095956833432
Error: 1.8475538550457393
Error: 1.7638268626022564
Error: 1.5683012680815351
Error: 1.93360562267

KeyboardInterrupt: 

In [242]:
# Show each training input next to the output the trained network produces for it.
for pattern in X:
    print(pattern, mlp.feed(pattern))

[0, 0, 0, 0, 0, 0, 0, 0] [  9.14699830e-01   2.34878488e-03   8.64234656e-01   9.41189224e-01
  -5.26067034e-04   8.59565398e-01   5.02788091e-01   1.87662627e-01]
[1, 0, 0, 0, 0, 0, 0, 0] [  9.14702363e-01   1.48930051e-03   8.64242702e-01   9.41191058e-01
   2.92435467e-04   8.59556660e-01   5.02755555e-01   1.87639140e-01]
[0, 1, 0, 0, 0, 0, 0, 0] [ 0.69150741  1.          0.20695214  0.71473672  0.0079095   0.92784125
  0.83405909  0.64828997]
[1, 1, 0, 0, 0, 0, 0, 0] [ 0.72392423  1.          0.30176009  0.75104145  0.00949056  0.92318919
  0.81490034  0.61603395]
[0, 0, 1, 0, 0, 0, 0, 0] [  9.14709697e-01   2.68882233e-04   8.64256572e-01   9.41197868e-01
  -1.35718527e-03   8.59564357e-01   5.02768750e-01   1.87625734e-01]
[1, 0, 1, 0, 0, 0, 0, 0] [  9.14708978e-01   2.45895943e-04   8.64256272e-01   9.41197031e-01
  -7.43425299e-04   8.59559892e-01   5.02755378e-01   1.87620908e-01]
[0, 1, 1, 0, 0, 0, 0, 0] [ 0.89513845  0.99955251  0.81466026  0.92424817  0.00265091  0.8730076