In [2]:
import numpy as np

class MLP:
    '''Multilayer perceptron trained by online backpropagation.

    Parameters:
    layers -- Sequence of ints giving the number of neurons in each layer.

    weights -- Sequence of (len(layers) - 1) numpy matrices W. W[l][j][i] gives the weight
        of the synaptic connection from neuron i of layer l to neuron j of layer (l+1).
        Defaults to all-zero matrices if left unspecified (None or empty).

    bias -- Sequence of (len(layers) - 1) numpy vectors b. b[l][j] gives the bias of
        neuron j of layer (l+1). Defaults to all-zero vectors if left unspecified (None or empty).

    transfer_functions -- Sequence of (len(layers) - 1) numpy vectorized function objects
        specifying the transfer function used by the neurons of each non-input layer.
        Defaults to the hyperbolic tangent if left unspecified.

    transfer_derivatives -- Sequence of (len(layers) - 1) numpy vectorized function objects
        giving the transfer function derivatives of the neurons of each non-input layer.
        Defaults to the hyperbolic tangent derivative if left unspecified.

    learning_rate -- Learning rate for backpropagation. Defaults to 0.1.
    '''

    def __init__(self, layers, weights=None, bias=None, transfer_functions=None, transfer_derivatives=None, learning_rate=0.1):
        self.layers = layers
        self.input_dim = layers[0]
        self.output_dim = layers[-1]
        self.num_layers = len(layers)

        # A level is the part of the network made of two adjacent layers together with the synaptic connections between them.
        self.levels = [(layers[i], layers[i+1]) for i in range(len(layers)-1)]
        self.num_levels = self.num_layers - 1

        # Test with "is None or len(...) == 0" rather than plain truthiness so that
        # a numpy array argument does not raise "truth value of an array is ambiguous".
        if weights is None or len(weights) == 0:
            self.weights = [np.zeros((n_out, n_in)) for n_in, n_out in self.levels]
        else:
            self.weights = weights

        if bias is None or len(bias) == 0:
            self.bias = [np.zeros(n_out) for _, n_out in self.levels]
        else:
            self.bias = bias

        self.learning_rate = learning_rate

        if transfer_functions is not None:
            self.transfer_functions = transfer_functions
        else:
            self.transfer_functions = [np.tanh] * self.num_levels

        if transfer_derivatives is not None:
            self.transfer_derivatives = transfer_derivatives
        else:
            self.transfer_derivatives = [lambda x: 1.0 - np.tanh(x)**2] * self.num_levels


    def validate(self):
        '''Check consistency of network parameters.

        Verifies that there are at least two layers, that weights, bias, transfer
        functions and transfer derivatives each have one entry per level, that every
        weight matrix has shape (neurons in next layer, neurons in this layer), that
        every bias vector has one entry per neuron of the next layer, and that the
        transfer functions and derivatives are callable.

        Raises ValueError describing the first inconsistency found; returns None otherwise.
        '''
        if self.num_layers < 2:
            raise ValueError("a network needs at least 2 layers, got {0}".format(self.num_layers))

        per_level = (
            ("weights", self.weights),
            ("bias", self.bias),
            ("transfer_functions", self.transfer_functions),
            ("transfer_derivatives", self.transfer_derivatives),
        )
        for name, values in per_level:
            if len(values) != self.num_levels:
                raise ValueError("{0} has {1} entries, expected {2}".format(name, len(values), self.num_levels))

        for l, (n_in, n_out) in enumerate(self.levels):
            if np.shape(self.weights[l]) != (n_out, n_in):
                raise ValueError("weights[{0}] has shape {1}, expected {2}".format(
                    l, np.shape(self.weights[l]), (n_out, n_in)))
            if np.shape(self.bias[l]) != (n_out,):
                raise ValueError("bias[{0}] has shape {1}, expected {2}".format(
                    l, np.shape(self.bias[l]), (n_out,)))
            if not callable(self.transfer_functions[l]):
                raise ValueError("transfer_functions[{0}] is not callable".format(l))
            if not callable(self.transfer_derivatives[l]):
                raise ValueError("transfer_derivatives[{0}] is not callable".format(l))


    def seed_weights(self, lower=-1.0, upper=1.0, seed=None):
        '''Initialize weights to random uniformly distributed reals in the half-open interval [lower, upper).
        Optionally seed the random number generator.
        '''

        if seed is not None:
            np.random.seed(seed)

        # Scale the unit-interval samples and shift them by `lower`, so that intervals
        # which are not symmetric about zero are honoured as well.
        self.weights = [lower + (upper - lower) * np.random.rand(n_out, n_in) for n_in, n_out in self.levels]


    def seed_bias(self, lower=-1.0, upper=1.0, seed=None):
        '''Initialize biases to random uniformly distributed reals in the half-open interval [lower, upper).
        Optionally seed the random number generator.
        '''

        if seed is not None:
            np.random.seed(seed)

        self.bias = [lower + (upper - lower) * np.random.rand(n_out) for _, n_out in self.levels]


    def feed(self, input_vector):
        '''Propagate input_vector through every level and return the output of the last layer.'''
        output = input_vector
        for i, W in enumerate(self.weights):
            output = self.transfer_functions[i](np.dot(W, output) + self.bias[i])
        return output


    def backpropagate(self, training_input, training_output):
        '''Adjust weights and biases using backpropagation on one training vector.

        training_input -- Input vector fed to the first layer.
        training_output -- Target output the last layer should produce for that input.

        Works for any network with at least one level (two layers).
        Raises ValueError if the network has no levels.
        '''
        if self.num_levels < 1:
            raise ValueError("backpropagation needs at least 2 layers, got {0}".format(self.num_layers))

        # Forward pass. activations[l] is the vector fed INTO level l (activations[0] is
        # the training input); net_values[l] is the weighted sum plus bias of level l's
        # output neurons, before the transfer function is applied.
        activations = [training_input]
        net_values = []
        for i, W in enumerate(self.weights):
            net = np.dot(W, activations[-1]) + self.bias[i]
            net_values.append(net)
            activations.append(self.transfer_functions[i](net))

        # Backward pass. Start with the output level, then walk down to level 0,
        # pushing each delta back through the (not yet updated) weights of the level above.
        deltas = [None] * self.num_levels
        deltas[-1] = (training_output - activations[-1]) * self.transfer_derivatives[-1](net_values[-1])
        for j in range(self.num_levels - 2, -1, -1):
            deltas[j] = np.dot(deltas[j+1], self.weights[j+1]) * self.transfer_derivatives[j](net_values[j])

        # Finally adjust weights and bias, only after all deltas have been computed.
        self.weights = [self.weights[j] + self.learning_rate * np.outer(deltas[j], activations[j])
                        for j in range(self.num_levels)]
        self.bias = [self.bias[j] + self.learning_rate * deltas[j] for j in range(self.num_levels)]


    def __repr__(self):
        return "MLP({0}, {1})".format(self.layers, self.weights)

In [8]:
# XOR problem: train a 2-2-1 network online (one randomly drawn pattern per step)
# and record the summed squared error over all four patterns after every update.

import matplotlib.pyplot as plt

mlp = MLP([2,2,1], learning_rate=0.5)
mlp.seed_weights()
mlp.seed_bias()

X = [[0,0],[0,1],[1,0],[1,1]]
Y = [0,1,1,0]

n = len(X)
steps = 100000

errors = np.zeros(steps)
for i in range(steps):
    ix = np.random.randint(0,n)
    mlp.backpropagate(X[ix], Y[ix])
    error = sum(np.linalg.norm(Y[k] - mlp.feed(X[k]))**2 for k in range(n))
    errors[i] = error

# Explicit figure/axes with labels so the plot can be read on its own.
fig, ax = plt.subplots()
ax.plot(np.arange(1, steps + 1), errors)
ax.set_xlabel("Training step")
ax.set_ylabel("Sum of squared errors over all patterns")
ax.set_title("XOR: training error of the 2-2-1 network")
plt.show()

[<matplotlib.lines.Line2D at 0x7fd0c45d55f8>]

In [248]:
# Print each input pattern next to the network's output for it.
for pattern in X:
    print(pattern, mlp.feed(pattern))

[0, 0, 0] [-0.0407064]
[0, 0, 1] [ 0.99600742]
[0, 1, 0] [ 0.9961289]
[0, 1, 1] [ 0.00126819]
[1, 0, 0] [ 0.99851061]
[1, 0, 1] [ 0.00364614]
[1, 1, 0] [ 0.00673528]
[1, 1, 1] [ 0.00227173]


In [14]:
# Auto-encoder experiment: 4 inputs, a single hidden neuron, 4 outputs.
# NOTE(review): this cell was headed "the 8-1-8 problem", but the layer sizes used
# here are 4-1-4 -- confirm which one was intended.
mlp = MLP(
    [4, 1, 4],
    transfer_functions=[lambda x: x, np.sin],    # identity on the hidden level, sine on the output level
    transfer_derivatives=[lambda x: 1, np.cos],  # derivatives of the two functions above
    learning_rate=0.2,
)
# Alternative configuration with the default tanh transfer functions:
# mlp = MLP([2,1,2], learning_rate=0.2)
mlp.seed_weights()
mlp.seed_bias()

def bitlist(n):
    '''Return all 2**n bit vectors of length n, each as a list of 0/1 ints.

    The first bit varies fastest, e.g. bitlist(2) == [[0, 0], [1, 0], [0, 1], [1, 1]].
    bitlist(0) is [[]].

    Raises ValueError if n is negative.
    '''
    if n < 0:
        raise ValueError("n must be non-negative, got {0}".format(n))

    vectors = [[]]
    for _ in range(n):
        # Extend every vector built so far with a 0, then every one of them with a 1.
        vectors = [v + [0] for v in vectors] + [v + [1] for v in vectors]
    return vectors

# Train the network to reproduce its own input for every 4-bit pattern,
# recording half the summed squared reconstruction error after each update.
X = bitlist(4)
n = len(X)

steps = 100000

errors = np.zeros(steps)

for i in range(steps):
    ix = np.random.randint(0,n)
    mlp.backpropagate(X[ix], X[ix])
    error = 0.5 * sum(np.linalg.norm(X[k] - mlp.feed(X[k]))**2 for k in range(n))
    errors[i] = error

# Explicit figure/axes with labels so the plot can be read on its own.
fig, ax = plt.subplots()
ax.plot(np.arange(1, steps + 1), errors)
ax.set_xlabel("Training step")
ax.set_ylabel("Half the sum of squared errors over all patterns")
ax.set_title("Auto-encoder: training error")
plt.show()

In [11]:
# Error recorded at index 100, i.e. after the 101st training step of the loop that filled `errors`.
errors[100]

0.79253659455644687

In [12]:
# Print each input pattern next to what the network produces for it.
for sample in X:
    reconstruction = mlp.feed(sample)
    print(sample, reconstruction)

[0, 0] [-0.09434568  0.17252593]
[1, 0] [ 0.75181116  0.71195231]
[0, 1] [ 0.40934199  0.48936508]
[1, 1] [ 0.97931006  0.90451564]
