In [None]:
""" 
    By github.com/vzki (Vadim Tschernezki).
    Low-level implementation of a Multi Layer Perceptron (MLP) in Python.
    Uses batched matrix operations: the forward and backward passes process a whole batch of samples at once rather than one sample at a time.
    
    Use case: classifying XOR. With only a single hidden neuron the XOR problem cannot be solved, because XOR is not linearly separable.
"""

In [1]:
import numpy as np
import time

In [2]:
# Configuration: data set sizes and the random seed.

# number of samples drawn for the training and the test split of the XOR data set
nb_samples_tr = 500
nb_samples_te = 100

# Both the data set and the initial layer weights are drawn from numpy's global
# random generator, so it is seeded once here to make "Restart & Run All" reproducible.
# NOTE: the outputs recorded in this notebook were produced before the seed was
# introduced, so a re-run prints different numbers; reaching an accuracy of 1.0
# within the configured number of epochs is not guaranteed for every seed.
SEED = 0
np.random.seed(SEED)

In [3]:
# XOR data set

def make_xor_dataset(nb_samples):
    """Draw random binary feature pairs together with their XOR labels.

    Parameters
    ----------
    nb_samples : int
        Number of samples to draw.

    Returns
    -------
    xs : integer array of shape [nb_samples, 2] with entries in {0, 1}
    ys : boolean array of shape [nb_samples, 1], the XOR of the two feature columns
    """
    xs = np.random.randint(low = 0, high = 2, size = [nb_samples, 2])

    # keep the labels as a column so they line up with the network output
    ys = np.logical_xor(xs[:, 0], xs[:, 1]).reshape((nb_samples, 1))

    return xs, ys


# the training split is drawn first, then the test split (same order of random draws as before)
xs_tr, ys_tr = make_xor_dataset(nb_samples_tr)
xs_te, ys_te = make_xor_dataset(nb_samples_te)

# single-argument print(...) gives the same text on Python 2 and also runs on Python 3
print("{} {}".format(xs_te[:10], ys_te[:10]))

[[0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [1 1]] [[False]
 [ True]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [ True]
 [ True]
 [False]]


In [4]:
def sigmoid(a, derived = False):
    """Element-wise logistic sigmoid 1 / (1 + exp(-a)), or its derivative.

    Parameters
    ----------
    a : scalar or array-like of numbers
        Pre-activation value(s).
    derived : bool
        If true, return the derivative s(a) * (1 - s(a)) instead of s(a).

    Returns
    -------
    numpy scalar or array with the same shape as `a`.
    """
    a = np.asarray(a)

    # exp(-|a|) never exceeds 1, so it cannot overflow the way exp(-a) does
    # for very negative `a`.
    decay = np.exp(-np.abs(a))

    # a >= 0:  1 / (1 + exp(-a))
    # a <  0:  exp(a) / (1 + exp(a))   -- the same function, written without overflow
    s = np.where(a >= 0, 1.0, decay) / (1.0 + decay)

    if derived:
        # the sigmoid is evaluated once and reused for the derivative
        return s * (1 - s)

    return s

In [5]:
def calc_accuracy(pred, targ):
    """Fraction of samples whose predicted label matches the target label.

    Parameters
    ----------
    pred : array-like, shape [nb_samples, nb_outputs] (or [nb_samples])
        Network outputs.
    targ : array-like with the same shape as `pred`
        Target values.

    Returns
    -------
    Number of matching labels divided by the number of samples.

    With several output columns the label of a sample is the index of its
    largest entry. With a single output column (or 1-D input) the label is
    obtained by thresholding at 0.5, because an argmax over one column is
    always 0 and would report every prediction as correct.
    """
    pred = np.asarray(pred)
    targ = np.asarray(targ)

    nb_samples = float(pred.shape[0])

    if pred.ndim == 1 or pred.shape[1] == 1:

        labels_pred = pred.reshape(-1) > 0.5
        labels_targ = targ.reshape(-1) > 0.5

    else:

        labels_targ = np.argmax(targ, axis = 1)
        labels_pred = np.argmax(pred, axis = 1)

    return np.sum(labels_targ == labels_pred) / nb_samples
    

In [6]:
class Dense:
    """Fully connected layer followed by a sigmoid activation.

    There is no separate input-layer object: a Dense layer consumes the
    activations handed to it directly, so in classical terms the first Dense
    layer is the "second" layer of the network.

    Shapes: weights are [nb_inputs, nb_neurons] and biases are [1, nb_neurons],
    so a batch [nb_samples, nb_inputs] is mapped to [nb_samples, nb_neurons].
    """

    def __init__(self, nb_inputs, nb_neurons):
        """Draw random weights for `nb_neurons` neurons with `nb_inputs` inputs each."""

        # error term of this layer; assigned from outside during back-propagation
        self._delta = None

        # one bias weight per neuron (the bias input itself is the constant 1)
        self._biases = np.zeros((1, nb_neurons))

        # standard-normal draws, divided by sqrt(nb_inputs)
        fan_in_scale = np.sqrt(nb_inputs)
        self._weights = np.random.randn(nb_inputs, nb_neurons) / fan_in_scale

    def forward(self, a_prev):
        """Compute this layer's activations for the batch `a_prev`.

        The input, the pre-activation and the activation are cached on the
        instance (`_a_prev`, `_z`, `_a`) for the backward pass and the update.
        """

        self._a_prev = a_prev

        pre_activation = a_prev.dot(self._weights) + self._biases
        self._z = pre_activation

        self._a = sigmoid(pre_activation)

        return self._a

    def backward(self, delta_next, weights_next):
        """Return this layer's error term, given the following layer's error
        term `delta_next` and weight matrix `weights_next`.

        Relies on `_z` cached by the preceding call to `forward`.
        """

        propagated = delta_next.dot(weights_next.T)

        return propagated * sigmoid(self._z, derived = True)

In [7]:
class MultiLayerPerceptron:
    """Ordered stack of layers trained with back-propagation.

    Each layer is expected to offer `forward(a_prev)` and
    `backward(delta_next, weights_next)` and to expose `_weights`, `_biases`,
    `_a_prev`, `_z` and `_delta`.
    """

    def __init__(self):
        """Start with an empty stack of layers."""

        self._layers = []

    def add(self, dense):
        """Append `dense` as the new last layer."""

        self._layers.append(dense)

    def build(self):
        """Allocate one (initially empty) delta slot per layer."""

        self._deltas = [None for _ in self._layers]

    def forward(self, x):
        """Feed the batch `x` through all layers in order and return the output of the last one."""

        activation = np.array(x)

        for layer in self._layers:

            activation = layer.forward(activation)

        return activation

    def backward(self, targ, pred):
        """Store an error term (`_delta`) on every layer.

        `pred` has to be the output of the preceding `forward` call, because
        the pre-activations cached by that call are reused here.
        """

        last = self._layers[-1]

        # output layer: -(target - prediction), times the sigmoid derivative
        last._delta = - (targ - pred) * (sigmoid(last._z, derived = True))

        # remaining layers, walking from the back of the stack to the front
        following = last

        for layer in reversed(self._layers[:-1]):

            layer._delta = layer.backward(following._delta, following._weights)

            following = layer

    def update(self, learning_rate):
        """Apply one gradient step to every layer, in place.

        The gradients are summed (not averaged) over the samples of the batch.
        """

        for layer in reversed(self._layers):

            grad_weights = layer._a_prev.T.dot(layer._delta)
            grad_biases  = np.sum(layer._delta, axis = 0, keepdims = True)

            layer._weights -= learning_rate * grad_weights
            layer._biases  -= learning_rate * grad_biases

In [8]:
# Network under test: 2 inputs -> 3 hidden sigmoid neurons -> 1 output neuron.
# In the run recorded in this notebook this configuration reached an accuracy of 1.0.
mlp = MultiLayerPerceptron()

for layer in (Dense(2, 3), Dense(3, 1)):
    mlp.add(layer)

# Alternative for comparison: a single hidden neuron
# (the original note reports an accuracy of about 0.5 for it).
# mlp.add(Dense(2, 1))
# mlp.add(Dense(1, 1))


mlp.build()

In [9]:
# Baseline: accuracy of the randomly initialised network on the training set.
pred = mlp.forward(xs_tr)

# single-argument print(...) gives the same text on Python 2 and also runs on Python 3
print("Predicting labels without training.")

# the sigmoid output is thresholded at 0.5 to obtain a binary label
print("Accuracy:  {}".format(np.sum((pred > 0.5) == ys_tr) / float(ys_tr.shape[0])))

Predicting labels without training.
Accuracy:  0.48


In [10]:
# mini-batch gradient descent: the weights are updated once per batch of `sz_batch` samples

sz_batch      = 10
nb_epochs     = 20
learning_rate = 0.1

time_start = time.time()

for j in range(nb_epochs):

    for i in range(0, nb_samples_tr, sz_batch):

        xs_batch = xs_tr[i : i + sz_batch]
        ys_batch = ys_tr[i : i + sz_batch]

        pred = mlp.forward(xs_batch)

        # backward reuses the values cached by the forward call directly above
        mlp.backward(ys_batch, pred)

        mlp.update(learning_rate = learning_rate)

    # evaluate on the test set after every epoch
    pred_te = mlp.forward(xs_te)

    # print("Epoch {}. Testing accuracy: {}".format(j + 1, calc_accuracy(pred_te, ys_te)))

    # case: xor (single output, thresholded at 0.5)
    # single-argument print(...) gives the same text on Python 2 and also runs on Python 3

    print("Accuracy:  {}".format(np.sum((pred_te > 0.5) == ys_te) / float(ys_te.shape[0])))

time_end = time.time()

print("Training took {} seconds.".format(time_end - time_start))

Accuracy:  0.74
Accuracy:  0.74
Accuracy:  0.74
Accuracy:  0.74
Accuracy:  0.74
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  1.0
Accuracy:  1.0
Training took 0.195467948914 seconds.


In [11]:
# Accuracy of the trained network on the test set.
# single-argument print(...) gives the same text on Python 2 and also runs on Python 3
print("Predicting labels after training.")
pred = mlp.forward(xs_te)
print("Accuracy:  {}".format(np.sum((pred > 0.5) == ys_te) / float(nb_samples_te)))

Predicting labels after training.
Accuracy:  1.0


In [12]:
# Side-by-side view of the first test samples: features, predicted and target labels.
nb_shown = 20

labels_pred = (pred > 0.5)[:nb_shown, 0]
labels_targ = (ys_te > 0.5)[:nb_shown, 0]
is_correct  = labels_pred == labels_targ

# Arrays are converted with str(...) before alignment, because an alignment
# format spec applied directly to an ndarray raises a TypeError on Python 3.
# np.uint8 replaces the np.uint0 alias, which newer numpy releases no longer
# provide; the printed 0/1 values are the same.
# single-argument print(...) gives the same text on Python 2 and also runs on Python 3
print("Features: {: >60}".format(str(xs_te[:nb_shown, 0])))
print("{: >70}".format(str(xs_te[:nb_shown, 1])))
print("Predicted labels: {: >52}".format(str(labels_pred.astype(np.uint8))))
print("Target labels: {: >55}".format(str(labels_targ.astype(np.uint8))))
print("Correct predictions:{: >50}".format(str(is_correct.astype(np.uint8))))

# divide by the number of samples actually shown instead of a hard-coded 20.0
print("Accuracy: {: >22}".format(np.sum(is_correct) / float(is_correct.shape[0])))

Features:                    [0 0 1 0 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 0]
                             [0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0 1 0]
Predicted labels:            [0 1 1 0 1 0 0 1 1 0 1 1 1 1 1 0 0 0 0 0]
Target labels:               [0 1 1 0 1 0 0 1 1 0 1 1 1 1 1 0 0 0 0 0]
Correct predictions:         [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Accuracy:                    1.0
