In [1]:
import numpy as np

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data set from the local 'MNIST_data' directory (the cell
# output lists the four .gz archives that get extracted). one_hot=True asks
# for one-hot label rows, which is why later cells recover class indices
# with np.argmax(..., axis = 1).
# NOTE(review): `tensorflow.examples.tutorials` appears to ship only with
# TensorFlow 1.x — confirm the installed version before a fresh run.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [69]:
# Training split: images are the network inputs, labels the targets.
ds_tr = mnist.train
xs_tr, ys_tr = ds_tr.images, ds_tr.labels

In [134]:
# Test split: only fed through mlp.forward and compared against its labels.
ds_te = mnist.test
xs_te, ys_te = ds_te.images[:], ds_te.labels[:]

In [70]:
def sigmoid(a, derived = False):
    """Logistic sigmoid 1 / (1 + exp(-a)), or its derivative.

    Parameters
    ----------
    a : scalar or array-like
        Pre-activation value(s).
    derived : bool, optional
        When true, return the derivative s * (1 - s) evaluated at `a`
        instead of the sigmoid s itself.

    Returns
    -------
    The sigmoid (or its derivative), element-wise, with the shape of `a`.
    """
    a = np.asarray(a)

    # exp(-|a|) always lies in [0, 1], so it cannot overflow the way
    # exp(-a) does for large negative `a`. Both branches below are
    # algebraically equal to 1 / (1 + exp(-a)):
    #   a >= 0 :  1 / (1 + e)
    #   a <  0 :  e / (1 + e)
    e = np.exp(-np.abs(a))
    s = np.where(a >= 0, 1.0, e) / (1.0 + e)

    if derived:
        # the sigmoid is evaluated once and reused for the derivative
        return s * (1.0 - s)

    return s

In [145]:
def calc_accuracy(pred, targ):
    """Fraction of rows whose arg-max column in `pred` equals that in `targ`.

    Both arguments are 2-D arrays with one row per sample; the number of
    samples is taken from `pred`.
    """
    total = float(pred.shape[0])

    # row-wise comparison of the winning column indices
    hits = np.argmax(targ, axis = 1) == np.argmax(pred, axis = 1)

    return np.sum(hits) / total
    

In [71]:
class Dense:
    """Fully connected layer followed by a sigmoid activation.

    A dense layer is not an input layer: an input layer applies no
    activation function. In the classical description of a network this
    layer is therefore the second layer onwards.
    """

    def __init__(self, nb_inputs, nb_neurons):
        """Create the parameters for `nb_inputs` inputs and `nb_neurons` neurons."""

        # Weight matrix is (nb_inputs, nb_neurons): standard-normal draws
        # divided by sqrt(nb_inputs).
        draws = np.random.randn(nb_inputs, nb_neurons)
        self._weights = draws / np.sqrt(nb_inputs)

        # One bias per neuron, starting at zero; kept as a (1, nb_neurons)
        # row so it broadcasts over the rows of the pre-activation.
        self._biases = np.zeros((1, nb_neurons))

        # Error term of this layer; assigned by the network during backprop.
        self._delta = None

    def forward(self, a_prev):
        """Compute this layer's activation for `a_prev` and cache the intermediates.

        The input, the pre-activation and the activation are stored on the
        instance (`_a_prev`, `_z`, `_a`) for later use.
        """
        self._a_prev = a_prev

        weighted = a_prev.dot(self._weights)
        self._z = weighted + self._biases

        self._a = sigmoid(self._z)
        return self._a

    def backward(self, delta_next, weights_next):
        """Return this layer's error term from the following layer's delta and weights.

        Uses the pre-activation cached by the most recent `forward` call.
        """
        propagated = delta_next.dot(weights_next.T)
        slope = sigmoid(self._z, derived = True)
        return propagated * slope

In [72]:
class MultiLayerPerceptron:
    """Ordered stack of layers with forward pass, backpropagation and update."""

    def __init__(self):
        """Start with an empty stack of layers."""
        self._layers = []

    def add(self, dense):
        """Append `dense` as the new last layer."""
        self._layers.append(dense)

    def build(self):
        """Create one empty (None) delta slot per layer added so far."""
        self._deltas = [None for _ in self._layers]

    def forward(self, x):
        """Feed `x` through every layer in order and return the final activation."""
        activation = np.array(x)

        for layer in self._layers:
            activation = layer.forward(activation)

        return activation

    def backward(self, targ, pred):
        """Store an error term (`_delta`) on every layer, last layer first.

        `pred` is the network output for the batch and `targ` the matching
        targets; each layer's cached pre-activation from the preceding
        forward pass is used.
        """
        last = self._layers[-1]

        # output layer: negated (targ - pred) scaled by the activation slope
        error = - (targ - pred)
        last._delta = error * sigmoid(last._z, derived = True)

        # walk the remaining layers back to front, handing each one the
        # delta and weights of the layer that follows it
        downstream = last
        for layer in reversed(self._layers[:-1]):
            layer._delta = layer.backward(downstream._delta, downstream._weights)
            downstream = layer

    def update(self, learning_rate):
        """Apply one in-place gradient step to every layer's weights and biases."""
        for layer in reversed(self._layers):

            # weight gradient: cached layer input (transposed) times the delta
            grad_weights = layer._a_prev.T.dot(layer._delta)
            layer._weights -= learning_rate * grad_weights

            # bias gradient: delta summed over the rows of the batch
            grad_biases = np.sum(layer._delta, axis = 0, keepdims = True)
            layer._biases -= learning_rate * grad_biases

In [141]:
# Seed NumPy's global generator so the random weight initialisation inside
# Dense (np.random.randn) is identical on every fresh run of the notebook.
np.random.seed(0)

# 784 inputs -> 24 -> 12 -> 10 sigmoid outputs.
mlp = MultiLayerPerceptron()

mlp.add(Dense(784, 24))
mlp.add(Dense(24, 12))
mlp.add(Dense(12, 10))

mlp.build()

In [142]:
pred = mlp.forward(xs_te)

# display first 20 predictions and labels without training
# (single-argument print(...) gives the same output on Python 2 and 3)
print(np.argmax(pred, axis = 1)[:20])
print(np.argmax(ys_te, axis = 1)[:20])

[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [171]:
sz_batch      = 100
nb_epochs     = 5
nb_samples    = 10000   # only the first nb_samples training rows are used
learning_rate = 0.1

for epoch in range(nb_epochs):

    # The batch offset has its own name so it does not overwrite the epoch
    # counter. Batches are visited in the same order in every epoch.
    for start in range(0, nb_samples, sz_batch):

        stop = start + sz_batch

        pred = mlp.forward(xs_tr[start : stop])

        mlp.backward(ys_tr[start : stop], pred)

        mlp.update(learning_rate)
    



In [172]:
pred = mlp.forward(xs_te)

# display first 20 predictions and labels with training
# (single-argument print(...) gives the same output on Python 2 and 3)
print(np.argmax(pred, axis = 1)[:20])
print(np.argmax(ys_te, axis = 1)[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 2 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [173]:
# share of test rows whose predicted class matches the label
# (single-argument print(...) gives the same output on Python 2 and 3)
print(calc_accuracy(pred, ys_te))

0.929
