Implementation of the 1986 paper [Learning representations by back-propagating errors](http://www.cs.toronto.edu/~hinton/absps/naturebp.pdf) from scratch.  

MNIST dataset is used for this example.
* No optimizations or data-oriented approach were used.
* No SGD momentum was used
* Biases were used, although there are no mentions of them in the original paper

In [21]:
import random
import math
import numpy as np
import torch
from torchvision import datasets, transforms


PyTorch is used only to simplify data loading

In [43]:
# Download (if needed) the MNIST training and validation splits.
trainset = datasets.MNIST(
    './datasets/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
valset = datasets.MNIST(
    './datasets',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# batch_size=1: the cells below feed the network one sample at a time.
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size=1)
valloader = torch.utils.data.DataLoader(valset, shuffle=True, batch_size=1)

Create `Neuron` class

In [94]:
class Neuron:
    """A single sigmoid unit fed by every neuron of the previous layer.

    Input-layer neurons are created with ``previous_layer=None``: they get no
    ``weights``/``bias`` and only carry an ``output`` value assigned from outside.
    """

    def __init__(self, previous_layer):
        """Create the neuron and randomly initialise its parameters.

        Args:
            previous_layer: list of neurons whose ``output`` feeds this neuron,
                or ``None`` for an input-layer neuron.
        """
        self.previous_layer = previous_layer
        # Defined up front so reading the activation before the first forward
        # pass does not raise AttributeError.
        self.output = 0.

        if self.previous_layer is None:
            return

        # for the training to be effective at beginning initialize all parameters randomly
        self.weights = [random.uniform(-1., 1.) for _ in self.previous_layer]
        self.bias = random.uniform(-1., 1.)

    def calculate_output(self):
        """Store ``sigmoid(weights . previous_outputs + bias)`` in ``self.output``.

        Must not be called on an input-layer neuron (it has no weights).
        """
        # dot product
        pre_activation = sum(
            weight * source.output
            for weight, source in zip(self.weights, self.previous_layer)
        )
        pre_activation += self.bias

        # Sigmoid activation function, evaluated so that math.exp only ever
        # receives a non-positive argument: exp(-x) for a very negative x
        # would raise OverflowError.
        if pre_activation >= 0.:
            self.output = 1. / (1. + math.exp(-pre_activation))
        else:
            exp_x = math.exp(pre_activation)
            self.output = exp_x / (1. + exp_x)

Create `NeuralNetwork` class

In [117]:
class NeuralNetowork:
    """Fully connected feed-forward network trained with plain back-propagation.

    ``layers`` is a list of layers, each a list of neurons. ``layers[0]`` is the
    input layer (its neurons only hold the input values) and ``layers[-1]`` is
    the output layer. The class name keeps its original spelling because other
    cells instantiate it under that name.
    """

    def __init__(self, learning_rate=0.01):
        """Create an empty network.

        Args:
            learning_rate: step size applied to every weight and bias update.
        """
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, neuron_amount):
        """Append a layer of ``neuron_amount`` neurons wired to the current last layer.

        The first layer added becomes the input layer (no incoming weights).
        """
        previous_layer = self.layers[-1] if self.layers else None
        self.layers.append([Neuron(previous_layer) for _ in range(neuron_amount)])

    def predict(self, input_values):
        """Run a forward pass and return the output-layer activations as a list.

        Args:
            input_values: one value per input-layer neuron.

        Raises:
            ValueError: if the network has no layers, or if the number of input
                values differs from the size of the input layer (a shorter
                input would otherwise silently reuse values from an earlier call).
        """
        if not self.layers:
            raise ValueError("cannot predict with a network that has no layers")

        input_layer = self.layers[0]
        if len(input_values) != len(input_layer):
            raise ValueError(
                f"expected {len(input_layer)} input values, got {len(input_values)}"
            )

        for neuron, value in zip(input_layer, input_values):
            neuron.output = value

        for layer in self.layers[1:]:
            for neuron in layer:
                neuron.calculate_output()

        # last layer contains result of our prediction
        return [neuron.output for neuron in self.layers[-1]]

    def back_propagation(self, d, y):
        """Apply one gradient-descent step for the sample of the latest ``predict`` call.

        The error being minimised is ``E = 1/2 * sum((y_j - d_j) ** 2)``.

        Args:
            d: desired output value for each output neuron.
            y: actual output-layer activations, as returned by ``predict``.
        """
        # dE/dy for the output layer.
        dEdy = [actual - desired for actual, desired in zip(y, d)]

        for layer_idx in reversed(range(1, len(self.layers))):
            current_layer = self.layers[layer_idx]
            previous_layer = self.layers[layer_idx - 1]

            # dE/dy of every neuron in the previous layer, summed over all
            # connections leaving it.
            dEdy_previous = [0.] * len(previous_layer)

            # update current layer
            for j, neuron in enumerate(current_layer):
                weights = neuron.weights
                # Sigmoid derivative y * (1 - y) turns dE/dy into dE/dx.
                dEdx = y[j] * (1 - y[j]) * dEdy[j]
                for i in range(len(weights)):
                    # Accumulate with the weight value *before* it is updated.
                    dEdy_previous[i] += weights[i] * dEdx
                    delta_w = previous_layer[i].output * dEdx
                    weights[i] -= self.learning_rate * delta_w

                neuron.bias -= self.learning_rate * dEdx

            # Hand the gradient itself to the next (earlier) layer. Encoding it
            # as a fake target ``output - gradient`` and subtracting it back
            # rounds away gradients that are tiny compared to the activation.
            y = [neuron.output for neuron in previous_layer]
            dEdy = dEdy_previous

In [None]:
def calculate_error(network, loader=None, output_size=10):
    """Print the mean squared error and the accuracy of ``network`` on a data loader.

    Args:
        network: object whose ``predict(input_values)`` returns a list of
            per-class scores.
        loader: iterable of ``(images, labels)`` pairs holding one sample each;
            defaults to the notebook-level ``valloader``.
        output_size: number of classes, i.e. the length of the one-hot target.

    Raises:
        ValueError: if ``loader`` yields no samples (the averages are undefined).
    """
    if loader is None:
        loader = valloader

    accum_error = 0.
    accurate_count, total_count = 0, 0
    for images, labels in loader:
        input_values = images.flatten().tolist()
        correct_label = labels.flatten().tolist()[0]

        # One-hot encode the expected class.
        val_labels = [0] * output_size
        val_labels[correct_label] = 1

        preds = network.predict(input_values)
        for pred, expected in zip(preds, val_labels):
            accum_error += math.pow(pred - expected, 2)

        total_count += 1
        # The predicted class is the index of the highest score.
        if correct_label == preds.index(max(preds)):
            accurate_count += 1

    if total_count == 0:
        raise ValueError("cannot compute error metrics: loader yielded no samples")

    # Average over every output value of every sample.
    accum_error /= total_count * output_size

    print(f"[MSE]: {accum_error}")
    print(f"[Accuracy]: {accurate_count/total_count}")

Initialize network

In [118]:
# 784 inputs, two hidden layers of 16 neurons each, 10 outputs.
net = NeuralNetowork()
for layer_size in (784, 16, 16, 10):
    net.add_layer(layer_size)

Test network on one sample input

In [114]:
# Pull a single (images, labels) batch from the training loader.
dataiter = iter(trainloader)
# Use the built-in next(): Python 3 iterators implement __next__, and a
# .next() method is not part of the iterator protocol.
images, labels = next(dataiter)

# Flatten the image tensor into a plain list of values for the input layer.
input_values = images.flatten().tolist()

net.predict(input_values)

[0.37481259197173833,
 0.4187727752001508,
 0.6067622543298206,
 0.4176515988860953,
 0.422300716194616,
 0.7126225169797678,
 0.28532597080705097,
 0.8880377357832795,
 0.2568677440463119,
 0.13013270352901635]

Test error of freshly initialized network

In [115]:
calculate_error(net)

[MSE]: 0.24968736484429296
[Accuracy]: 0.1028


Training routine

In [119]:
epochs = 3
output_size = 10

for e in range(epochs):
    print(f"[Epoch]: {e}")

    for images, labels in trainloader:
        # One-hot encode the target class of this sample.
        correct_label = labels.flatten().tolist()[0]
        train_labels = [0.] * output_size
        train_labels[correct_label] = 1.

        # Forward pass on the flattened image, then one back-propagation
        # step against the one-hot target.
        input_values = images.flatten().tolist()
        preds = net.predict(input_values)
        net.back_propagation(train_labels, preds)

    # Report validation metrics after every epoch.
    calculate_error(net)


[Epoch]: 0
[MSE]: 0.05236983214175698
[Accuracy]: 0.6567
[Epoch]: 1
[MSE]: 0.036356696459431244
[Accuracy]: 0.7855
[Epoch]: 2
[MSE]: 0.025544010296224225
[Accuracy]: 0.8506
