In [1]:
import numpy as np
from collections import namedtuple

In [2]:
# Tested against the example in
# https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
# (Some equations were changed to match the expected outputs)

In [3]:
# Bundle of an activation's display name, the function itself and its derivative.
Act = namedtuple("Act", ["name", "func", "deriv"])


def sig_func(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
    return 1 / (1 + np.exp(-x))


def sig_deriv(x):
    """Return the derivative of the logistic sigmoid, evaluated at net input ``x``."""
    s = sig_func(x)  # evaluate the sigmoid once and reuse it
    return s * (1 - s)


sigmoid = Act(
    name="Sigmoid",
    func=sig_func,
    deriv=sig_deriv
)


class Layer:
    """Fully connected layer trained with plain gradient descent.

    ``weights`` has one row per unit and one column per input; when
    ``bias`` is 1 an extra trailing column holds the bias weight.  The layer
    does not add the bias input itself: the arrays passed to
    ``first_forward`` must already contain ``prev + bias`` columns.

    A training step is ``first_forward`` -> ``backward`` -> ``second_forward``.
    """

    def __init__(self, num, prev, bias=1, act=sigmoid, eta=0.4):
        """Create a layer with normally distributed random weights.

        Args:
            num: number of units in this layer (rows of ``weights``).
            prev: number of inputs coming from the previous layer.
            bias: 1 to reserve an extra weight column for a bias input, 0 for none.
            act: ``Act`` tuple supplying ``func`` and ``deriv``.
            eta: learning rate used by ``second_forward``.
        """
        self.weights = np.random.standard_normal((num, prev + bias))
        self.act = act
        self.eta = eta
        self.bias = bias

        # State cached between the forward and backward passes.
        self.deltas = None
        self.forward_inputs = None
        self.nets = None

    def first_forward(self, forward_inputs):
        """Compute the layer's activations and cache what ``backward`` needs.

        Args:
            forward_inputs: array whose last axis has ``prev + bias`` entries.

        Returns:
            The activation function applied to ``forward_inputs @ weights.T``.
        """
        self.forward_inputs = forward_inputs
        self.nets = forward_inputs @ self.weights.T
        return self.act.func(self.nets)

    def backward(self, back_deltas, back_weights):
        """Back-propagate the following layer's deltas through this layer.

        Args:
            back_deltas: deltas of the following layer.
            back_weights: weight matrix of the following layer (one column
                per input of that layer, including its bias column if any).

        Returns:
            ``(deltas, weights)``: this layer's deltas, one per unit, and a
            reference to this layer's weight matrix, ready to be passed to
            the ``backward`` of the layer before it.
        """
        sums = back_deltas @ back_weights
        # The product has one column per input of the following layer.  Any
        # column beyond this layer's units (the following layer's bias input)
        # belongs to no unit here, so drop it.  Trimming the sums, rather than
        # padding self.nets, keeps the cached nets intact across repeated
        # calls and keeps the returned deltas compatible with an earlier layer.
        n_units = self.weights.shape[0]
        sums = sums[..., :n_units]
        self.deltas = self.act.deriv(self.nets) * sums
        return self.deltas, self.weights

    def second_forward(self):
        """Apply the gradient-descent step ``weights -= eta * deltas^T @ inputs``.

        Uses the deltas stored by ``backward`` and the inputs stored by
        ``first_forward``; neither cached array is modified.  With several
        rows (samples) the per-sample gradients are summed.
        """
        deltas = np.atleast_2d(self.deltas)
        inputs = np.atleast_2d(self.forward_inputs)
        # Update in place: arrays previously returned by backward() are
        # references to self.weights and must keep pointing at the live matrix.
        self.weights -= (self.eta * deltas).T @ inputs


class OutputLayer(Layer):
    """Final layer of the network; its deltas come directly from the error."""

    def backward(self, total_error):
        """Compute and store this layer's deltas from the error at its outputs.

        Args:
            total_error: error term per output, multiplied element-wise with
                the activation derivative at the cached nets.

        Returns:
            ``(deltas, weights)``: the stored deltas and a reference to this
            layer's weight matrix.
        """
        slope = self.act.deriv(self.nets)
        self.deltas = total_error * slope
        return self.deltas, self.weights


In [4]:
# Hidden layer: 2 units fed by 2 inputs plus one bias column, learning rate 0.5.
h_layer = Layer(2, 2, 1, eta=0.5)

In [5]:
# Replace the random initial weights with fixed values so the run is reproducible.
# One row per hidden unit; the last column is the weight applied to the bias input.
h_layer.weights = np.array([
    [0.15, 0.20, 0.35],
    [0.25, 0.30, 0.35]
]) # 2 x 3

In [6]:
# Sanity check: hidden-layer net inputs computed by hand, without going through first_forward.
np.array([[0.05, 0.1, 1]]) @ h_layer.weights.T

array([[0.3775, 0.3925]])

In [7]:
# One sample with two features; the trailing 1 is the bias input, which the caller must supply.
inputs = np.array([[0.05, 0.1, 1]]) # 1 x 3

In [8]:
# Forward pass through the hidden layer; this also caches the inputs and nets for the backward pass.
h_ys = h_layer.first_forward(inputs)

In [9]:
h_ys

array([[0.59326999, 0.59688438]])

In [10]:
# Output layer: 2 units fed by the 2 hidden activations plus one bias column, learning rate 0.5.
o_layer = OutputLayer(2, 2, 1, eta=0.5)

In [11]:
# Replace the random initial weights with fixed values so the run is reproducible.
# One row per output unit; the last column is the weight applied to the bias input.
o_layer.weights = np.array([
    [0.4, 0.45, 0.6],
    [0.5, 0.55, 0.6]
])

In [12]:
# Append the bias input to the hidden activations; np.append flattens, so restore the 1 x 3 row shape.
o_inputs = np.append(h_ys, 1).reshape((1, 3))

In [13]:
o_inputs

array([[0.59326999, 0.59688438, 1.        ]])

In [14]:
# Forward pass through the output layer; this also caches the inputs and nets for the backward pass.
o_ys = o_layer.first_forward(o_inputs)

In [15]:
o_ys

array([[0.75136507, 0.77292847]])

In [16]:
# Target values for the two output units.
ts = np.array([[0.01, 0.99]])

In [17]:
# Output minus target, written as the negated (target - output).
# NOTE(review): presumably the derivative of 0.5 * (target - output)**2 w.r.t. the output — confirm.
e = -1 * (ts - o_ys)

In [18]:
e

array([[ 0.74136507, -0.21707153]])

In [19]:
# Output-layer deltas (called "sigmas" in this notebook) and a reference to the output layer's weight matrix.
o_sigmas, o_weights = o_layer.backward(e)

In [20]:
o_sigmas

array([[ 0.13849856, -0.03809824]])

In [21]:
# Manual check of the weighted delta sums that h_layer.backward computes from the same two arguments.
sums = o_sigmas @ o_weights

In [22]:
sums

array([[0.03635031, 0.04137032, 0.0602402 ]])

In [23]:
# Back-propagate the output deltas through the hidden layer; the returned weights are not needed here.
h_sigmas, _ = h_layer.backward(o_sigmas, o_weights)

In [24]:
h_layer.forward_inputs

array([[0.05, 0.1 , 1.  ]])

In [25]:
# Apply the gradient-descent update to the hidden layer's weights.
h_layer.second_forward()

In [26]:
h_layer.weights

array([[0.14978072, 0.19956143, 0.34561432],
       [0.24975114, 0.29950229, 0.34502287]])

In [27]:
# Update the output layer only now: o_weights is a reference to o_layer.weights, which is
# modified in place, and h_layer.backward had to read the pre-update values first.
o_layer.second_forward()

In [28]:
o_layer.weights

array([[0.35891648, 0.40866619, 0.53075072],
       [0.51130127, 0.56137012, 0.61904912]])