In [1]:
import random
import math

#
# Shorthand:
#   "pd_" as a variable prefix means "partial derivative"
#   "d_" as a variable prefix means "derivative"
#   "_wrt_" is shorthand for "with respect to"
#   "w_ho" and "w_ih" are the index of weights from hidden to output layer neurons and input to hidden layer neurons respectively
#
# Comment references:
#
# [1] Wikipedia article on Backpropagation
#   http://en.wikipedia.org/wiki/Backpropagation#Finding_the_derivative_of_the_error
# [2] Neural Networks for Machine Learning course on Coursera by Geoffrey Hinton
#   https://class.coursera.org/neuralnets-2012-001/lecture/39
# [3] The Back Propagation Algorithm
#   https://www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf

class NeuralNetwork:
    LEARNING_RATE = 0.5

    def __init__(self, num_inputs, num_hidden, num_outputs, hidden_layer_weights = None, hidden_layer_bias = None, output_layer_weights = None, output_layer_bias = None):
        self.num_inputs = num_inputs

        self.hidden_layer = NeuronLayer(num_hidden, hidden_layer_bias)
        self.output_layer = NeuronLayer(num_outputs, output_layer_bias)

        self.init_weights_from_inputs_to_hidden_layer_neurons(hidden_layer_weights)
        self.init_weights_from_hidden_layer_neurons_to_output_layer_neurons(output_layer_weights)

    def init_weights_from_inputs_to_hidden_layer_neurons(self, hidden_layer_weights):
        weight_num = 0
        for h in range(len(self.hidden_layer.neurons)):
            for i in range(self.num_inputs):
                if not hidden_layer_weights:
                    self.hidden_layer.neurons[h].weights.append(random.random())
                else:
                    self.hidden_layer.neurons[h].weights.append(hidden_layer_weights[weight_num])
                weight_num += 1

    def init_weights_from_hidden_layer_neurons_to_output_layer_neurons(self, output_layer_weights):
        weight_num = 0
        for o in range(len(self.output_layer.neurons)):
            for h in range(len(self.hidden_layer.neurons)):
                if not output_layer_weights:
                    self.output_layer.neurons[o].weights.append(random.random())
                else:
                    self.output_layer.neurons[o].weights.append(output_layer_weights[weight_num])
                weight_num += 1

    def inspect(self):
        print('------')
        print('* Inputs: {}'.format(self.num_inputs))
        print('------')
        print('Hidden Layer')
        self.hidden_layer.inspect()
        print('------')
        print('* Output Layer')
        self.output_layer.inspect()
        print('------')

    def feed_forward(self, inputs):
        hidden_layer_outputs = self.hidden_layer.feed_forward(inputs)
        return self.output_layer.feed_forward(hidden_layer_outputs)

    # Uses online learning, ie updating the weights after each training case
    def train(self, training_inputs, training_outputs):
        self.feed_forward(training_inputs)

        # 1. Output neuron deltas
        pd_errors_wrt_output_neuron_total_net_input = [0] * len(self.output_layer.neurons)
        for o in range(len(self.output_layer.neurons)):

            # ∂E/∂zⱼ
            pd_errors_wrt_output_neuron_total_net_input[o] = self.output_layer.neurons[o].calculate_pd_error_wrt_total_net_input(training_outputs[o])

        # 2. Hidden neuron deltas
        pd_errors_wrt_hidden_neuron_total_net_input = [0] * len(self.hidden_layer.neurons)
        for h in range(len(self.hidden_layer.neurons)):

            # We need to calculate the derivative of the error with respect to the output of each hidden layer neuron
            # dE/dyⱼ = Σ ∂E/∂zⱼ * ∂z/∂yⱼ = Σ ∂E/∂zⱼ * wᵢⱼ
            d_error_wrt_hidden_neuron_output = 0
            for o in range(len(self.output_layer.neurons)):
                d_error_wrt_hidden_neuron_output += pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].weights[h]

            # ∂E/∂zⱼ = dE/dyⱼ * ∂zⱼ/∂
            pd_errors_wrt_hidden_neuron_total_net_input[h] = d_error_wrt_hidden_neuron_output * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_input()

        # 3. Update output neuron weights
        for o in range(len(self.output_layer.neurons)):
            for w_ho in range(len(self.output_layer.neurons[o].weights)):

                # ∂Eⱼ/∂wᵢⱼ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢⱼ
                pd_error_wrt_weight = pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].calculate_pd_total_net_input_wrt_weight(w_ho)

                # Δw = α * ∂Eⱼ/∂wᵢ
                self.output_layer.neurons[o].weights[w_ho] -= self.LEARNING_RATE * pd_error_wrt_weight

        # 4. Update hidden neuron weights
        for h in range(len(self.hidden_layer.neurons)):
            for w_ih in range(len(self.hidden_layer.neurons[h].weights)):

                # ∂Eⱼ/∂wᵢ = ∂E/∂zⱼ * ∂zⱼ/∂wᵢ
                pd_error_wrt_weight = pd_errors_wrt_hidden_neuron_total_net_input[h] * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_weight(w_ih)

                # Δw = α * ∂Eⱼ/∂wᵢ
                self.hidden_layer.neurons[h].weights[w_ih] -= self.LEARNING_RATE * pd_error_wrt_weight

    def calculate_total_error(self, training_sets):
        total_error = 0
        for t in range(len(training_sets)):
            training_inputs, training_outputs = training_sets[t]
            self.feed_forward(training_inputs)
            for o in range(len(training_outputs)):
                total_error += self.output_layer.neurons[o].calculate_error(training_outputs[o])
        return total_error

class NeuronLayer:
    def __init__(self, num_neurons, bias):

        # Every neuron in a layer shares the same bias
        self.bias = bias if bias else random.random()

        self.neurons = []
        for i in range(num_neurons):
            self.neurons.append(Neuron(self.bias))

    def inspect(self):
        print('Neurons:', len(self.neurons))
        for n in range(len(self.neurons)):
            print(' Neuron', n)
            for w in range(len(self.neurons[n].weights)):
                print('  Weight:', self.neurons[n].weights[w])
            print('  Bias:', self.bias)

    def feed_forward(self, inputs):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.calculate_output(inputs))
        return outputs

    def get_outputs(self):
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron.output)
        return outputs

class Neuron:
    def __init__(self, bias):
        self.bias = bias
        self.weights = []

    def calculate_output(self, inputs):
        self.inputs = inputs
        self.output = self.squash(self.calculate_total_net_input())
        return self.output

    def calculate_total_net_input(self):
        total = 0
        for i in range(len(self.inputs)):
            total += self.inputs[i] * self.weights[i]
        return total + self.bias

    # Apply the logistic function to squash the output of the neuron
    # The result is sometimes referred to as 'net' [2] or 'net' [1]
    def squash(self, total_net_input):
        return (math.exp(total_net_input)-math.exp(-total_net_input))/(math.exp(total_net_input)+math.exp(-total_net_input))

    # Determine how much the neuron's total input has to change to move closer to the expected output
    #
    # Now that we have the partial derivative of the error with respect to the output (∂E/∂yⱼ) and
    # the derivative of the output with respect to the total net input (dyⱼ/dzⱼ) we can calculate
    # the partial derivative of the error with respect to the total net input.
    # This value is also known as the delta (δ) [1]
    # δ = ∂E/∂zⱼ = ∂E/∂yⱼ * dyⱼ/dzⱼ
    #
    def calculate_pd_error_wrt_total_net_input(self, target_output):
        return self.calculate_pd_error_wrt_output(target_output) * self.calculate_pd_total_net_input_wrt_input();

    # The error for each neuron is calculated by the Mean Square Error method:
    def calculate_error(self, target_output):
        return 0.5 * (target_output - self.output) ** 2

    # The partial derivate of the error with respect to actual output then is calculated by:
    # = 2 * 0.5 * (target output - actual output) ^ (2 - 1) * -1
    # = -(target output - actual output)
    #
    # The Wikipedia article on backpropagation [1] simplifies to the following, but most other learning material does not [2]
    # = actual output - target output
    #
    # Alternative, you can use (target - output), but then need to add it during backpropagation [3]
    #
    # Note that the actual output of the output neuron is often written as yⱼ and target output as tⱼ so:
    # = ∂E/∂yⱼ = -(tⱼ - yⱼ)
    def calculate_pd_error_wrt_output(self, target_output):
        return -(target_output - self.output)

    # The total net input into the neuron is squashed using logistic function to calculate the neuron's output:
    # yⱼ = φ = 1 / (1 + e^(-zⱼ))
    # Note that where ⱼ represents the output of the neurons in whatever layer we're looking at and ᵢ represents the layer below it
    #
    # The derivative (not partial derivative since there is only one variable) of the output then is:
    # dyⱼ/dzⱼ = yⱼ * (1 - yⱼ)
    def calculate_pd_total_net_input_wrt_input(self):
        return self.output * (1 - self.output)

    # The total net input is the weighted sum of all the inputs to the neuron and their respective weights:
    # = zⱼ = netⱼ = x₁w₁ + x₂w₂ ...
    #
    # The partial derivative of the total net input with respective to a given weight (with everything else held constant) then is:
    # = ∂zⱼ/∂wᵢ = some constant + 1 * xᵢw₁^(1-0) + some constant ... = xᵢ
    def calculate_pd_total_net_input_wrt_weight(self, index):
        return self.inputs[index]

###

# Blog post example:

nn = NeuralNetwork(2, 2, 2, hidden_layer_weights=[0.15, 0.2, 0.25, 0.3], hidden_layer_bias=0.35, output_layer_weights=[0.4, 0.45, 0.5, 0.55], output_layer_bias=0.6)
for i in range(10000):
    nn.train([0.05, 0.1], [0.01, 0.99])
    print(i, round(nn.calculate_total_error([[[0.05, 0.1], [0.01, 0.99]]]), 9))
x


0 0.273929146
1 0.266615888
2 0.259197598
3 0.251686591
4 0.244097038
5 0.236444896
6 0.228747801
7 0.221024897
8 0.213296623
9 0.205584441
10 0.197910528
11 0.190297429
12 0.182767684
13 0.175343443
14 0.168046082
15 0.160895841
16 0.153911489
17 0.147110032
18 0.140506477
19 0.134113662
20 0.12794214
21 0.122000139
22 0.11629358
23 0.110826149
24 0.105599418
25 0.100613006
26 0.095864769
27 0.091351005
28 0.08706667
29 0.083005598
30 0.079160709
31 0.075524211
32 0.072087784
33 0.068842749
34 0.065780213
35 0.062891199
36 0.060166756
37 0.057598044
38 0.055176417
39 0.05289347
40 0.050741092
41 0.048711493
42 0.04679723
43 0.044991217
44 0.043286733
45 0.041677422
46 0.04015729
47 0.038720695
48 0.037362339
49 0.036077252
50 0.034860781
51 0.033708573
52 0.032616561
53 0.031580946
54 0.030598185
55 0.029664972
56 0.028778225
57 0.027935071
58 0.027132833
59 0.026369015
60 0.025641291
61 0.024947496
62 0.024285608
63 0.023653744
64 0.023050147
65 0.02247318
66 0.021921313
67 0.0213931

2168 0.000254948
2169 0.000254793
2170 0.000254637
2171 0.000254482
2172 0.000254328
2173 0.000254173
2174 0.000254018
2175 0.000253864
2176 0.00025371
2177 0.000253556
2178 0.000253402
2179 0.000253248
2180 0.000253095
2181 0.000252941
2182 0.000252788
2183 0.000252635
2184 0.000252482
2185 0.000252329
2186 0.000252176
2187 0.000252024
2188 0.000251871
2189 0.000251719
2190 0.000251567
2191 0.000251415
2192 0.000251263
2193 0.000251112
2194 0.00025096
2195 0.000250809
2196 0.000250658
2197 0.000250507
2198 0.000250356
2199 0.000250205
2200 0.000250055
2201 0.000249904
2202 0.000249754
2203 0.000249604
2204 0.000249454
2205 0.000249304
2206 0.000249154
2207 0.000249005
2208 0.000248855
2209 0.000248706
2210 0.000248557
2211 0.000248408
2212 0.000248259
2213 0.000248111
2214 0.000247962
2215 0.000247814
2216 0.000247666
2217 0.000247518
2218 0.00024737
2219 0.000247222
2220 0.000247074
2221 0.000246927
2222 0.000246779
2223 0.000246632
2224 0.000246485
2225 0.000246338
2226 0.000246192


4319 9.6839e-05
4320 9.6806e-05
4321 9.6772e-05
4322 9.6739e-05
4323 9.6705e-05
4324 9.6671e-05
4325 9.6638e-05
4326 9.6604e-05
4327 9.6571e-05
4328 9.6537e-05
4329 9.6504e-05
4330 9.6471e-05
4331 9.6437e-05
4332 9.6404e-05
4333 9.637e-05
4334 9.6337e-05
4335 9.6304e-05
4336 9.627e-05
4337 9.6237e-05
4338 9.6204e-05
4339 9.617e-05
4340 9.6137e-05
4341 9.6104e-05
4342 9.6071e-05
4343 9.6037e-05
4344 9.6004e-05
4345 9.5971e-05
4346 9.5938e-05
4347 9.5905e-05
4348 9.5872e-05
4349 9.5839e-05
4350 9.5805e-05
4351 9.5772e-05
4352 9.5739e-05
4353 9.5706e-05
4354 9.5673e-05
4355 9.564e-05
4356 9.5607e-05
4357 9.5574e-05
4358 9.5541e-05
4359 9.5508e-05
4360 9.5476e-05
4361 9.5443e-05
4362 9.541e-05
4363 9.5377e-05
4364 9.5344e-05
4365 9.5311e-05
4366 9.5278e-05
4367 9.5246e-05
4368 9.5213e-05
4369 9.518e-05
4370 9.5147e-05
4371 9.5115e-05
4372 9.5082e-05
4373 9.5049e-05
4374 9.5017e-05
4375 9.4984e-05
4376 9.4951e-05
4377 9.4919e-05
4378 9.4886e-05
4379 9.4853e-05
4380 9.4821e-05
4381 9.4788e-0

6354 5.3049e-05
6355 5.3035e-05
6356 5.3022e-05
6357 5.3008e-05
6358 5.2995e-05
6359 5.2981e-05
6360 5.2968e-05
6361 5.2954e-05
6362 5.2941e-05
6363 5.2927e-05
6364 5.2914e-05
6365 5.29e-05
6366 5.2887e-05
6367 5.2873e-05
6368 5.286e-05
6369 5.2846e-05
6370 5.2833e-05
6371 5.2819e-05
6372 5.2806e-05
6373 5.2792e-05
6374 5.2779e-05
6375 5.2766e-05
6376 5.2752e-05
6377 5.2739e-05
6378 5.2725e-05
6379 5.2712e-05
6380 5.2698e-05
6381 5.2685e-05
6382 5.2672e-05
6383 5.2658e-05
6384 5.2645e-05
6385 5.2631e-05
6386 5.2618e-05
6387 5.2605e-05
6388 5.2591e-05
6389 5.2578e-05
6390 5.2565e-05
6391 5.2551e-05
6392 5.2538e-05
6393 5.2525e-05
6394 5.2511e-05
6395 5.2498e-05
6396 5.2485e-05
6397 5.2471e-05
6398 5.2458e-05
6399 5.2445e-05
6400 5.2431e-05
6401 5.2418e-05
6402 5.2405e-05
6403 5.2391e-05
6404 5.2378e-05
6405 5.2365e-05
6406 5.2352e-05
6407 5.2338e-05
6408 5.2325e-05
6409 5.2312e-05
6410 5.2298e-05
6411 5.2285e-05
6412 5.2272e-05
6413 5.2259e-05
6414 5.2245e-05
6415 5.2232e-05
6416 5.2219

8242 3.4339e-05
8243 3.4332e-05
8244 3.4325e-05
8245 3.4318e-05
8246 3.431e-05
8247 3.4303e-05
8248 3.4296e-05
8249 3.4289e-05
8250 3.4282e-05
8251 3.4275e-05
8252 3.4267e-05
8253 3.426e-05
8254 3.4253e-05
8255 3.4246e-05
8256 3.4239e-05
8257 3.4232e-05
8258 3.4224e-05
8259 3.4217e-05
8260 3.421e-05
8261 3.4203e-05
8262 3.4196e-05
8263 3.4189e-05
8264 3.4181e-05
8265 3.4174e-05
8266 3.4167e-05
8267 3.416e-05
8268 3.4153e-05
8269 3.4146e-05
8270 3.4139e-05
8271 3.4132e-05
8272 3.4124e-05
8273 3.4117e-05
8274 3.411e-05
8275 3.4103e-05
8276 3.4096e-05
8277 3.4089e-05
8278 3.4082e-05
8279 3.4075e-05
8280 3.4067e-05
8281 3.406e-05
8282 3.4053e-05
8283 3.4046e-05
8284 3.4039e-05
8285 3.4032e-05
8286 3.4025e-05
8287 3.4018e-05
8288 3.4011e-05
8289 3.4004e-05
8290 3.3997e-05
8291 3.3989e-05
8292 3.3982e-05
8293 3.3975e-05
8294 3.3968e-05
8295 3.3961e-05
8296 3.3954e-05
8297 3.3947e-05
8298 3.394e-05
8299 3.3933e-05
8300 3.3926e-05
8301 3.3919e-05
8302 3.3912e-05
8303 3.3905e-05
8304 3.3897e-05

NameError: name 'x' is not defined