In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
# Layer sizes, input layer first and output layer last; each adjacent pair
# of entries defines one weight matrix.
neurons = [4,2,4]

In [None]:
# Load previously saved weights when the file is available; otherwise build a
# fresh random set. All layers live in one 3-D array of shape
# (n_layers, max_width, max_width + 1), so narrower layers are zero-padded.
weights = None
filename = "weights.npy"
try:
    weights = np.load(filename)
except (OSError, ValueError, EOFError):
    # Missing or unreadable file: fall back to random initial weights.
    width = max(neurons)
    weights = np.zeros((len(neurons)-1, width, width+1))
    for i in range(len(weights)):
        rows, cols = neurons[i+1], neurons[i]+1
        # Write into the top-left corner so every weight keeps its own
        # (neuron, input) position. An in-place ndarray.resize would re-flow
        # the flat buffer and scatter the values whenever cols != width + 1.
        weights[i, :rows, :cols] = np.random.rand(rows, cols)
weights

In [None]:
# from sklearn import datasets, preprocessing
# iris = datasets.load_iris()
# data = preprocessing.normalize(iris.data)
# iris

In [None]:
# Four one-hot rows of length 4. Every target is identical to its sample, so
# the network is asked to reproduce its own input.
data = np.array([[int(row == col) for col in range(4)] for row in range(4)])
# Independent copy, so the two arrays never share memory.
targets = data.copy()

In [None]:
def activation(x):
    """Logistic sigmoid ``1 / (1 + e**-x)``, applied element-wise.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of ``x``, with the same shape as ``x``.
    """
    x = np.asarray(x)
    # np.exp is available in every NumPy release, whereas np.pow only exists
    # from NumPy 2.0 onwards. For very negative x, exp(-x) overflows to inf,
    # which still yields the correct limit 0.0, so that warning is silenced.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-x))

In [None]:
# run inference
def inference(weights, data):
    """Feed one sample through every layer and return the output activations.

    Parameters
    ----------
    weights : sequence of 2-D arrays
        One matrix per layer; each is multiplied with a vector of length
        ``max(neurons) + 1`` whose first entry is the bias input.
    data : array_like
        A single input sample. It is copied, then zero-extended (or
        truncated) to ``max(neurons)`` entries.

    Returns
    -------
    numpy.ndarray
        The first ``neurons[-1]`` activations of the last layer.

    Notes
    -----
    Every row of a layer goes through the activation, padding rows included.
    A padding row that is all zeros therefore feeds ``activation(0)``, not 0,
    into the next layer.
    """
    width = max(neurons)

    def with_bias(vec):
        # A constant 1 in slot 0 makes column 0 of each layer act as the bias.
        return np.pad(vec, (1, 0), "constant", constant_values=(1))

    signal = np.copy(data)
    signal.resize(width)
    signal = with_bias(signal)

    for layer in weights:
        signal = with_bias(activation(layer @ signal))

    # Drop the bias slot and everything beyond the real output neurons.
    last = neurons[-1] + 1
    return signal[1:last]

In [None]:
def backprop(weights, sample, target, lr=0.1, momentum=0.0, prev_updates=None):
    """Run one gradient-descent step on a single (sample, target) pair.

    ``weights`` is modified in place and is also returned.

    Parameters
    ----------
    weights : sequence of 2-D arrays
        One matrix per layer, each of shape
        ``(max(neurons), max(neurons) + 1)``; column 0 multiplies the
        constant bias input.
    sample : numpy.ndarray
        One input sample; it is copied and zero-extended to ``max(neurons)``.
    target : numpy.ndarray
        Desired output, of length ``neurons[-1]``.
    lr : float
        Learning rate.
    momentum : float
        Fraction of the previously applied step that is added to this step.
    prev_updates : list of numpy.ndarray or None
        The per-layer steps returned by the previous call. ``None`` starts
        from zeros.

    Returns
    -------
    tuple
        ``(weights, error, new_prev_updates)``. ``error`` is the Euclidean
        norm of the output-layer delta, which already includes the sigmoid
        derivative; it is not the raw ``output - target`` distance.
        ``new_prev_updates`` holds the step actually applied to each layer.
    """
    width = max(neurons)

    # Forward pass: keep every layer's input (with bias slot) and its output.
    activations = []
    inputs = []
    x = sample.copy()
    x.resize(width)
    x = np.pad(x, (1,0), "constant", constant_values=(1))
    inputs.append(x)
    for layer in weights:
        x = activation(layer @ x)  # sigmoid
        activations.append(x)
        x = np.pad(x, (1,0), "constant", constant_values=(1))
        inputs.append(x)

    # Output-layer delta, computed on the real output neurons only.
    output = activations[-1][0:neurons[-1]]
    delta = (output - target) * output * (1 - output)
    delta.resize(width)  # zero-pad so padding rows receive no update
    deltas = [delta]
    error = np.sqrt(np.sum(delta**2))

    # Backward pass: every delta is computed from the weights as they were
    # before this step, because the update only happens further below.
    for i in range(len(weights)-1, 0, -1):
        act = activations[i-1][0:neurons[i]]
        # Skip the bias column and the padding columns of layer i.
        w = weights[i][:,1:neurons[i]+1]
        delta = (w.T @ deltas[0]) * act * (1 - act)
        delta.resize(width)
        deltas.insert(0, delta)

    # Initialize previous updates if needed
    if prev_updates is None:
        prev_updates = [np.zeros_like(w) for w in weights]

    # Update weights with momentum
    new_prev_updates = []
    for i in range(len(weights)):
        inp = inputs[i][np.newaxis, :]
        delt = deltas[i][:, np.newaxis]
        cols = inp.shape[1]
        # The stored step includes the momentum term, so velocity carries
        # over from call to call instead of looking back only one step.
        step = lr * delt @ inp + momentum * prev_updates[i][:, :cols]
        weights[i][:, :cols] -= step
        new_prev_updates.append(step)
    return (weights, error, new_prev_updates)

In [None]:
# Mean error per trained epoch. Because this list is created in its own cell,
# re-running the training cell appends to the history instead of resetting it.
errors = []

In [None]:
# Training loop example
epochs = 4000
epochData = list(zip(data, targets))
# Momentum state handed from one backprop call to the next; None lets
# backprop start from zeros on the first call.
prev_updates = None
for epoch in range(epochs):
    epochError = []
    random.shuffle(epochData)  # present the samples in a new order each epoch
    for sample, target in epochData:
        # backprop returns three values (weights, error, applied updates);
        # unpacking only two raises ValueError on the very first call.
        (weights, error, prev_updates) = backprop(
            weights, sample, target, lr=0.6, momentum=0.0, prev_updates=prev_updates
        )
        epochError.append(error)
    errors.append(np.mean(epochError))

In [None]:
# Plot the per-epoch mean error collected by the training loop.
fig, ax = plt.subplots()
ax.plot(errors)
ax.set(title="Error over time", xlabel="Epoch", ylabel="Error")
plt.show()

In [None]:
#np.save(filename, weights)

In [None]:
def test(weights, data, target):
    """Classify every sample and count how many predictions are correct.

    A prediction is correct when the index of the largest network output
    equals the expected class. Each mismatch is printed.

    Parameters
    ----------
    weights : sequence of 2-D arrays
        Layer weights, passed through to ``inference``.
    data : sequence
        Input samples.
    target : sequence
        Expected result per sample. Each entry is either a vector (for
        example one-hot), whose arg-max is the class, or a single integer
        class index.

    Returns
    -------
    tuple
        ``(correct, wrong, accuracy_percent)``; the accuracy is 0.0 when
        ``data`` is empty.
    """
    correct = 0
    for sample, expected in zip(data, target):
        output = inference(weights, sample)
        expected = np.asarray(expected)
        if expected.ndim == 0:
            # Integer class label: expand to one-hot for comparison/printing.
            targetArr = np.zeros(len(output))
            targetArr[int(expected)] = 1
        else:
            # Already a vector. It must not be used as an index into a zero
            # array: that would set the positions named by its *values*.
            targetArr = expected
        if np.argmax(output) == np.argmax(targetArr):
            correct += 1
        else:
            print("results", np.round(output,2))
            print("target ", targetArr)
    total = len(data)
    accuracy = correct / total * 100 if total else 0.0
    return correct, total - correct, accuracy
print(test(weights, data, targets))


In [None]:
# Show the current contents of the weight array.
weights