# Babby's Second Neural Net

In [None]:
import math
import random

# Number of units in each layer, input layer first and output layer last.
LAYER_SIZES = [2,6,6,1]
# Total number of layers, counting the input and output layers.
NUM_LAYERS = len(LAYER_SIZES)
# Step-size factor applied to every weight update during training.
LEARNING_RATE = 0.1

Our activation function is the sigmoid (also called the logistic function), $x \mapsto 1/(1+e^{-x})$.

In [None]:
def activation(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of *x*.

    The result lies in [0, 1]. The exponential is always evaluated at a
    non-positive argument, so very negative inputs return 0.0 instead of
    raising OverflowError from math.exp.
    """
    if x >= 0:
        return 1/(1+math.exp(-x))
    # For x < 0, exp(-x) can overflow; use the equivalent e^x / (1 + e^x).
    e = math.exp(x)
    return e/(1+e)

def d_activation(x):
    """Return the derivative of the logistic sigmoid at *x*.

    The derivative e^-x / (1 + e^-x)^2 is an even function of x, so it is
    evaluated at -|x|. That keeps the exponential in (0, 1] and avoids the
    OverflowError that math.exp(-x) (or squaring its result) would raise
    for strongly negative inputs.
    """
    e = math.exp(-abs(x))
    return e/(1+e)**2

In [None]:
def _initial_weights():
    """Build one weight matrix per pair of adjacent layers.

    The matrix at index i has LAYER_SIZES[i+1] rows and LAYER_SIZES[i]
    columns; every entry is drawn uniformly from [-0.1, 0.1].
    """
    matrices = []
    for layer in range(NUM_LAYERS - 1):
        fan_in = LAYER_SIZES[layer]
        fan_out = LAYER_SIZES[layer + 1]
        rows = []
        for _ in range(fan_out):
            rows.append([random.uniform(-0.1, 0.1) for _ in range(fan_in)])
        matrices.append(rows)
    return matrices


# weights[i][j][k] connects unit k of layer i to unit j of layer i+1.
weights = _initial_weights()

The network uses sigmoid activations on every hidden layer, but the output layer is a raw linear combination of the last hidden layer's values, with no activation applied.

In [None]:
def train(xs, ys):
    """Run one gradient-descent step on a single sample and return its loss.

    xs holds the input-layer values and ys the target output values. The
    function does a forward pass (sigmoid on hidden layers, plain weighted
    sum at the output), backpropagates the squared-error gradient, and
    updates the module-level ``weights`` in place. The returned loss is the
    sum of squared output errors measured before the update.
    """
    # ---- forward pass -------------------------------------------------
    # pre_acts[i] are the weighted sums entering layer i, outputs[i] the
    # values leaving it; for the input layer both are simply xs.
    pre_acts = [xs]
    outputs = [xs]
    for layer in range(1, NUM_LAYERS - 1):
        below = outputs[layer - 1]
        fan_in = LAYER_SIZES[layer - 1]
        net = []
        for j in range(LAYER_SIZES[layer]):
            row = weights[layer - 1][j]
            net.append(sum(below[k] * row[k] for k in range(fan_in)))
        pre_acts.append(net)
        outputs.append([activation(z) for z in net])

    # The output layer is linear: no activation is applied.
    top = outputs[NUM_LAYERS - 2]
    top_weights = weights[NUM_LAYERS - 2]
    prediction = []
    for j in range(LAYER_SIZES[-1]):
        prediction.append(
            sum(top[k] * top_weights[j][k] for k in range(LAYER_SIZES[-2]))
        )
    outputs.append(prediction)

    # ---- loss ---------------------------------------------------------
    errors = [prediction[j] - ys[j] for j in range(LAYER_SIZES[-1])]
    loss = sum(err ** 2 for err in errors)

    # ---- backward pass ------------------------------------------------
    # The output delta is the raw error; the factor 2 from differentiating
    # the squared error is applied in the weight update below.
    deltas = [[] for _ in range(NUM_LAYERS)]
    deltas[-1] = errors
    for layer in range(NUM_LAYERS - 2, 0, -1):
        upstream = deltas[layer + 1]
        fan_out = LAYER_SIZES[layer + 1]
        layer_deltas = []
        for j in range(LAYER_SIZES[layer]):
            slope = d_activation(pre_acts[layer][j])
            back = sum(weights[layer][k][j] * upstream[k] for k in range(fan_out))
            layer_deltas.append(slope * back)
        deltas[layer] = layer_deltas

    # ---- weight update ------------------------------------------------
    # All deltas were computed above from the old weights, so updating in
    # place here is safe.
    step = LEARNING_RATE * 2
    for layer in range(NUM_LAYERS - 1):
        for j in range(LAYER_SIZES[layer]):
            for k in range(LAYER_SIZES[layer + 1]):
                weights[layer][k][j] -= step * deltas[layer + 1][k] * outputs[layer][j]
    return loss

def test(xs):
    """Return the network's output values for the input values xs.

    Performs only the forward pass with the current module-level
    ``weights``: sigmoid on each hidden layer, then a plain weighted sum
    at the output layer. Nothing is modified.
    """
    signal = xs
    for layer in range(1, NUM_LAYERS - 1):
        fan_in = LAYER_SIZES[layer - 1]
        nets = []
        for j in range(LAYER_SIZES[layer]):
            row = weights[layer - 1][j]
            nets.append(sum(signal[k] * row[k] for k in range(fan_in)))
        signal = [activation(z) for z in nets]

    # Output layer: linear combination of the last hidden layer's values.
    result = []
    for j in range(LAYER_SIZES[-1]):
        result.append(
            sum(signal[k] * weights[-1][j][k] for k in range(LAYER_SIZES[-2]))
        )
    return result

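Let's train on 1 million random data points: 1,000 batches of 1,000 samples each, where every sample is a random pair (x0, x1) from [0, 1) and the target is the product x0·x1.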

In [None]:
# Train on random pairs (x0, x1) with target x0*x1. Weights are updated
# after every sample; a "batch" here is only the window over which the
# reported loss is averaged.
SAMPLES_PER_BATCH = 1000
for batch in range(1000):
    loss = 0
    for _ in range(SAMPLES_PER_BATCH):
        x0 = random.random()
        x1 = random.random()
        loss += train([x0,x1],[x0*x1])
    # Divide by the actual number of samples so the printed value is a true
    # per-sample average (previously divided by 100 while summing 1000).
    loss /= SAMPLES_PER_BATCH
    if batch % 100 == 0:
        print("Batch: {} Avg Loss: {}".format(batch, loss))

In [None]:
# Query the network at (0.4, 0.2). The training target is x0*x1, so a
# well-trained network should return a value close to [0.08].
test([0.4,0.2])