In [1]:
import numpy as np
import pandas as pd

In [2]:
from bokeh.plotting import figure as bokeh_figure, output_notebook, show
output_notebook()

def figure(*args, **kwargs):
    """Create a Bokeh figure using this notebook's default size.

    All positional and keyword arguments are forwarded to ``bokeh_figure``.
    ``width`` defaults to 550 and ``height`` to 300. Either may be overridden
    by passing it explicitly; previously doing so raised ``TypeError`` because
    the keyword was supplied twice.
    """
    kwargs.setdefault('width', 550)
    kwargs.setdefault('height', 300)
    return bokeh_figure(*args, **kwargs)

### Define the target function (XOR of x1 and x2)

In [3]:
# Truth table of the function to learn: y is 1 exactly when x1 and x2 differ.
# x3 is 1 in every row, so it acts as a constant input to the first layer.
model = pd.DataFrame(dict(
    x1=[0, 0, 1, 1],
    x2=[0, 1, 0, 1],
    x3=[1, 1, 1, 1],
    y=[0, 1, 1, 0],
))
model

Unnamed: 0,x1,x2,x3,y
0,0,0,1,0
1,0,1,1,1
2,1,0,1,1
3,1,1,1,0


In [4]:
# Transpose the table so that every column is one sample. All rows except the
# last (x1, x2, x3) are the inputs; the last row (y) is the target, kept 2-D
# with shape (1, n_samples).
x = model.to_numpy().T[:-1].astype(np.float64)
y = model.to_numpy().T[-1:].astype(np.float64)

In [5]:
# Each row holds the values of one input neuron x_i across all samples.
# Each column is one sample, i.e. one (x1, x2, x3) combination.
x

array([[0., 0., 1., 1.],
       [0., 1., 0., 1.],
       [1., 1., 1., 1.]])

In [6]:
# Targets as a single row: one expected output per sample (column of x).
y

array([[0., 1., 1., 0.]])

### Define our unbiased feedforward neural network
Each layer consists of a set of weights, a set of biases, and an activation function:
\begin{align}
\vec{y^{(l)}} &= \operatorname A(\vec{z^{(l)}})\\
\vec{z^{(l)}} &= \hat{w}^{(l)}\times \vec{y^{(l-1)}}+\vec{b^{(l)}}
\end{align}

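In the unbiased case, $\vec{b^{(l)}}=0$ for every layer. The constant input $x_3 = 1$ still lets the first layer learn an offset through its weights.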

### Define the activation function and its derivative

In [7]:
import numba

def jit(f):
    """Compile ``f`` with Numba in nopython mode."""
    return numba.jit(f, nopython=True)

@jit
def expit(x):
    """Logistic sigmoid 1 / (1 + e^(-x)), evaluated with ``np.exp``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

@jit
def expit_derivative(y):
    """Slope of the sigmoid expressed through its output ``y``: y * (1 - y).

    The argument is the sigmoid's output value, not its pre-activation input.
    """
    return (1 - y) * y

### Create our network layers

In [8]:
# Seed NumPy's global RNG so the random initial weights -- and therefore the
# whole training run -- are identical after every Restart & Run All.
# NOTE(review): convergence depends on the initial weights; pick another seed
# if training stalls with this one.
SEED = 0
np.random.seed(SEED)

n_x = x.shape[0]  # number of input neurons (rows of x)
hidden_layer_size = 4

# Each weight matrix has shape (neurons in this layer, neurons in the previous
# layer) and is drawn uniformly from [0, 1).
w_1 = np.random.rand(hidden_layer_size, n_x)
b_1 = np.zeros(hidden_layer_size)

w_2 = np.random.rand(1, hidden_layer_size)
b_2 = np.zeros(1)

# NOTE(review): b_1, b_2, output and weights are not referenced by the later
# cells visible in this notebook (training is called with [w_1, w_2] and
# [0, 0] directly); they are kept in case other cells rely on them.
output = np.zeros_like(y)
weights = [w_1, w_2]

### Define the forward propagation routines

In [9]:
@jit
def feed_forward_layer(activate, inputs, layer_weights, layer_biases):
    """Compute one layer's output: activate(layer_weights @ inputs + layer_biases).

    NOTE(review): the bias is added with plain broadcasting, so a 1-D bias
    vector would line up with the last axis of ``layer_weights @ inputs``
    (the sample axis when ``inputs`` is 2-D) rather than with the neurons.
    Scalars broadcast safely.
    """
    pre_activation = layer_weights @ inputs + layer_biases
    return activate(pre_activation)

@jit
def feed_forward(inputs, activators, weights, biases):
    """Push ``inputs`` through every layer and return all activations.

    The returned list starts with ``inputs`` itself, followed by the output of
    each layer in order, so its last element is the network's output. Layers
    are formed by zipping ``activators``, ``weights`` and ``biases``.
    """
    activations = [inputs]
    for activate, w, b in zip(activators, weights, biases):
        # Each layer consumes the most recent activation.
        activations.append(feed_forward_layer(activate, activations[-1], w, b))
    return activations

### Define the backpropagation routines

In [10]:
@jit
def dz_dw(a, w, l, m, i, p, q, j, derivative):
    """Partial derivative of layer ``l``'s pre-activation with respect to ``w[m][p, q]``.

    Concretely, this is d z^(l)[i, j] / d w[m][p, q], where z^(l) = w[l] @ a[l],
    ``i`` indexes the neuron and ``j`` the sample (column).

    Parameters:
        a: list of activations; ``a[l]`` is the array multiplied by ``w[l]``
           (assumes the list returned by ``feed_forward`` -- TODO confirm for
           other callers).
        w: list of weight matrices.
        l: index of the layer whose pre-activation is differentiated.
        m: index of the layer that owns the weight being varied.
        i, j: row (neuron) and column (sample) of the pre-activation entry.
        p, q: row and column of the weight inside ``w[m]``.
        derivative: activation slope, called on activation values ``a[l][k, j]``.

    Recursion walks ``l`` downwards until it equals ``m``, so callers must
    pass ``l >= m``.

    NOTE(review): the same ``derivative`` is applied at every layer on the way
    down, which is only exact when all layers share one activation function.
    """
    if l == m:
        # Base case: z^(m)[i, j] depends on w[m][p, q] only when i == p, and
        # then its slope is the input a[m][q, j] that the weight multiplies.
        return a[m][q, j] if i == p else 0.0
    
    # Chain rule through layer l: sum over the neurons k feeding into neuron i.
    w_l = w[l]
    return np.sum(np.array([w_l[i, k] * 
                   derivative(a[l][k, j]) *  
                   dz_dw(a, w, l-1, m, k, p, q, j, derivative) 
                   for k in range(w_l.shape[1])]))

@jit
def dc_dw(a, y, w, m, p, q, derivative):
    """Partial derivative of the cost with respect to the single weight ``w[m][p, q]``.

    Sums, over every entry (i, j) of the final activation ``a[len(w)]``, the
    product (output - target) * derivative(output) * dz_dw(...). This is the
    gradient of a half sum-of-squares cost when ``derivative`` is the slope of
    the output activation expressed through its output value.

    Parameters:
        a: list of activations with ``len(w) + 1`` entries; the last one is
           the network output.
        y: targets, indexed like the network output.
        w: list of weight matrices.
        m: index of the layer that owns the weight.
        p, q: row and column of the weight inside ``w[m]``.
        derivative: activation slope forwarded to ``dz_dw``.
    """
    # l is the index of the last weight matrix; a[l + 1] is the network output.
    l = len(w)-1
    a_l = a[l+1]
    indices = list(np.ndindex(a_l.shape))
    return np.sum(np.array([(a_l[i, j] - y[i, j]) * 
                   derivative(a_l[i, j]) * 
                   dz_dw(a, w, l, m, i, p, q, j, derivative)
                   for i, j in indices]))

@jit
def dc_dw_mat(layers, y, weights, m, derivative):
    """Gradient of the cost with respect to every entry of ``weights[m]``.

    Returns a new array shaped like ``weights[m]`` whose (p, q) entry is
    ``dc_dw(layers, y, weights, m, p, q, derivative)``.
    """
    gradient = np.empty_like(weights[m])
    for row, col in np.ndindex(gradient.shape):
        gradient[row, col] = dc_dw(layers, y, weights, m, row, col, derivative)
    return gradient

### Define our training function

In [11]:
@jit
def train(n, inputs, outputs, weights, biases, activators, activator_derivatives, rate):
    """Run ``n`` iterations of gradient descent on the weight matrices.

    Parameters:
        n: number of iterations.
        inputs: network input passed to ``feed_forward``.
        outputs: target values the network output is compared against.
        weights: list of weight matrices; each array is updated in place.
        biases: per-layer biases used in the forward pass (not trained here).
        activators: per-layer activation functions.
        activator_derivatives: per-layer activation slopes;
            ``activator_derivatives[m]`` is used for the gradient of layer ``m``.
        rate: learning rate multiplying each gradient step.

    Returns nothing; the result of training is the mutated ``weights`` arrays.
    """
    for _ in range(n):
        layers = feed_forward(inputs, activators, weights, biases)

        # Backpropagate the changes to the weight matrices. Layers are updated
        # in ascending order; the gradient for layer m only reads the weights
        # of layers above m, which have not been modified yet this iteration.
        for m in range(len(weights)):
            # Use the ``outputs`` argument as the target (the previous version
            # read the notebook-level ``y`` and ignored this parameter).
            dw = dc_dw_mat(layers, outputs, weights, m, activator_derivatives[m])
            weights[m] -= dw * rate

### Train the network

In [12]:
# 100000 iterations at learning rate 1. Both layers use the sigmoid and the
# biases are fixed at 0. w_1 and w_2 are updated in place, so re-running this
# cell continues training from the current weights.
train(
    100000,
    x,
    y,
    [w_1, w_2],
    [0, 0],
    [expit, expit],
    [expit_derivative, expit_derivative],
    1,
)

### Evaluate result

In [18]:
# Evaluate the trained network on a single sample and keep the final layer.
# NOTE(review): x3 is 1 in every training sample, so [0, 0, 0] is outside the
# training data; [0, 0, 1] would correspond to the first row of the truth
# table -- confirm which input was intended.
xi = np.array([0.0, 0.0, 0.0])
result = feed_forward(xi, [expit, expit], [w_1, w_2], [0, 0])[-1]
result

array([0.01465845])