In [85]:
import numpy as np

In [86]:
EPS = 1e-6
ALPHA = 0.01

In [87]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [88]:
def forward_prop(X, weights, biases, activations):
    assert len(weights) == len(biases) == len(activations)
    result = X
    outputs = []
    for W, b, activation in zip(weights, biases, activations):
        layer_output = np.dot(result, W) + b
        result = activation(layer_output)
        outputs.append(result)
    return result, outputs

In [89]:
def cross_entropy_loss(y, t):
    return -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))

In [96]:
def back_prop(x, pred, target, weights, biases, outputs):
    Ew = pred - target
    Ev = outputs[0] * np.dot(weights[1], Ew)
    loss = cross_entropy_loss(pred, target)
    dW = np.outer(outputs[0], Ew)
    dV = np.outer(x, Ev)
    return loss, (dV, dW, Ev, Ew)

In [91]:
def predict(x, weights, biases, activations):
    return np.argmax(forward_prop(x, weights, biases, activations)[0])

In [98]:
def fit(X, y):
    assert X.shape[0] == y.shape[0]

    input_dim = X.shape[1]
    hidden_dim = 3
    output_dim = 10
    input_to_hidden_W = np.random.random((input_dim, hidden_dim))
    input_to_hidden_b = np.random.random(hidden_dim)
    hidden_to_output_W = np.random.random((hidden_dim, output_dim))
    hidden_to_output_b = np.random.random(output_dim)

    weights = [input_to_hidden_W, hidden_to_output_W]
    biases = [input_to_hidden_b, hidden_to_output_b]
    activations = [lambda x: x, sigmoid]
    layers = weights + biases
    err = [9999]
    epoch = 0
    while EPS < np.mean(err):
        err = []
        upd = [0] * len(layers)
        for i in range(X.shape[0]):
            predicted, outputs = forward_prop(X[i], weights, biases, activations)
            loss, grad = back_prop(X[i], predicted, y[i], weights, biases, outputs)

            for j in range(len(layers)):
                layers[j] -= upd[j]

            for j in range(len(layers)):
                upd[j] = ALPHA * grad[j]
            err.append(loss)
        print 'Epoch {}: loss {}'.format(epoch, np.mean(err))
        epoch += 1
    return weights, biases, activations




In [99]:
X = np.random.binomial(1, 0.5, (1000, 10))
y = X ^ 1
params = fit(X, y)
print predict(X[0], *params)

Epoch 0: loss 0.653046012994
Epoch 1: loss 0.555681862236
Epoch 2: loss 0.525769166129
Epoch 3: loss 0.510282693288
Epoch 4: loss 0.499600659827
Epoch 5: loss 0.490598183377
Epoch 6: loss 0.483521700664
Epoch 7: loss 0.478367900947
Epoch 8: loss 0.47466638584
Epoch 9: loss 0.471966937645
Epoch 10: loss 0.470214559162
Epoch 11: loss nan
0


  
  
  
  This is separate from the ipykernel package so we can avoid doing imports until
