In [11]:
import copy, numpy as np

In [12]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp(0, -x)``.  The direct formula overflows inside
    ``np.exp`` for large negative ``x`` (emitting a RuntimeWarning, or
    raising under ``np.errstate(over="raise")``); this form does not.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Activation(s) with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), evaluated
    # without ever forming exp(-x) for large -x.
    return np.exp(-np.logaddexp(0, -x))

In [13]:
def sigmoid_output_to_derivative(output):
    """Slope of the sigmoid, expressed in terms of the sigmoid's own output.

    For ``s = sigmoid(x)`` the derivative with respect to ``x`` is
    ``s * (1 - s)``.  ``output`` is that already-activated value ``s``
    (scalar or array), not the pre-activation ``x``.
    """
    complement = 1 - output
    return output * complement

In [14]:
# Number of bits in each operand and in the sum; also the number of time
# steps the network is unrolled over.
binary_dim = 8

# Lookup table from an integer to its row of bits; starts empty and is filled
# in once the bit table has been built.
int2binary = dict()

In [15]:
# Count of distinct values representable in `binary_dim` bits.  Despite the
# name this is an exclusive bound: the largest representable value is
# largest_number - 1.
largest_number = 2 ** binary_dim

In [16]:
# Build the integer -> bit-row table for every value in [0, largest_number).
#
# Bits are extracted with shift-and-mask rather than np.unpackbits: unpackbits
# only accepts uint8 input and always emits 8 bits per element, so its rows
# match binary_dim only when binary_dim == 8.  For binary_dim == 8 this table
# is identical to the unpackbits result (uint8, most-significant bit first).
bit_shifts = np.arange(binary_dim)[::-1]                 # MSB shift first
all_values = np.arange(largest_number).reshape(-1, 1)    # one row per integer
binary = ((all_values >> bit_shifts) & 1).astype(np.uint8)
for i in range(largest_number):
    int2binary[i] = binary[i]

In [17]:
# Layer sizes.
input_dim = 2      # two inputs per time step
hidden_dim = 16    # width of the recurrent hidden layer
output_dim = 1     # one output per time step

# Step size applied to every accumulated weight update.
alpha = 0.1

In [18]:
# Seed NumPy's global generator so that the initial weights -- and everything
# drawn from the same generator afterwards -- are identical on every fresh
# run, and so that re-running this cell restores the same starting weights.
np.random.seed(0)

# Weight matrices, drawn uniformly from [-1, 1).
synapse_0 = 2*np.random.random([input_dim, hidden_dim]) - 1    # input  -> hidden
synapse_h = 2*np.random.random([hidden_dim, hidden_dim]) - 1   # hidden -> hidden (recurrent)
synapse_1 = 2*np.random.random([hidden_dim, output_dim]) - 1   # hidden -> output

# Per-matrix accumulators for the weight updates; same shapes, all zeros.
synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

In [20]:
# Train the recurrent network to add two integers one bit at a time.
# Each iteration: sample a pair (a, b), run a forward pass over the bits,
# back-propagate through the time steps, then apply the accumulated updates.

# Exclusive upper bound for each operand.  Because both operands are below
# largest_number // 2, their sum is below largest_number and therefore has an
# entry in int2binary.  Floor division keeps the bound an int
# (largest_number/2 is a float under Python 3, while np.random.randint is
# specified for integer bounds), and computing it once avoids redoing the
# division twice per iteration.
operand_limit = largest_number // 2

for j in range(100000):
    # Sample the operands and look up the bit rows of a, b and the target sum.
    a_int = np.random.randint(operand_limit)
    a = int2binary[a_int]

    b_int = np.random.randint(operand_limit)
    b = int2binary[b_int]

    c_int = a_int + b_int
    c = int2binary[c_int]

    layer_1_values = []   # hidden activations; index 0 is the all-zero initial state
    layer_2_deltas = []   # output-layer deltas, one per time step, in forward order

    layer_1_values.append(np.zeros(hidden_dim))
    overallError = 0

    # Forward pass: walk the bit index from binary_dim - 1 down to 0.
    for i in reversed(range(binary_dim)):
        X = np.array([[a[i], b[i]]])
        y = np.array([c[i]]).T

        # Hidden state mixes the current input with the previous hidden state.
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))

        error = y - layer_2
        layer_2_deltas.append(error * sigmoid_output_to_derivative(layer_2))

        overallError += np.abs(error)
        layer_1_values.append(layer_1)

    # Backward pass: visit the time steps in reverse, carrying the hidden-layer
    # delta of the later step back into the earlier one.
    future_layer_1 = np.zeros(hidden_dim)
    for i in reversed(range(binary_dim)):
        # Forward step i consumed bit index binary_dim - i - 1.
        first = binary_dim - i - 1

        X = np.array([[a[first], b[first]]])
        layer_1 = layer_1_values[i + 1]     # hidden state produced at step i
        prev_layer_1 = layer_1_values[i]    # hidden state fed into step i

        layer_2_delta = layer_2_deltas[i]

        # Hidden delta = (signal from the later step + signal from this step's
        # output) scaled by the sigmoid slope at this hidden state.
        layer_1_delta = (future_layer_1.dot(synapse_h.T) + layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)

        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1 = layer_1_delta

    # error is (target - prediction), so the accumulated updates are added.
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    # Clear the accumulators in place for the next sample.
    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # Report the summed absolute output error of the current sample
    # every 10000 iterations.
    if j % 10000 == 0:
        print(overallError)

[[ 4.16610406]]
[[ 0.58272403]]
[[ 0.29792971]]
[[ 0.14407761]]
[[ 0.16030346]]
[[ 0.15637487]]
[[ 0.14872092]]
[[ 0.11241034]]
[[ 0.06109524]]
[[ 0.08109419]]
