In [46]:
import copy, numpy as np
np.random.seed(0)


# compute sigmoid nonlinearity
def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output*(1-output)



In [47]:
binary_dim = 8
largest_number = pow(2,binary_dim)

def get_training_set():
    int2binary = {}
    binary = np.unpackbits(np.array([range(largest_number)],dtype=np.uint8).T,axis=1)
    for i in range(largest_number):
        int2binary[i] = binary[i]
    return int2binary


In [48]:
x_train = get_training_set()
x_train

{0: array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8),
 1: array([0, 0, 0, 0, 0, 0, 0, 1], dtype=uint8),
 2: array([0, 0, 0, 0, 0, 0, 1, 0], dtype=uint8),
 3: array([0, 0, 0, 0, 0, 0, 1, 1], dtype=uint8),
 4: array([0, 0, 0, 0, 0, 1, 0, 0], dtype=uint8),
 5: array([0, 0, 0, 0, 0, 1, 0, 1], dtype=uint8),
 6: array([0, 0, 0, 0, 0, 1, 1, 0], dtype=uint8),
 7: array([0, 0, 0, 0, 0, 1, 1, 1], dtype=uint8),
 8: array([0, 0, 0, 0, 1, 0, 0, 0], dtype=uint8),
 9: array([0, 0, 0, 0, 1, 0, 0, 1], dtype=uint8),
 10: array([0, 0, 0, 0, 1, 0, 1, 0], dtype=uint8),
 11: array([0, 0, 0, 0, 1, 0, 1, 1], dtype=uint8),
 12: array([0, 0, 0, 0, 1, 1, 0, 0], dtype=uint8),
 13: array([0, 0, 0, 0, 1, 1, 0, 1], dtype=uint8),
 14: array([0, 0, 0, 0, 1, 1, 1, 0], dtype=uint8),
 15: array([0, 0, 0, 0, 1, 1, 1, 1], dtype=uint8),
 16: array([0, 0, 0, 1, 0, 0, 0, 0], dtype=uint8),
 17: array([0, 0, 0, 1, 0, 0, 0, 1], dtype=uint8),
 18: array([0, 0, 0, 1, 0, 0, 1, 0], dtype=uint8),
 19: array([0, 0, 0, 1, 0, 0, 1, 1], dtyp

In [49]:
def get_random_sample(dataset):
    # generate a simple addition problem (a + b = c)
    a_int = np.random.randint(largest_number/2) # int version
    a = dataset[a_int] # binary encoding

    b_int = np.random.randint(largest_number/2) # int version
    b = dataset[b_int] # binary encoding

    # true answer
    c_int = a_int + b_int
    c = dataset[c_int]
    return a,b,c

In [50]:
a,b,c = get_random_sample(x_train)
print(a)
print(b)
print(c)


[0 0 1 0 1 1 0 0]
[0 0 1 0 1 1 1 1]
[0 1 0 1 1 0 1 1]


In [51]:
def get_column_from_right(a,b,c, index) : 
    # generate input and output
    X = np.array([[a[binary_dim - index - 1],b[binary_dim - index - 1]]])
    y = np.array([[c[binary_dim - index - 1]]])
    return X, y

def get_column_from_left(a,b,c,index) : 
    # generate input and output
    X = np.array([[a[index],b[index]]])
    y  = np.array([[c[index]]])
    return X,y

for position in range(binary_dim):
    X, y = get_column_from_right(a,b,c, position)
    print(X)
    #print(y)

for position in range(binary_dim):
    X,y = get_column_from_left(a,b,c, position)
    print(X)

[[0 1]]
[[0 1]]
[[1 1]]
[[1 1]]
[[0 0]]
[[1 1]]
[[0 0]]
[[0 0]]
[[0 0]]
[[0 0]]
[[1 1]]
[[0 0]]
[[1 1]]
[[1 1]]
[[0 1]]
[[0 1]]


In [52]:

# input variables
alpha = 0.1
input_dim = 2
hidden_dim = 16
output_dim = 1


# initialize neural network weights
wi = 2*np.random.random((input_dim,hidden_dim)) - 1
wo = 2*np.random.random((hidden_dim,output_dim)) - 1
wh = 2*np.random.random((hidden_dim,hidden_dim)) - 1

wi_update = np.zeros_like(wi)
wo_update = np.zeros_like(wo)
wh_update = np.zeros_like(wh)


In [53]:
def get_model(input):
    # hidden layer (input ~+ prev_hidden)
    hidden_layer = sigmoid(np.dot(input,wi) + np.dot(prev_hidden_layers[-1],wh))

    # output layer (new binary representation)
    output_layer = sigmoid(np.dot(hidden_layer,wo))
    
    return hidden_layer, output_layer

In [54]:



x_train = get_training_set()

In [None]:
# training logic
for j in range(10000):
    a,b,c = get_random_sample(x_train)
    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)
    
    prev_hidden_layers = list()
    prev_hidden_layers.append(np.zeros(hidden_dim))
    output_layer_deltas = list()
    # moving along the positions in the binary encoding
    for position in range(binary_dim):
        X, y = get_column_from_right(a,b,c,position)
        hidden_layer, output_layer = get_model(X)
        # store hidden layer so we can use it in the next timestep
        prev_hidden_layers.append(copy.deepcopy(hidden_layer))
        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(output_layer[0][0])

        # did we miss?... if so, by how much?
        output_layer_error = y - output_layer
        output_layer_delta_error = (output_layer_error)*sigmoid_output_to_derivative(output_layer)
        output_layer_deltas.append(output_layer_delta_error)

        print("x "+  str(X) + 
              " y "  + str(y) + 
              " pred y" + str(output_layer) + 
              " error "  + str(output_layer_error) 
              + " delta " + str(output_layer_delta_error ))

    
    print(d)
    
    next_hidden_layer_delta = np.zeros(hidden_dim)

    for position in range(binary_dim):
        X,y = get_column_from_left(a,b,c,position)
        hidden_layer = prev_hidden_layers[-position-1]
        prev_hidden_layer = prev_hidden_layers[-position-2]

        # error at output layer
        output_layer_delta = output_layer_deltas[-position-1]
        # error at hidden layer
        hidden_layer_delta = ((next_hidden_layer_delta.dot(wh.T) + output_layer_delta.dot(wo.T))
                                        * sigmoid_output_to_derivative(hidden_layer))
        next_hidden_layer_delta = hidden_layer_delta

        print(np.atleast_2d(hidden_layer).T)
        print(output_layer_delta)
        # let's update all our weights so we can try again
        #wi_update += np.atleast_2d(hidden_layer).T.dot(output_layer_delta)
        #wh_update += np.atleast_2d(prev_hidden_layer).T.dot(hidden_layer_delta)
        #wo_update += X.T.dot(hidden_layer_delta)
    
    wi += wi_update * alpha
    wo += wo_update * alpha
    wh += wh_update * alpha    

    wi_update *= 0
    wo_update *= 0
    wh_update *= 0


x [[1 1]] y [[0]] pred y[[0.45619936]] error [[-0.45619936]] delta [[-0.11317462]]
x [[0 0]] y [[1]] pred y[[0.31115856]] error [[0.68884144]] delta [[0.14764552]]
x [[0 1]] y [[1]] pred y[[0.2726555]] error [[0.7273445]] delta [[0.14424295]]
x [[0 1]] y [[1]] pred y[[0.28563956]] error [[0.71436044]] delta [[0.14576496]]
x [[1 1]] y [[0]] pred y[[0.36574524]] error [[-0.36574524]] delta [[-0.08484399]]
x [[0 1]] y [[0]] pred y[[0.28794219]] error [[-0.28794219]] delta [[-0.05903721]]
x [[0 0]] y [[1]] pred y[[0.33454775]] error [[0.66545225]] delta [[0.14814668]]
x [[0 0]] y [[0]] pred y[[0.29889392]] error [[-0.29889392]] delta [[-0.06263512]]
[0 0 0 0 0 0 0 0]
[[0.60121409]
 [0.91878284]
 [0.08616861]
 [0.89912503]
 [0.19303307]
 [0.23632726]
 [0.62919738]
 [0.55206265]
 [0.645745  ]
 [0.55462609]
 [0.39326612]
 [0.2310805 ]
 [0.28193156]
 [0.54320197]
 [0.16109078]
 [0.35789351]]
[[-0.06263512]]
[[0.56417117]
 [0.91179388]
 [0.07824529]
 [0.931073  ]
 [0.17437358]
 [0.30877979]
 [0

ValueError: operands could not be broadcast together with shapes (2,16) (16,1) (2,16) 