In [29]:
'''
Simple RNN for adding 2 numbers in binary

forward forward..
backward - we want to iteratively change the weights starting from random positions 
to minimize error we change the weights of each layer in the directions of the derivatives of the output  of that layer
(note that the overall error function to optimize is formed by all layer functions but as we go back we no longer care  
about the functions in front and their weights..)
we want the change to be proportional to the size of the error and also the size of the input - so we weight the derivatives 
by the errors deltas and inputs
when passing error delta back to previous layer - we multiply current error weighted derivative by the weights to see how much
of the erro corresponds to each of the previous layer outputs
'''


######################################### THE DATA ######################################### 

import numpy as np
import copy
np.random.seed(0)
from numpy import ones, zeros

# the data generating params
n_samples = 1#5000
n_bit = 8
largest_input_number = pow(2, n_bit) / 2

#### done with constants

def generate_random_sample():
    # generate 2 random numbers and their sum
    input_1, input_2 = np.random.randint(0, largest_input_number), np.random.randint(0, largest_input_number)
    true_output = input_1 + input_2

    # calculate the binaries
    input_1_binary = [int(x) for x in np.binary_repr(input_1, n_bit)]
    input_2_binary = [int(x) for x in np.binary_repr (input_2, n_bit)]
    true_output_binary = [int(x) for x in np.binary_repr(true_output, n_bit)]
    
    return input_1_binary, input_2_binary, true_output_binary



In [3]:
############################################# THE RNN #############################################

# RNN params
input_dim = 2
output_dim = 1
recursive_size = 16
learning_rate = .1

# RNN weights
# simple RNN with one recurent hidden layer and one output layer

# hidden layer weights
w_recursive = np.random.standard_normal(size=(input_dim, recursive_size))
w_previous_recursive = np.random.standard_normal(size=(recursive_size, recursive_size))
# output layer weights
w_dense = np.random.standard_normal(size=(recursive_size, output_dim))



In [None]:
# RNN Functions

# util math functions
def sigmoid(x): return (1 / (1 + np.exp(-x)))
def sigmoid_derivative(x): return x * (1 - x)


In [56]:
# gets an input sample and recurrent input and returns all layer outputs

def feed_forward_recursive_layer(inputs):#input_data, previous_recursive_layer_output):

    raw_outputs = np.dot(inputs["from_previous"], w_recursive) + z, w_previous_recursive)

    return {"raw": raw_outputs, "activation": sigmoid(raw_outputs)}

# backprop through time rnn layer 
# takes: its raw output, all the errors deltas sent to its successors
# returns: the overall error delta to pass to its precedessors and the deltas to update its own weights
def backprop_recursive_layer(inputs, outputs, errors):#error_to_output, error_to_next_recursive,  layer_raw_output):
    
    # calculate error as coming back from: 1.what was sent to the output, 2.what was sent to the next hidden layer
    error = np.dot(errors["to_output"], w_dense) + np.dot(errors["to_next_recursive"], w_previous_recursive)
    # total delta of the layer to pass further down to previous inputing layers: error_weighted_derivative of output
    total_delta = sigmoid_derivative(outputs["raw"])* error 
    # delta corresponding to input from below layer based on inputs from that layer
    input_w_delta = np.dot(inputs["from_previous"].T, total_delta) 
    # delta corresponding to input from previous hidden layer based on inputs from that layer
    recursive_w_delta = np.dot(inputs["from_recursive"].T, total_delta)
    
    return {"total_delta": total_delta, "recursive_w_delta" : recursive_w_delta, "input_w_delta" : input_w_delta}


In [57]:

# gets an input sample and recurrent input and returns all layer outputs
def feed_forward_dense_layer(inputs):
    
    raw_output = np.dot(inputs["from_previous"], w_dense)
    
    return {"raw": raw_output, "activation": sigmoid(raw_output)}

# gets the error delta it sent to output and the layer input and returns the delta to pass down and 
# the delta to update its weights
def backprop_dense_layer(inputs, outputs, errors):
    
    # delta at this layer
    total_delta = 1* errors["to_output"] # being the output dense layer, derivative = 1
    input_w_delta = np.dot(inputs["from_previous"].T, total_delta)
    
    return {"total_delta": total_delta, "input_w_delta" : input_w_delta}


In [61]:

# feed forward one sample unit through all layers
def feed_forward_network(inputs):
    
    recursive_layer_outputs = feed_forward_recursive_layer(inputs)
    dense_layer_outputs = feed_forward_dense_layer({"from_previous": recursive_layer_outputs["activation"]})
    
    return {"from_dense":dense_layer_outputs,"from_recursive":recursive_layer_outputs}
    

In [69]:
# back prop one sample unit through all layers
# because it's recursive it takes possible deltas from successor samples feeded forward, just as the feed forward takes recursive 
# outputs from previous samples 
# should return/fill the updates coresponding to this sample
def back_prop_network(inputs, all_layer_outputs, correct_output, next_sample_deltas):
    
    inputs_dense = {"from_previous":all_layer_outputs["from_recursive"]["activation"]}
    outputs_dense = all_layer_outputs["from_dense"]
    errors_dense = {"to_output": correct_output - all_layer_outputs["from_dense"]["activation"]}
    dense_deltas = backprop_dense_layer(inputs_dense, outputs_dense, errors_dense)
    
    inputs_recursive = inputs
    outputs_recursive = all_layer_outputs["from_recursive"]
    errors_recursive = {"to_output": dense_deltas["total_delta"],
                       "to_next_recursive": next_sample_deltas["recursive_deltas"]["total_delta"]} 
    recursive_deltas = backprop_recursive_layer(inputs_recursive, outputs_recursive, errors_recursive)
    
    return {"dense_deltas":dense_deltas, "recursive_deltas":recursive_deltas}  
    

In [70]:
# test functions - not asserting correct results - just making sure they run with correct dimensions
# set test constants
n_bit = 3
largest_input_number = pow(2, n_bit) / 2
recursive_size = 3
sample_data= np.array([[0,1]])
error = 1
# init test weights to 1 for simple test of correct values
w_recursive = np.ones((input_dim, recursive_size))
w_previous_recursive = np.ones((recursive_size, recursive_size))
w_dense = np.ones((recursive_size, output_dim))


print generate_random_sample()
print sigmoid(np.array([range(recursive_size)]))
print sigmoid_derivative(np.array([range(recursive_size)]))

inputs_recursive = {"from_previous": sample_data, "from_recursive": np.ones((1,recursive_size))}
print feed_forward_recursive_layer(inputs_recursive)

inputs_dense = {"from_previous": ones((1,recursive_size))}
print feed_forward_dense_layer(inputs_dense) 

outputs_dense = {"raw": 0, "activation": 0}
errors_dense = {"to_output": 1}
print backprop_dense_layer(inputs_dense, outputs_dense, errors_dense) 

outputs_recursive = {"raw": ones((1,recursive_size))/2, "activation": ones((1,recursive_size))/2}
# assume there was no error sent to next hidden layer
errors_recursive_case1 = {"to_output": ones((1,recursive_size)), "to_next_recursive": zeros((1,recursive_size))}
# assume there was no error sent to next layer (the output dense layer)
errors_recursive_case2 = {"to_output": zeros((1,recursive_size)), "to_next_recursive": ones((1,recursive_size))}
print backprop_recursive_layer(inputs_recursive,  outputs_recursive, errors_recursive_case1)
print backprop_recursive_layer(inputs_recursive,  outputs_recursive, errors_recursive_case2)

print feed_forward_network(inputs_recursive)
all_layer_outputs = {"from_dense":outputs_dense, "from_recursive":outputs_recursive}
correct_output = 1
next_sample_recursive_deltas = {"total_delta": zeros((1,recursive_size)), "recursive_w_delta" : None, "input_w_delta" : None}
next_sample_deltas = {"dense_deltas":None, "recursive_deltas":next_sample_recursive_deltas}
print back_prop_network(inputs_recursive, all_layer_outputs, correct_output, next_sample_deltas)


([0, 0, 1], [0, 0, 0], [0, 0, 1])
[[ 0.5         0.73105858  0.88079708]]
[[ 0  0 -2]]
{'raw': array([[ 4.,  4.,  4.]]), 'activation': array([[ 0.98201379,  0.98201379,  0.98201379]])}
{'from_previous': array([[ 1.,  1.,  1.]])}
{'raw': array([[ 3.]]), 'activation': array([[ 0.95257413]])}
{'input_w_delta': array([[ 1.],
       [ 1.],
       [ 1.]]), 'total_delta': 1}
{'input_w_delta': array([[ 0.  ,  0.  ,  0.  ],
       [ 0.75,  0.75,  0.75]]), 'recursive_w_delta': array([[ 0.75,  0.75,  0.75],
       [ 0.75,  0.75,  0.75],
       [ 0.75,  0.75,  0.75]]), 'total_delta': array([[ 0.75,  0.75,  0.75]])}
{'input_w_delta': array([[ 0.  ,  0.  ,  0.  ],
       [ 0.75,  0.75,  0.75]]), 'recursive_w_delta': array([[ 0.75,  0.75,  0.75],
       [ 0.75,  0.75,  0.75],
       [ 0.75,  0.75,  0.75]]), 'total_delta': array([[ 0.75,  0.75,  0.75]])}
{'from_previous': array([[ 0.98201379,  0.98201379,  0.98201379]])}
{'from_dense': {'raw': array([[ 2.94604137]]), 'activation': array([[ 0.95007606]

ValueError: shapes (2,1) and (3,3) not aligned: 1 (dim 1) != 3 (dim 0)

In [None]:
# feeds forward a sequence of samples 
feed_forward_network_sequence()

# back propagates a sequence of samples 
back_prop_network_sequence()

# update weights
update_network_weights()