# Config

In [2]:
# Number of rows allocated for the GloVe embedding matrix; also the width of
# the answer module's output layer.
vocab_size = 400000
# Length of each word vector (matches the 50-d GloVe file loaded below).
embedding_dim = 50
# Number of units in every GRU cell and in the attention layers.
hidden_layer_size = 80
# Number of training examples processed per epoch.
num_rows = 1000
# Number of passes the episodic memory module makes over the facts.
num_steps = 3
batch_size = 100
# Floor division keeps this an int on both Python 2 and 3; true division
# would yield a float under Python 3, which range() rejects.
num_batches = num_rows // batch_size

# Preprocessing

In [3]:
def read_glove(filename):
    """Load word vectors from a space-separated GloVe text file.

    Each non-blank line is expected to hold a token followed by
    `embedding_dim` float values. Row ids are assigned in file order,
    starting at 0.

    Args:
        filename: path of the GloVe text file.

    Returns:
        A tuple (embedding, word_id_dict, id_word_dict):
          embedding    -- array of shape [vocab_size, embedding_dim]; rows
                          beyond the number of vectors read stay all-zero.
          word_id_dict -- maps token -> row id.
          id_word_dict -- maps row id -> token.

    Raises:
        IndexError: if the file holds more than `vocab_size` vectors.
        ValueError: if a line has a non-numeric or wrongly sized vector.
    """
    # np.zeros instead of np.ndarray: the latter returns uninitialised memory,
    # so any row not overwritten below would contain arbitrary values.
    embedding = np.zeros([vocab_size, embedding_dim])
    word_id_dict = {}
    id_word_dict = {}
    next_id = 0
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as glove_file:
        for line in glove_file:
            items = line.rstrip().split(' ')
            if len(items) < 2:
                # Blank line (e.g. trailing newline at end of file): no vector.
                continue
            word_id_dict[items[0]] = next_id
            id_word_dict[next_id] = items[0]
            embedding[next_id, :] = np.array([float(value) for value in items[1:]])
            next_id += 1
    return(embedding, word_id_dict, id_word_dict)

# Load the 50-dimensional GloVe vectors once; the lookup tables are used by the
# preprocessing helpers below and to decode predicted word ids.
embedding, word_id_dict, id_word_dict = read_glove("../../../datasets/glove_6b/glove.6B.50d.txt")

def read_data(filename):
    """Parse a bAbI-style task file into a list of question examples.

    Every line is lower-cased, stripped of '?' and '.' characters and split on
    whitespace; the first token is the line's index within its story.
      * A line whose index is '1' starts a new story.
      * A line whose last token is a number is treated as a question: the
        tokens between the index and the last two tokens form the question and
        the second-to-last token is the answer.
      * Any other line is a statement appended to the current story, followed
        by a '.' token.

    Args:
        filename: path of the task file.

    Returns:
        A list of dicts with keys 'I' (story tokens seen so far), 'Q'
        (question tokens) and 'A' (single-element list with the answer token).
    """
    data = []
    chapter_input = []
    # The context manager closes the file even if parsing raises.
    with open(filename) as task_file:
        for line in task_file:
            items = re.sub('[?.]', '', line).lower().split()
            if not items:
                # Blank line: nothing to parse (indexing items[0] would fail).
                continue
            if items[0] == '1':
                chapter_input = items[1:] + ['.']
            elif items[-1].isdigit():
                data.append({'I': chapter_input,
                             'Q': items[1:-2],
                             'A': [items[-2]]})
            else:
                # Concatenation builds a new list, so examples appended earlier
                # keep the story as it was when their question was asked.
                chapter_input = chapter_input + items[1:] + ['.']
    return(data)

def max_len(data, iqa):
    """Return the length of the longest `iqa` entry ('I', 'Q' or 'A') in data.

    Returns 0 when data is empty.
    """
    lengths = [len(example[iqa]) for example in data]
    if not lengths:
        return 0
    return max(lengths)

def embed_and_pad(data):
    """Embed the 'I' and 'Q' token lists of every example and zero-pad them.

    Returns a tuple (inputs, questions) of arrays shaped
    [len(data), longest 'I', embedding_dim] and
    [len(data), longest 'Q', embedding_dim]; positions past the end of a
    token list are left as zeros.
    """
    def lookup(tokens):
        # Rows of the embedding matrix for the given tokens, in order.
        return embedding[[word_id_dict[token] for token in tokens]]

    n_examples = len(data)
    inputs = np.zeros([n_examples, max_len(data, 'I'), embedding_dim])
    questions = np.zeros([n_examples, max_len(data, 'Q'), embedding_dim])
    for row_idx, example in enumerate(data):
        story = example['I']
        inputs[row_idx, :len(story), :] = lookup(story)
        query = example['Q']
        questions[row_idx, :len(query), :] = lookup(query)
    return (inputs, questions)

def get_answer_index(data):
    """Return the vocabulary id of each example's answer word.

    Args:
        data: list of example dicts; the first element of each 'A' entry is
            looked up in word_id_dict.

    Returns:
        1-D array with one id per example, in the order of `data`.

    Raises:
        KeyError: if an answer word is not in word_id_dict.
    """
    # Sized from the data itself rather than a global row count, so the result
    # always has exactly one entry per example.
    answers = np.zeros(len(data))
    for index, row in enumerate(data):
        answers[index] = word_id_dict[row['A'][0]]
    return(answers)

def get_input_sequence_lengths(data):
    """Return a list with the token count of each example's 'I' entry, in order."""
    return [len(example['I']) for example in data]

def get_input_period_boolean(data):
    """Build a boolean mask marking the '.' tokens of every example's story.

    Args:
        data: list of example dicts with an 'I' token list.

    Returns:
        Boolean array of shape [len(data), longest 'I']; entry [r, t] is True
        when token t of example r is '.', and False elsewhere (including the
        padding past the end of shorter stories).
    """
    stories = [row['I'] for row in data]
    # Derive both dimensions from the data (as embed_and_pad does) instead of
    # relying on module-level globals being set beforehand.
    width = max([len(tokens) for tokens in stories] or [0])
    input_period_boolean = np.zeros((len(stories), width), dtype=bool)
    for index, tokens in enumerate(stories):
        input_period_boolean[index, :len(tokens)] = [token == '.' for token in tokens]
    return(input_period_boolean)

def get_max_facts(input_period_boolean):
    """Return the largest per-row count of True entries in the period mask."""
    facts_per_row = map(sum, input_period_boolean)
    return max(facts_per_row)

In [4]:
# Parse the bAbI task-1 training file into a list of {'I', 'Q', 'A'} token dicts.
data = read_data("../../../datasets/facebook_babi/tasks_1-20_v1-2/en/qa1_single-supporting-fact_train.txt")
# Longest story / question / answer (in tokens) across all examples.
max_input_len = max_len(data, 'I')
max_question_len = max_len(data, 'Q')
max_answer_len = max_len(data, 'A')
# Embedded, zero-padded stories and questions.
data_inputs, data_questions = embed_and_pad(data)
# Vocabulary id of each example's answer word.
data_answers = get_answer_index(data)
# Unpadded story length per example; fed as sequence_length to the input RNN.
input_sequence_lengths = get_input_sequence_lengths(data)
# Boolean mask of the '.' token positions in each story.
input_period_boolean = get_input_period_boolean(data)
# Largest number of '.' tokens found in any single story.
max_facts = get_max_facts(input_period_boolean)

# DMN Implementation

In [5]:
## Placeholders
# Every placeholder has a fixed leading dimension of batch_size, so each feed
# must contain exactly batch_size examples.
# Embedded, zero-padded story tokens: [batch_size, max_input_len, embedding_dim].
inputs = tf.placeholder(tf.float32, shape=[batch_size, max_input_len, embedding_dim])
# Embedded, zero-padded question tokens: [batch_size, max_question_len, embedding_dim].
questions = tf.placeholder(tf.float32, shape=[batch_size, max_question_len, embedding_dim])
# Vocabulary id of the answer word for each example.
answers = tf.placeholder(tf.int32, shape=[batch_size])
# True at the story positions that hold a '.' token.
periods = tf.placeholder(tf.bool, shape=[batch_size, max_input_len])
# Unpadded story length per example, passed to dynamic_rnn as sequence_length.
input_sequence_lengths_placeholder = tf.placeholder(tf.int32, shape=[batch_size])

In [8]:
# with tf.variable_scope('trial7') as scope:
# #     gru_cell_2 = tf.contrib.rnn.GRUCell(hidden_layer_size)
#     trial_data = tf.constant(np.random.rand(10, 1000, 320), dtype = tf.float32)
    
#     w1 = tf.get_variable("w1", [4*hidden_layer_size, hidden_layer_size],
#             initializer=tf.random_normal_initializer())
# #     layer1 = tf.nn.tanh(tf.matmul(z, w1))
# #     scope.reuse_variables()        
#     w2 = tf.get_variable("weights2", [hidden_layer_size, hidden_layer_size],
#             initializer=tf.random_normal_initializer())
# #     g_t = tf.nn.sigmoid(tf.matmul(layer1, w2))
    
#     for i in range(3):
#         for j in range(10):
#             inp = trial_data[j, :, :]
#             layer1 = tf.nn.tanh(tf.matmul(inp, w1))
#             g_t = tf.nn.sigmoid(tf.matmul(layer1, w2))
# #     m = q
# #     for i in range(10):
# #         inp = trial_data[0, :, :]
# #         m = gru_cell_2(inp, m)[1]

In [9]:

# with tf.variable_scope('trial5') as scope:
#     gru_cell_2 = tf.contrib.rnn.GRUCell(hidden_layer_size)
#     trial_data = tf.constant(np.random.rand(10, 1000, 80), dtype = tf.float32)
#     m = q
#     for i in range(10):
#         inp = trial_data[0, :, :]
#         m = gru_cell_2(inp, m)[1]

In [10]:
# ## Question module
# with tf.variable_scope('question_module_1') as scope:
#     _, q = tf.nn.dynamic_rnn(gru_cell,
#                                   questions,
#                                   dtype=tf.float64)

# ## Input module
# with tf.variable_scope('input_module_1') as scope:
#     i_output, _ = tf.nn.dynamic_rnn(gru_cell,
#                                           inputs,
#                                           dtype=tf.float32,
#                                           sequence_length=input_sequence_lengths)
#     c = []
#     for index in range(num_rows):
#         states_at_periods = tf.boolean_mask(i_output[index,:,:], periods[index,:])
#         padding = tf.zeros([max_facts - tf.shape(states_at_periods)[0], hidden_layer_size])
#         c.append(tf.concat([states_at_periods, padding], 0))
#     c = tf.unstack(tf.transpose(tf.stack(c), perm=[1,0,2]), num = max_facts)
#     c_stacked = tf.transpose(tf.stack(c), perm = [1, 0, 2])

In [6]:
## Question and Input module
# One GRU cell encodes both the question and the story; reuse_variables() makes
# the second dynamic_rnn call share the weights created by the first.
with tf.variable_scope('question_and_input_module') as scope:
    input_gru_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)

    # q: final GRU state after reading the question, [batch_size, hidden_layer_size].
    # NOTE(review): no sequence_length is passed here, so the zero-padded
    # question positions are also run through the GRU -- confirm this is intended.
    _, q = tf.nn.dynamic_rnn(input_gru_cell,
                                  questions,
                                  dtype=tf.float32)
    scope.reuse_variables()

    # i_output: GRU output at every story position,
    # [batch_size, max_input_len, hidden_layer_size].
    i_output, _ = tf.nn.dynamic_rnn(input_gru_cell,
                                          inputs,
                                          dtype=tf.float32,
                                          sequence_length=input_sequence_lengths_placeholder)
    
    # Fact representations: for each example keep only the GRU outputs at the
    # positions flagged in `periods`, then append zero rows up to max_facts.
    c = []
    for index in range(batch_size):
        states_at_periods = tf.boolean_mask(i_output[index,:,:], periods[index,:])
        padding = tf.zeros([max_facts - tf.shape(states_at_periods)[0], hidden_layer_size])
        c.append(tf.concat([states_at_periods, padding], 0))
    # c: Python list of max_facts tensors, each [batch_size, hidden_layer_size]
    # (one entry per fact slot, batch-major inside).
    c = tf.unstack(tf.transpose(tf.stack(c), perm=[1,0,2]), num = max_facts)
    # c_stacked: the same facts as a single tensor,
    # [batch_size, max_facts, hidden_layer_size].
    c_stacked = tf.transpose(tf.stack(c), perm = [1, 0, 2])

In [7]:
## Episodic Memory module
# Runs num_steps attention passes over the facts in `c`; each pass produces an
# episode e_i that updates the memory m_i through a GRU cell.
with tf.variable_scope('episodic_memory_module'):
    episodic_gru_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
    # Two-layer attention network (no bias terms), created once and shared by
    # every fact and every pass.
    w1 = tf.get_variable("weight_1", [4*hidden_layer_size, hidden_layer_size],
                        initializer=tf.random_normal_initializer())
    w2 = tf.get_variable("weight_2", [hidden_layer_size, hidden_layer_size],
                        initializer=tf.random_normal_initializer())
    
    # The memory starts out as the question encoding.
    m_i = q
    for step in range(num_steps):
        # NOTE(review): this initial value is never read; e_i is reassigned
        # below before its first use.
        e_i = tf.zeros_like(c[0])
        g = []
        for c_t in c:
            # calculate g
            # Interaction features between the fact, the question and the
            # current memory.
            z = tf.concat([tf.multiply(c_t, q), 
                           tf.multiply(c_t, m_i),
                           tf.abs(tf.subtract(c_t, q)),
                           tf.abs(tf.subtract(c_t, m_i))], 1) # [N x 4d]
            # (Earlier attempts built these layers with per-iteration
            # get_variable / fully_connected calls; the weights are now the
            # shared w1 / w2 defined above.)
            layer1 = tf.nn.tanh(tf.matmul(z, w1))
            # w2 is [d x d], so the gate is a vector per fact ([N x d]), not a
            # single scalar.
            g_t = tf.nn.sigmoid(tf.matmul(layer1, w2))
            g.append(g_t)
        # Stack the per-fact gates into [N x max_facts x d] and normalise
        # across the fact axis.
        g = tf.transpose(tf.stack(g), perm = [1, 0, 2])
        # NOTE(review): the softmax inputs are sigmoid outputs in (0, 1), so the
        # resulting weights of any two facts differ by less than a factor of e
        # (close to uniform attention), and the zero-padded fact slots take part
        # in the normalisation too. Possibly related to the constant
        # predictions in the recorded output -- TODO confirm.
        g_softmax = tf.nn.softmax(g, dim = 1)
        # Episode: attention-weighted sum of the facts, [N x d].
        e_i = tf.reduce_sum(tf.multiply(g_softmax, c_stacked), axis = 1)

        # Alternative episode computation kept for reference (gated GRU over the
        # facts, episode = last hidden state):
        #     h_t = tf.multiply(g, gru_cell(c_t, h_t)[1]) + tf.multiply(1 - g, h_t)
        #     e_i = h_t

        # Memory update: the cell returns (output, new_state); keep the state.
        m_i = episodic_gru_cell(e_i, m_i)[1]
        


In [8]:
## Answer module
# Maps the final memory to one logit per vocabulary word and defines the
# training objective and evaluation metrics.
with tf.variable_scope('answer_module') as scope:
    # Linear layer (no activation) producing [batch_size, vocab_size] logits.
    logits = tf.contrib.layers.fully_connected(inputs = m_i,
                                              num_outputs = vocab_size,
                                              activation_fn = None)
    
    ## Loss and metrics
    # `answers` holds word ids, hence the sparse variant of the cross-entropy.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = answers)
    loss = tf.reduce_mean(cross_entropy)
    # Training op: one Adam step (learning rate 0.01) on the mean batch loss.
    optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
    
    # Predicted word id per example, cast to int32 to match the dtype of `answers`.
    prediction = tf.cast(tf.argmax(logits, 1), 'int32')
    # Count and fraction of examples in the batch whose prediction equals the answer.
    num_correct = tf.reduce_sum(tf.cast(tf.equal(prediction, answers), tf.int32))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, answers), tf.float32))


# Training

In [9]:
# Create the session and initialise every variable defined in the graph above.
# Re-running this cell resets all weights to their initial values.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [10]:
def get_batch(batch_number):
    """Build the feed_dict for the batch_number-th mini-batch (0-based).

    Every placeholder is mapped to rows
    [batch_number*batch_size, (batch_number+1)*batch_size) of its data source.
    """
    first_row = batch_number * batch_size
    rows = slice(first_row, first_row + batch_size)
    return {
        inputs: data_inputs[rows],
        questions: data_questions[rows],
        answers: data_answers[rows],
        periods: input_period_boolean[rows],
        input_sequence_lengths_placeholder: input_sequence_lengths[rows],
    }

In [12]:
def run_model(sess, num_epochs):
    """Train the model for num_epochs passes over the data, logging each epoch.

    Reads the module-level graph nodes `loss`, `optimizer` and `num_correct`,
    the batch count `num_batches`, and builds each feed with `get_batch`.

    Args:
        sess: session whose variables have already been initialised.
        num_epochs: number of full passes over the training batches.

    Prints one line per epoch with the percentage of epochs completed, the
    elapsed minutes, the loss summed over the epoch's batches and the number of
    correct predictions, followed by the total duration.
    """
    start_time = time.time()
    # int() keeps range() working even if num_batches was computed with true
    # division and is therefore a float.
    batches_per_epoch = int(num_batches)
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        epoch_num_correct = 0
        for batch_idx in range(batches_per_epoch):
            # `optimizer` is fetched only for its side effect (one training
            # step); it is listed once and its result is discarded.
            batch_loss, _, batch_num_correct = sess.run(
                (loss, optimizer, num_correct),
                feed_dict=get_batch(batch_idx))
            epoch_loss += batch_loss
            epoch_num_correct += batch_num_correct
        # (epoch + 1) because this epoch has just finished; 100.0 forces float
        # division so the percentage is not truncated on Python 2.
        print("Epoch %d: %.2f%% complete, %d mins, Loss: %.9f, Num correct: %d" % (
            epoch,
            (epoch + 1) * 100.0 / num_epochs,
            (time.time() - start_time) / 60,
            epoch_loss,
            epoch_num_correct))
    end_time = time.time()
    print("Duration: %d mins" % int((end_time - start_time)/60))

In [13]:
# Train for 50 epochs on the session created above.
run_model(sess, 50)

Epoch 0: 0.00% complete, 0 mins, Loss: 83.210313797, Num correct: 152, Accuracy: 152.00%
Epoch 1: 0.00% complete, 0 mins, Loss: 18.377319932, Num correct: 158, Accuracy: 158.00%
Epoch 2: 0.00% complete, 0 mins, Loss: 18.358591676, Num correct: 142, Accuracy: 142.00%
Epoch 3: 0.00% complete, 0 mins, Loss: 18.104773045, Num correct: 133, Accuracy: 133.00%
Epoch 4: 0.00% complete, 0 mins, Loss: 18.138466835, Num correct: 141, Accuracy: 141.00%
Epoch 5: 0.00% complete, 0 mins, Loss: 18.227027893, Num correct: 145, Accuracy: 145.00%
Epoch 6: 0.00% complete, 0 mins, Loss: 18.191586494, Num correct: 144, Accuracy: 144.00%
Epoch 7: 0.00% complete, 0 mins, Loss: 18.203004122, Num correct: 144, Accuracy: 144.00%
Epoch 8: 0.00% complete, 0 mins, Loss: 18.227482438, Num correct: 145, Accuracy: 145.00%
Epoch 9: 0.00% complete, 0 mins, Loss: 18.230713606, Num correct: 144, Accuracy: 144.00%
Epoch 10: 0.00% complete, 0 mins, Loss: 18.236656547, Num correct: 144, Accuracy: 144.00%
Epoch 11: 0.00% comp

In [14]:
# Continue training the same session (weights are not re-initialised) for up to
# 1000 more epochs; the recorded run below was interrupted manually.
run_model(sess, 1000)

Epoch 0: 0.00% complete, 0 mins, Loss: 18.284256458, Num correct: 144, Accuracy: 144.00%
Epoch 1: 0.00% complete, 0 mins, Loss: 18.284379840, Num correct: 144, Accuracy: 144.00%
Epoch 2: 0.00% complete, 0 mins, Loss: 18.284500837, Num correct: 144, Accuracy: 144.00%
Epoch 3: 0.00% complete, 0 mins, Loss: 18.284611702, Num correct: 144, Accuracy: 144.00%
Epoch 4: 0.00% complete, 0 mins, Loss: 18.284720302, Num correct: 144, Accuracy: 144.00%
Epoch 5: 0.00% complete, 0 mins, Loss: 18.284821749, Num correct: 144, Accuracy: 144.00%
Epoch 6: 0.00% complete, 0 mins, Loss: 18.284917235, Num correct: 144, Accuracy: 144.00%
Epoch 7: 0.00% complete, 0 mins, Loss: 18.285007954, Num correct: 144, Accuracy: 144.00%
Epoch 8: 0.00% complete, 0 mins, Loss: 18.285097480, Num correct: 144, Accuracy: 144.00%
Epoch 9: 0.00% complete, 0 mins, Loss: 18.285177469, Num correct: 144, Accuracy: 144.00%
Epoch 10: 0.00% complete, 0 mins, Loss: 18.285256028, Num correct: 144, Accuracy: 144.00%
Epoch 11: 0.00% comp

Epoch 92: 0.00% complete, 1 mins, Loss: 18.287165642, Num correct: 144, Accuracy: 144.00%
Epoch 93: 0.00% complete, 1 mins, Loss: 18.287169099, Num correct: 144, Accuracy: 144.00%
Epoch 94: 0.00% complete, 1 mins, Loss: 18.287173867, Num correct: 144, Accuracy: 144.00%
Epoch 95: 0.00% complete, 1 mins, Loss: 18.287177563, Num correct: 144, Accuracy: 144.00%
Epoch 96: 0.00% complete, 1 mins, Loss: 18.287181616, Num correct: 144, Accuracy: 144.00%
Epoch 97: 0.00% complete, 1 mins, Loss: 18.287186623, Num correct: 144, Accuracy: 144.00%
Epoch 98: 0.00% complete, 1 mins, Loss: 18.287192702, Num correct: 144, Accuracy: 144.00%
Epoch 99: 0.00% complete, 1 mins, Loss: 18.287196875, Num correct: 144, Accuracy: 144.00%
Epoch 100: 0.00% complete, 1 mins, Loss: 18.287204146, Num correct: 144, Accuracy: 144.00%
Epoch 101: 0.00% complete, 1 mins, Loss: 18.287206054, Num correct: 144, Accuracy: 144.00%
Epoch 102: 0.00% complete, 1 mins, Loss: 18.287210464, Num correct: 144, Accuracy: 144.00%
Epoch 1

Epoch 183: 0.00% complete, 3 mins, Loss: 18.287377000, Num correct: 144, Accuracy: 144.00%
Epoch 184: 0.00% complete, 3 mins, Loss: 18.287377715, Num correct: 144, Accuracy: 144.00%
Epoch 185: 0.00% complete, 3 mins, Loss: 18.287379622, Num correct: 144, Accuracy: 144.00%
Epoch 186: 0.00% complete, 3 mins, Loss: 18.287380815, Num correct: 144, Accuracy: 144.00%
Epoch 187: 0.00% complete, 3 mins, Loss: 18.287381291, Num correct: 144, Accuracy: 144.00%
Epoch 188: 0.00% complete, 3 mins, Loss: 18.287384033, Num correct: 144, Accuracy: 144.00%
Epoch 189: 0.00% complete, 3 mins, Loss: 18.287383914, Num correct: 144, Accuracy: 144.00%
Epoch 190: 0.00% complete, 3 mins, Loss: 18.287384987, Num correct: 144, Accuracy: 144.00%
Epoch 191: 0.00% complete, 3 mins, Loss: 18.287384033, Num correct: 144, Accuracy: 144.00%
Epoch 192: 0.00% complete, 3 mins, Loss: 18.287385225, Num correct: 144, Accuracy: 144.00%
Epoch 193: 0.00% complete, 3 mins, Loss: 18.287386656, Num correct: 144, Accuracy: 144.00%

KeyboardInterrupt: 

In [14]:
# Inspect the predicted word ids for the last training batch (batch index 9).
res_pred = sess.run(prediction, feed_dict=get_batch(9))
res_pred

array([2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637, 2637,
       2637], dtype=int32)

In [15]:
# Decode the word id that the model predicted for every example above.
id_word_dict[2637]

'garden'

# Testing

In [None]:
# data = read_data("../../datasets/facebook_babi/tasks_1-20_v1-2/en/qa1_single-supporting-fact_test.txt")
# max_input_len = max_len(data, 'I')
# max_question_len = max_len(data, 'Q')
# max_answer_len = max_len(data, 'A')
# data_inputs, data_questions = embed_and_pad(data)
# data_answers = get_answer_index(data)
# input_sequence_lengths = get_input_sequence_lengths(data)
# input_period_boolean = get_input_period_boolean(data)
# max_facts = get_max_facts(input_period_boolean)

In [None]:
# test_loss, test_prediction, test_num_correct, test_accuracy = sess.run((loss, prediction, num_correct, accuracy), feed_dict={inputs: data_inputs, 
#                                                         questions: data_questions, 
#                                                         answers: data_answers, 
#                                                         periods: input_period_boolean})