In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
import re
import time

# Variables

In [2]:
# Fixed hyper-parameters chosen up front.
vocab_size = 400000        # rows of the embedding matrix / width of the answer logits
embedding_dim = 50         # length of each word vector
hidden_layer_size = 10     # GRU units and width of the attention hidden layer
num_rows = 1000            # batch dimension baked into the placeholders
num_steps = 3              # passes made by the episodic memory module

# Dataset-dependent values; they are overwritten once the training data is parsed.
max_input_len = max_question_len = max_answer_len = None
input_sequence_lengths = input_period_boolean = input_period_indices = None
max_facts = None

# Read in GloVe embeddings

In [3]:
# Load the pre-trained word vectors into `embedding` (one row per word) and
# record each word's row number in `word_id_dict`.
#
# The matrix is zero-initialised so that any row not present in the file holds
# zeros instead of whatever happened to be in uninitialised memory.
embedding = np.zeros([vocab_size, embedding_dim])
word_id_dict = {}
# `with` guarantees the handle is closed even if a line fails to parse; the
# explicit encoding avoids depending on the platform default.
with open("../../datasets/glove.6B/glove.6B.50d.txt", encoding="utf-8") as glove_file:
    for word_id, line in enumerate(glove_file):
        # Each line is "<word> <v1> <v2> ... <vN>" separated by single spaces.
        items = line.rstrip('\n').split(' ')
        word_id_dict[items[0]] = word_id
        embedding[word_id, :] = np.array([float(value) for value in items[1:]])

# Parsing training data

In [29]:
# Parse the bAbI task file into a list of examples. Each example is a dict:
#   'I': every story token seen so far, with a '.' token closing each sentence
#   'Q': the question tokens
#   'A': a one-element list holding the answer token
chapter_input = []
data = []
# `with` closes the file even if parsing raises part-way through.
with open("../../datasets/facebook_babi/tasks_1-20_v1-2/en/qa1_single-supporting-fact_train.txt") as babi_file:
    for line in babi_file:
        # Drop '?' and '.', lower-case, and split on whitespace; items[0] is
        # the line number within the current story.
        items = re.sub('[?.]', '', line).lower().split()
        if not items:
            # Blank line: nothing to parse (indexing items[0] would fail).
            continue
        if items[0] == '1':
            # Line number 1 starts a new story, so the accumulated input resets.
            chapter_input = items[1:] + ['.']
        elif items[-1].isdigit():
            # A trailing number marks a question line: the last token is the
            # supporting-fact id and the one before it is the answer.
            data.append({'I': chapter_input,
                         'Q': items[1:-2],
                         'A': [items[-2]]})
        else:
            # Ordinary statement: append it to the story as a new list so the
            # examples already stored keep their own snapshot of the input.
            chapter_input = chapter_input + items[1:] + ['.']

def max_len(data, iqa):
    """Return the longest length found under key `iqa` across `data`.

    data: iterable of mappings whose `iqa` entry supports len().
    iqa:  key to inspect (this notebook calls it with 'I', 'Q' and 'A').

    Returns 0 when `data` is empty.
    """
    return max((len(example[iqa]) for example in data), default=0)

# Longest input / question / answer, used to size the padded arrays.
max_input_len = max_len(data, 'I')
max_question_len = max_len(data, 'Q')
max_answer_len = max_len(data, 'A')

# Unpadded length of every input sequence.
input_sequence_lengths = [len(row['I']) for row in data]

# Positions of the '.' tokens (sentence ends) in every input. Computed once
# and reused below to fill the boolean mask.
input_period_indices = [
    [position for position, token in enumerate(row['I']) if token == '.']
    for row in data
]

# Boolean mask with True at every sentence-end position. It has one row per
# parsed example so it always lines up with the arrays built from `data`.
input_period_boolean = np.zeros((len(data), max_input_len), dtype=bool)
for row_index, positions in enumerate(input_period_indices):
    input_period_boolean[row_index, positions] = True

# Largest number of sentences in any single input.
max_facts = max(len(positions) for positions in input_period_indices)

def embed_and_pad(data):
    """Turn the 'I' and 'Q' token lists of every example into padded vectors.

    Each token is looked up in `word_id_dict` and replaced by its row of
    `embedding`; positions past the end of a sequence stay zero. Answers are
    not embedded here.

    Returns a tuple `(inputs, questions)` of arrays shaped
    [len(data), longest sequence for that field, embedding_dim].
    Raises KeyError for a token that is missing from `word_id_dict`.
    """
    num_examples = len(data)
    inputs = np.zeros([num_examples, max_len(data, 'I'), embedding_dim])
    questions = np.zeros([num_examples, max_len(data, 'Q'), embedding_dim])
    for row_index, example in enumerate(data):
        for padded, field in ((inputs, 'I'), (questions, 'Q')):
            token_ids = [word_id_dict[token] for token in example[field]]
            padded[row_index, 0:len(token_ids), :] = embedding[token_ids]
    return inputs, questions

data_inputs, data_questions = embed_and_pad(data)

In [22]:
def one_hot_encoding(data):
    """Build a [num_rows, vocab_size] matrix with a single 1 per example.

    For example k, column `word_id_dict[answer]` of row k is set to 1, where
    `answer` is the first (only) token of that example's 'A' list.
    Raises KeyError if an answer token is missing from `word_id_dict`.
    """
    answer_ids = np.fromiter((word_id_dict[example['A'][0]] for example in data),
                             dtype=np.intp)
    answers = np.zeros((num_rows, vocab_size))
    # One (row, column) pair per example.
    answers[np.arange(answer_ids.size), answer_ids] = 1
    return answers

In [30]:
# One-hot answer matrix; it is later fed to the `answers` placeholder.
data_answers = one_hot_encoding(data)

# DMN pseudocode

In [None]:
# # answer module

# Wa = tf.placeholder(...)
# y_hat = tf.matmul(m, Wa)

# cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(y_hat, answer)
# loss = tf.reduce_mean(cross_entropy)
# optimizer = tf.train.AdadeltaOptimizer(learning_rate)
# opt_op = optimizer.minimize(loss)

In [None]:
# # for demo
# return(embeddings(argmax(y_hat)))


# DMN Implementation (God help me)

In [35]:
# Graph inputs. Every placeholder has a fixed batch dimension of `num_rows`.

# Embedded, padded input stories.
inputs = tf.placeholder(dtype=tf.float32,
                        shape=(num_rows, max_input_len, embedding_dim))

# Embedded, padded questions.
questions = tf.placeholder(dtype=tf.float32,
                           shape=(num_rows, max_question_len, embedding_dim))

# One-hot answers over the vocabulary.
answers = tf.placeholder(dtype=tf.int32,
                         shape=(num_rows, vocab_size))

# True where an input position holds a sentence-ending '.' token.
periods = tf.placeholder(dtype=tf.bool,
                         shape=(num_rows, max_input_len))

In [6]:
# Single GRU cell object; the question, input and episodic memory modules
# below all call this same object, each inside its own variable scope.
gru_cell = tf.contrib.rnn.GRUCell(num_units=hidden_layer_size)

In [7]:
## Question module
# Run the GRU over the embedded question and keep only its final state `q`;
# the per-step outputs are not needed.
with tf.variable_scope('question_module'):
    q = tf.nn.dynamic_rnn(cell=gru_cell,
                          inputs=questions,
                          dtype=tf.float32)[1]

In [8]:
## Input module
# Run the GRU over every padded input, telling it the true length of each row
# so it stops at the end of the real sequence.
with tf.variable_scope('input_module'):
    i_output = tf.nn.dynamic_rnn(cell=gru_cell,
                                 inputs=inputs,
                                 dtype=tf.float32,
                                 sequence_length=input_sequence_lengths)[0]

# For every row keep only the GRU outputs at sentence ends (the "facts"), then
# zero-pad so that each row contributes exactly `max_facts` facts.
padded_facts = []
for row in range(num_rows):
    row_outputs = i_output[row, :, :]
    row_periods = periods[row, :]
    fact_states = tf.boolean_mask(row_outputs, row_periods)
    missing = max_facts - tf.shape(fact_states)[0]
    filler = tf.zeros([missing, hidden_layer_size])
    padded_facts.append(tf.concat([fact_states, filler], 0))

# Put the fact axis first and split along it: `c` becomes a list of
# `max_facts` tensors, one per fact position.
facts_first = tf.transpose(tf.stack(padded_facts), perm=[1, 0, 2])
c = tf.unstack(facts_first, num=max_facts)
# c_transposed = [tf.transpose(c_t) for c_t in c]

In [None]:
# facts = np.zeros((max_facts, num_rows, hidden_layer_size))
# for i in range(num_rows):
#     facts[0:sum(input_period_boolean[i]), i, :] = res[i,input_period_boolean[i],:]

In [9]:
## Episodic Memory module
# Makes `num_steps` passes over the facts in `c`. On each pass a two-layer
# network scores every fact against the question `q` and the current memory
# `m_i`; the scores are normalised across facts with a softmax, the episode
# `e_i` is the resulting weighted sum of facts, and the memory is updated with
# one GRU step on that episode.
with tf.variable_scope('episodic_memory_module') as scope:
    m_i = q
    # All facts side by side, with the fact axis in position 1, so the
    # weighted sum below is a single reduction.
    facts = tf.stack(c, axis=1)
    # The gate layers must be created on their very first call and reused on
    # every later call. Passing reuse=True from the start fails on a fresh
    # graph because the variables do not exist yet.
    gate_reuse = None
    for step in range(num_steps):
        gates = []
        for c_t in c:
            # Interaction features between this fact, the question and the memory.
            # TODO (V2): two more terms still need to be added here.
            z = tf.concat([c_t, m_i, q,
                           tf.multiply(c_t, q),
                           tf.multiply(c_t, m_i),
                           tf.abs(tf.subtract(c_t, q)),
                           tf.abs(tf.subtract(c_t, m_i))], 1)
            layer1 = tf.contrib.layers.fully_connected(inputs = z,
                                                      num_outputs = hidden_layer_size,
                                                      activation_fn = tf.nn.tanh,
                                                      reuse = gate_reuse,
                                                      scope = 'g_layer_1')
            g = tf.contrib.layers.fully_connected(inputs = layer1,
                                                      num_outputs = 1,
                                                      activation_fn = tf.nn.sigmoid,
                                                      reuse = gate_reuse,
                                                      scope = 'g_layer_2')
            gates.append(g)
            gate_reuse = True
        # From section 4.1: normalise the scores ACROSS facts. Each `g` has a
        # last dimension of 1, so applying softmax to a single `g` always
        # yields 1.0; the scores are concatenated into one column per fact
        # first and the softmax is taken over that axis.
        # NOTE(review): zero-padded fact slots also receive attention weight;
        # consider masking them out - confirm desired behaviour.
        attention = tf.nn.softmax(tf.concat(gates, 1))
        e_i = tf.reduce_sum(tf.multiply(tf.expand_dims(attention, 2), facts), 1)
        # Alternative not used here: gate a GRU over the facts, i.e.
        # h_t = g * GRU(c_t, h_t) + (1 - g) * h_t, and take the last h_t as e_i.
        m_i = gru_cell(e_i, m_i)[0]
        # After the first pass every variable exists, so later passes reuse them.
        scope.reuse_variables()

In [36]:
## Answer module
# Projects the final memory onto the vocabulary and trains with softmax
# cross-entropy against the one-hot `answers`.
#
# Building this cell twice in the same graph raises
# "Variable answer_module/.../Adadelta/ already exists"; restart the kernel (or
# reset the default graph) before re-running it.
with tf.variable_scope('answer_module'):
    logits = tf.contrib.layers.fully_connected(inputs = m_i,
                                              num_outputs = vocab_size,
                                              activation_fn = None)
# TODO: change to sparse_softmax_cross_entropy_with_logits
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = answers)
loss = tf.reduce_mean(cross_entropy)
# Note: despite its name, `optimizer` holds the training op returned by minimize().
optimizer = tf.train.AdadeltaOptimizer(0.002).minimize(loss)

# `answers` is one-hot over the vocabulary, so it has to be reduced to class
# ids before it can be compared element-wise with the predicted ids.
prediction = tf.cast(tf.argmax(logits, 1), 'int32')
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction,
                                           tf.cast(tf.argmax(answers, 1), 'int32')),
                                  tf.float32))

ValueError: Variable answer_module/fully_connected/weights/Adadelta/ already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "<ipython-input-34-a04efb7b8b9a>", line 9, in <module>
    optimizer = tf.train.AdadeltaOptimizer(0.002).minimize(loss)
  File "/Users/amolmane/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/Users/amolmane/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):


In [10]:
# Open a session, initialise all variables and time a single evaluation of the
# final memory `m_i` on the full data set. The session is left open in `sess`.
start_time = time.time()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

feed_values = {
    inputs: data_inputs,
    questions: data_questions,
    answers: data_answers,
    periods: input_period_boolean,
}
# For debugging, intermediate tensors such as (z, layer1, g) can be fetched
# with the same feed instead of m_i.
res = sess.run(m_i, feed_dict=feed_values)

end_time = time.time()
print("Duration: %.2f s" % (end_time - start_time))

Duration: 56.92 s


In [14]:
# res

In [None]:
# res1.shape

In [None]:
# res2.shape

In [None]:
# res3.shape