### 용어
context: question에 대한 hint sentence의 묶음 <br>
sentence: context를 이루고 있는 문장 하나 <br>
question: 질문 (문장 하나)

In [53]:
import itertools
import numpy as np
import os
import pandas as pd
import pickle
import sys
sys.path.append('../')

from datetime import datetime
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib import rnn
from tensorflow.contrib import slim
from time import time
from tqdm import tqdm

In [2]:
# question, answer, context, label
with open('./babi_preprocessd/train_dataset_masked.pkl', 'rb') as f:
    train = pickle.load(f)
with open('./babi_preprocessd/val_dataset_masked.pkl', 'rb') as f:
    val = pickle.load(f)
with open('./babi_preprocessd/test_dataset_masked.pkl', 'rb') as f:
    test = pickle.load(f)   

In [93]:
with open('./babi_preprocessd/c_word_set.pkl', 'rb') as f:
    c_word_set = pickle.load(f)
with open('./babi_preprocessd/q_word_set.pkl', 'rb') as f:
    q_word_set = pickle.load(f)
with open('./babi_preprocessd/a_word_set.pkl', 'rb') as f:
    a_word_set = pickle.load(f)
with open('./babi_preprocessd/cqa_word_set.pkl', 'rb') as f:
    cqa_word_set = pickle.load(f)

In [4]:
[train_q, train_a, train_c, train_l, train_c_real_len, train_q_real_len] = train
[val_q, val_a, val_c, val_l, val_c_real_len, val_q_real_len] = val
[test_q, test_a, test_c, test_l, test_c_real_len, test_q_real_len] = test

In [5]:
c_max_len = 20
s_max_len = 12
q_max_len = 12
mask_index = 0

In [56]:
# parameters
learning_rate = 2e-4
batch_size = 64
iter_time = 150
display_step = 100
seed = 9

* context words: 124 <br>
* question words: 88 <br>
* answer words: 41 <br>
* all words: 159 <br>
* s,q max len: 12 <br>
* c max len: 20

#### LSTM for context
* 32 unit LSTM

#### LSTM for question
* 32 unit LSTM

In [7]:
# model parameter
s_input_step = s_max_len
s_hidden = 32
# ---
q_input_step = q_max_len
q_hidden = 32

In [8]:
# embed matrix
c_word_embed = 32
c_vocab_size = len(c_word_set)+1 # consider masking
c_word_embed_matrix = tf.Variable(tf.random_uniform(shape=[c_vocab_size, c_word_embed], minval=-1, maxval=1, seed= seed))
# ---
q_word_embed = 32
q_vocab_size = len(q_word_set)+1 # consider masking
q_word_embed_matrix = tf.Variable(tf.random_uniform(shape=[q_vocab_size, q_word_embed], minval=-1, maxval=1, seed=seed))

In [9]:
# input, output
c = tf.placeholder(dtype=tf.int32, shape=[batch_size, c_max_len, s_max_len])
c_real_len = tf.placeholder(dtype=tf.int32, shape=[batch_size, c_max_len])
s = tf.placeholder(dtype=tf.int32, shape=[batch_size, s_max_len]) 
q = tf.placeholder(dtype=tf.int32, shape=[batch_size, q_max_len])
q_real_len = tf.placeholder(dtype=tf.int32, shape=[batch_size])
l = tf.placeholder(dtype=tf.float32, shape=[batch_size, c_max_len, c_max_len])
a = tf.placeholder(dtype=tf.float32, shape=[batch_size, len(cqa_word_set)])
training_phase = tf.placeholder(dtype=tf.bool)

In [10]:
def contextLSTM(c, l, c_real_len, reuse=True, scope= "contextLSTM"):
    
    def sentenceLSTM(s, s_real_len, s_hidden=s_hidden, s_max_len= s_max_len, reuse=reuse, scope= "sentenceLSTM"):
        """
        embedding sentence

        Arguments
            s: sentence (word index list), shape = [batch_size*20, 12]
            s_real_len: length of the sentence before zero padding, int32

        Returns
            embedded_s: embedded sentence, shape = [batch_size*20, 32]
        """
        embedded_sent_word = tf.nn.embedding_lookup(c_word_embed_matrix, s)
        s_input = tf.unstack(embedded_sent_word, num=s_max_len, axis=1)
        lstm_cell = rnn.BasicLSTMCell(s_hidden, reuse=reuse)
        outputs, _ = rnn.static_rnn(lstm_cell, s_input, dtype=tf.float32, scope= scope)
        # 'outputs' is a list of output at every timestep, we pack them in a Tensor
        outputs = tf.stack(outputs)
        # and change back dimension to [batch_size(64)*c_max_len(20), s_max_len(12), s_hidden(32)]
        outputs = tf.transpose(outputs, [1, 0, 2])
        # Start indices for each sample
        index = tf.range(0, batch_size*c_max_len) * (s_max_len) + (s_real_len-1)
        # Indexing
        outputs = tf.gather(tf.reshape(outputs, [-1, s_hidden]), index)
        return outputs
    
    """
    Args
        c: list of sentences, shape = [batch_size, 20, 12]
        l: list of labels, shape = [batch_size, 20, 20]
        c_real_len: list of real length, shape = [batch_size, 20]
        
    Returns
        tagged_c_objects: list of embedded sentence + label, shape = [batch_size, 52] 20개
        len(tagged_c_objects) = 20
    """
    sentences = tf.reshape(c, shape=[-1, s_max_len]) # batch_size*20, 12
    real_lens = tf.reshape(c_real_len, shape=[-1]) # batch_size*20, 
    labels = tf.reshape(l, shape=[-1, c_max_len]) # batch_size*20, 20
    
    s_embedded = sentenceLSTM(sentences, real_lens, reuse=reuse) # batch_size*20, 32
    c_embedded = tf.concat([s_embedded, labels], axis= 1) # batch_size*20, 52
    c_embedded = tf.reshape(c_embedded, shape=[batch_size, c_max_len, c_max_len+c_word_embed])
    tagged_c_objects = tf.unstack(c_embedded, axis=1)
                                 
    return tagged_c_objects

In [11]:
def questionLSTM(q, q_real_len, q_hidden=q_hidden, reuse=True, scope = "questionLSTM"):
    """
    Args
        q: zero padded qeustions, shape=[batch_size, q_max_len]
        q_real_len: original question length, shape = [batch_size, 1]
        
    Returns
        embedded_q: embedded questions, shape = [batch_size, q_hidden(32)]
    """
    embedded_q_word = tf.nn.embedding_lookup(q_word_embed_matrix, q)
    q_input = tf.unstack(embedded_q_word, num=q_max_len, axis=1)
    lstm_cell = rnn.BasicLSTMCell(q_hidden, reuse=reuse)
    outputs, _ = rnn.static_rnn(lstm_cell, q_input, dtype=tf.float32, scope = scope)
    # 'outputs' is a list of output at every timestep, we pack them in a Tensor
    outputs = tf.stack(outputs)
    # and change back dimension to [batch_size(64), q_max_len(12), s_hidden(32)]
    outputs = tf.transpose(outputs, [1, 0, 2])
    # Start indices for each sample
    index = tf.range(0, batch_size) * (q_max_len) + (q_real_len-1)
    # Indexing
    outputs = tf.gather(tf.reshape(outputs, [-1, s_hidden]), index)
    return outputs

In [12]:
def convert_to_RN_input(embedded_c, embedded_q):
    """
    
    Args
        embedded_c: output of contextLSTM, 20 length list of embedded sentences
        embedded_q: output of questionLSTM, embedded question
        
    Returns
        RN_input: input for RN g_theta, shape = [batch_size*190, (52+52+32)]
        considered batch_size and all combinations
    """
    # 20 combination 2 --> total 190 object pairs
    object_pairs = list(itertools.combinations(embedded_c, 2))
    # concatenate with question
    RN_inputs = []
    for object_pair in object_pairs:
        RN_input = tf.concat([object_pair[0], object_pair[1], embedded_q], axis=1)
        RN_inputs.append(RN_input)
    
    return tf.concat(RN_inputs, axis=0)

#### RN
* $g_\theta$: 4 layer, all 256 units MLP, ReLU
* $f_\phi$: 3 layer, 256/512/159 units MLP, ReLU, sotmax

In [13]:
g_units = [256,256,256,256]
f_units = [256,512,159]

In [14]:
def fc(inputs, output_shape, activation_fcn = tf.nn.relu, name="fc"):
    output = slim.fully_connected(inputs, int(output_shape), activation_fn=activation_fcn)
    return output

In [15]:
def batch_norm_relu(inputs, output_shape, phase=True, scope=None, activation= True):
    with tf.variable_scope(scope):
        h1 = fully_connected(inputs, output_shape, activation_fn=None, scope='dense')
        h2 = batch_norm(h1, decay = 0.95 ,center=True, scale=True, 
                        is_training=phase, scope='bn', updates_collections= None)
        if activation:
            o = tf.nn.relu(h2, 'relu')
        else:
            o = h2
        return o

In [16]:
def g_theta(RN_input, scope= 'g_theta', reuse= True, phase=True): 
    """
    Args
        RN_input: [o_i, o_j, q], shape = [batch_size*190, 136]
        
    Returns
        g_output: shape = [190, batch_size, 256]
    """
    input_dim = RN_input.shape[1]
    with tf.variable_scope(scope, reuse= reuse) as scope:
        # if not reuse: log.warn(scope.name): reuse하는지 확인하기 위한 출력옵션
#         if not reuse:
#             print(scope.name)
        g_1 = batch_norm_relu(RN_input, g_units[0], scope= "g_1", phase=phase)
        g_2 = batch_norm_relu(g_1, g_units[1], scope= "g_2", phase=phase)
        g_3 = batch_norm_relu(g_2, g_units[2], scope= "g_3", phase=phase)
        g_4 = batch_norm_relu(g_3, g_units[3], scope= "g_4", phase=phase)
    g_output = tf.reshape(g_4, shape=[190, batch_size, g_units[3]])
    return g_output

In [17]:
def f_phi(g, scope= 'f_phi', reuse=True, phase=True):
    """
    Args
        g: g_theta result, shape = [190, batch_size, 256]
        
    Returns
        f_output: shape = [batch_size, 159]
    """
    f_input = tf.reduce_sum(g, axis=0)
    with tf.variable_scope(scope, reuse=reuse) as scope:
        f_1 = batch_norm_relu(f_input, f_units[0], scope= "f_1", phase=phase)
        f_2 = batch_norm_relu(f_1, f_units[1], scope= "f_2", phase=phase)
        f_3 = batch_norm_relu(f_2, f_units[2], activation= None, scope= "f_3", phase=phase)
    return f_3

reuse...

In [18]:
def model(c, q, l, c_real_len, q_real_len, phase=True):
    embedded_c = contextLSTM(c, l, c_real_len, reuse=None)
    embedded_q = questionLSTM(q, q_real_len, reuse=None)
    RN_input = convert_to_RN_input(embedded_c, embedded_q)
    f_input = g_theta(RN_input, reuse=None, phase=phase)
    prediction = f_phi(f_input, reuse=None, phase=phase)
    return prediction

In [19]:
prediction = model(c,q,l,c_real_len, q_real_len, training_phase)

In [20]:
tf.trainable_variables()

[<tf.Variable 'Variable:0' shape=(125, 32) dtype=float32_ref>,
 <tf.Variable 'Variable_1:0' shape=(89, 32) dtype=float32_ref>,
 <tf.Variable 'sentenceLSTM/basic_lstm_cell/weights:0' shape=(64, 128) dtype=float32_ref>,
 <tf.Variable 'sentenceLSTM/basic_lstm_cell/biases:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'questionLSTM/basic_lstm_cell/weights:0' shape=(64, 128) dtype=float32_ref>,
 <tf.Variable 'questionLSTM/basic_lstm_cell/biases:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_1/dense/weights:0' shape=(136, 256) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_1/dense/biases:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_1/bn/beta:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_1/bn/gamma:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_2/dense/weights:0' shape=(256, 256) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_2/dense/biases:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'g_theta/g_2/bn/beta:0' shape=(256,) dtype=flo

#### Optimization
* 64 mini batches
* cross-entropy loss function
* Adam optimizer
* learning rate: 2e-4

## Issues
1. multiple answer?

In [21]:
correct = tf.equal(tf.argmax(prediction, axis=1), tf.argmax(a, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [22]:
accuracy_train = tf.summary.scalar("accuracy_train", accuracy)

In [23]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=a))

In [24]:
loss_train = tf.summary.scalar("loss_train", loss)

In [25]:
global_step = tf.Variable(0, name="global_step", trainable=False)

In [26]:
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = opt.minimize(loss, global_step=global_step)

## Train!

In [58]:
def batch_iter(c, q, l, a, c_real_len, q_real_len, batch_size=batch_size, num_epochs=iter_time, shuffle=True, is_training=True):
    """
    Generates a batch iterator for a dataset.
    """
    c = np.array(c)
    q = np.array(q)
    l = np.array(l)
    a = np.array(a)
    c_real_len = np.array(c_real_len)
    q_real_len = np.array(q_real_len)
    data_size = len(q)
    num_batches_per_epoch = int(data_size/batch_size) + 1
    for epoch in range(num_epochs):
        if is_training:
            alarm.send_message('RN training...epoch {}'.format(epoch+1))
        print("In epoch >> " + str(epoch + 1))
        print("num batches per epoch is: " + str(num_batches_per_epoch))
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            c_shuffled = c[shuffle_indices]
            q_shuffled = q[shuffle_indices]
            l_shuffled = l[shuffle_indices]
            a_shuffled = a[shuffle_indices]
            c_real_len_shuffled = c_real_len[shuffle_indices]
            q_real_len_shuffled = q_real_len[shuffle_indices]
        else:
            c_shuffled = c
            q_shuffled = q
            l_shuffled = l
            a_shuffled = a
            c_real_len_shuffled = c_real_len
            q_real_len_shuffled = q_real_len
        
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = (batch_num + 1) * batch_size
            if end_index < data_size:
                c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = c_shuffled[start_index:end_index], q_shuffled[start_index:end_index], l_shuffled[start_index:end_index], a_shuffled[start_index:end_index], c_real_len_shuffled[start_index:end_index], q_real_len_shuffled[start_index:end_index]
            else:
                end_index = data_size
                start_index = end_index - batch_size
                c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = c_shuffled[start_index:end_index], q_shuffled[start_index:end_index], l_shuffled[start_index:end_index], a_shuffled[start_index:end_index], c_real_len_shuffled[start_index:end_index], q_real_len_shuffled[start_index:end_index]
            yield list(zip(c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch))

In [28]:
date = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M:%S')
model_id = 'RN' + date
save_dir = './babi_result/lookup_table/%s' %(model_id)
save_summary_path = os.path.join(save_dir, 'model_summary')
save_variable_path = os.path.join(save_dir, 'model_variables')

In [29]:
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
    os.makedirs(save_summary_path)
    os.makedirs(save_variable_path)

In [38]:
train_summary_ops = tf.summary.merge([loss_train, accuracy_train])

In [31]:
loss_val = tf.summary.scalar('loss_val', loss)
acc_val = tf.summary.scalar('acc_val', accuracy)

In [39]:
val_summary_ops = tf.summary.merge([loss_val, acc_val])

In [40]:
start_time = time()
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(max_to_keep=4)
    summary_writer = tf.summary.FileWriter(save_summary_path, sess.graph)    
    print("====training====")   
    batch_train = batch_iter(train_c, train_q, train_l, train_a, train_c_real_len, train_q_real_len)
    for train in batch_train:
        c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = zip(*train)
        feed_dict={c:c_batch, q:q_batch, l:l_batch, a:a_batch, c_real_len:c_real_len_batch, q_real_len:q_real_len_batch, training_phase: True}
        current_step = sess.run(global_step, feed_dict=feed_dict)
        optimizer.run(feed_dict=feed_dict)
        train_summary = sess.run(train_summary_ops, feed_dict=feed_dict)
        summary_writer.add_summary(train_summary, current_step)
        if current_step % (display_step) == 0:
            print("step: {}".format(current_step))
            print("====validation start====")
            batch_val = batch_iter(val_c, val_q, val_l, val_a, val_c_real_len, val_q_real_len, num_epochs=1)
            accs = []
            for val in batch_val:
                c_val, q_val, l_val, a_val, c_real_len_val, q_real_len_val = zip(*val)
                feed_dict={c:c_val, q:q_val, l:l_val, a:a_val, c_real_len:c_real_len_val, q_real_len:q_real_len_val, training_phase: False}
                acc = accuracy.eval(feed_dict=feed_dict)
                accs.append(acc)
                val_summary= sess.run(val_summary_ops, feed_dict=feed_dict)
                summary_writer.add_summary(val_summary, current_step)
            print("Mean accuracy=" + str(sum(accs)/len(accs)))
            saver.save(sess, save_path = save_summary_path, global_step=current_step)
            print("====training====")
end_time = time()

====training====
In epoch >> 1
num batches per epoch is: 2812
step: 0
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.00425681089744
====training====
step: 100
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.338992387821
====training====
step: 200
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.442057291667
====training====
step: 300
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.485927483974
====training====
step: 400
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.491235977564
====training====
step: 500
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.507111378205
====training====
step: 600
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.515925480769
====training====
step: 700
====validation start====
In epoch >> 1
num batches pe

### Save model variables

In [41]:
fname = os.path.join(save_variable_path, 'model_variables.txt')
with open(fname, 'w') as f:
    print('seed: {}'.format(seed), file=f)
    print('learning rate: {}'.format(learning_rate), file=f)
    print('batch_size: {}'.format(batch_size), file=f)
    print('epoch: {}'.format(iter_time), file=f)
    print('activation_fcn: relu', file = f)
    print('iteration_time: {}'.format(end_time-start_time), file=f)

### Restore saved model

In [48]:
start_time = time()
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-28200.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./babi_result/lookup_table/RN2017-07-10_17:44:39/'))
    summary_writer = tf.summary.FileWriter(save_summary_path, sess.graph)    
    print("====training====")   
    batch_train = batch_iter(train_c, train_q, train_l, train_a, train_c_real_len, train_q_real_len)
    for train in batch_train:
        c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = zip(*train)
        feed_dict={c:c_batch, q:q_batch, l:l_batch, a:a_batch, c_real_len:c_real_len_batch, q_real_len:q_real_len_batch, training_phase: True}
        current_step = sess.run(global_step, feed_dict=feed_dict)
        optimizer.run(feed_dict=feed_dict)
        train_summary = sess.run(train_summary_ops, feed_dict=feed_dict)
        summary_writer.add_summary(train_summary, current_step)
        if current_step % (display_step) == 0:
            print("step: {}".format(current_step))
            print("====validation start====")
            batch_val = batch_iter(val_c, val_q, val_l, val_a, val_c_real_len, val_q_real_len, num_epochs=1)
            accs = []
            for val in batch_val:
                c_val, q_val, l_val, a_val, c_real_len_val, q_real_len_val = zip(*val)
                feed_dict={c:c_val, q:q_val, l:l_val, a:a_val, c_real_len:c_real_len_val, q_real_len:q_real_len_val, training_phase: False}
                acc = accuracy.eval(feed_dict=feed_dict)
                accs.append(acc)
                val_summary= sess.run(val_summary_ops, feed_dict=feed_dict)
                summary_writer.add_summary(val_summary, current_step)
            print("Mean accuracy=" + str(sum(accs)/len(accs)))
            new_saver.save(sess, save_path = save_summary_path, global_step=current_step)
            print("====training====")
end_time = time()

INFO:tensorflow:Restoring parameters from ./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-28200
====training====
In epoch >> 1
num batches per epoch is: 2812
step: 28300
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.844250801282
====training====
step: 28400
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.839643429487
====training====
step: 28500
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.844350961538
====training====
step: 28600
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.846003605769
====training====
step: 28700
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.84710536859
====training====
step: 28800
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.846003605769
====training====
step: 28900
====validation start====
In epoch >> 1
num batches per

In [49]:
print('iteration_time: {}'.format(end_time-start_time))

iteration_time: 21424.92995738983


In [50]:
start_time = time()
alarm.send_message('Starts runnning RN...')
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-56300.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./babi_result/lookup_table/RN2017-07-10_17:44:39/'))
    summary_writer = tf.summary.FileWriter(save_summary_path, sess.graph)    
    print("====training====")   
    batch_train = batch_iter(train_c, train_q, train_l, train_a, train_c_real_len, train_q_real_len)
    for train in batch_train:
        c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = zip(*train)
        feed_dict={c:c_batch, q:q_batch, l:l_batch, a:a_batch, c_real_len:c_real_len_batch, q_real_len:q_real_len_batch, training_phase: True}
        current_step = sess.run(global_step, feed_dict=feed_dict)
        optimizer.run(feed_dict=feed_dict)
        train_summary = sess.run(train_summary_ops, feed_dict=feed_dict)
        summary_writer.add_summary(train_summary, current_step)
        if current_step % (display_step) == 0:
            print("step: {}".format(current_step))
            print("====validation start====")
            batch_val = batch_iter(val_c, val_q, val_l, val_a, val_c_real_len, val_q_real_len, num_epochs=1)
            accs = []
            for val in batch_val:
                c_val, q_val, l_val, a_val, c_real_len_val, q_real_len_val = zip(*val)
                feed_dict={c:c_val, q:q_val, l:l_val, a:a_val, c_real_len:c_real_len_val, q_real_len:q_real_len_val, training_phase: False}
                acc = accuracy.eval(feed_dict=feed_dict)
                accs.append(acc)
                val_summary= sess.run(val_summary_ops, feed_dict=feed_dict)
                summary_writer.add_summary(val_summary, current_step)
            print("Mean accuracy=" + str(sum(accs)/len(accs)))
            new_saver.save(sess, save_path = save_summary_path, global_step=current_step)
            print("====training====")
end_time = time()
alarm.send_message('RN training is finished! Last validation acc: {}'.format(sum(accs)/len(accs)))

INFO:tensorflow:Restoring parameters from ./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-56300
====training====
In epoch >> 1
num batches per epoch is: 2812
step: 56400
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.909104567308
====training====
step: 56500
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.910957532051
====training====
step: 56600
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.908253205128
====training====
step: 56700
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.912610176282
====training====
step: 56800
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.912259615385
====training====
step: 56900
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.912860576923
====training====
step: 57000
====validation start====
In epoch >> 1
num batches pe

In [51]:
print('iteration_time: {}'.format(end_time-start_time))

iteration_time: 22647.983211040497


In [57]:
start_time = time()
alarm.send_message('Starts runnning RN...')
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-84400.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./babi_result/lookup_table/RN2017-07-10_17:44:39/'))
    summary_writer = tf.summary.FileWriter(save_summary_path, sess.graph)    
    print("====training====")   
    batch_train = batch_iter(train_c, train_q, train_l, train_a, train_c_real_len, train_q_real_len)
    for train in batch_train:
        c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = zip(*train)
        feed_dict={c:c_batch, q:q_batch, l:l_batch, a:a_batch, c_real_len:c_real_len_batch, q_real_len:q_real_len_batch, training_phase: True}
        current_step = sess.run(global_step, feed_dict=feed_dict)
        optimizer.run(feed_dict=feed_dict)
        train_summary = sess.run(train_summary_ops, feed_dict=feed_dict)
        summary_writer.add_summary(train_summary, current_step)
        if current_step % (display_step) == 0:
            print("step: {}".format(current_step))
            print("====validation start====")
            batch_val = batch_iter(val_c, val_q, val_l, val_a, val_c_real_len, val_q_real_len, num_epochs=1)
            accs = []
            for val in batch_val:
                c_val, q_val, l_val, a_val, c_real_len_val, q_real_len_val = zip(*val)
                feed_dict={c:c_val, q:q_val, l:l_val, a:a_val, c_real_len:c_real_len_val, q_real_len:q_real_len_val, training_phase: False}
                acc = accuracy.eval(feed_dict=feed_dict)
                accs.append(acc)
                val_summary= sess.run(val_summary_ops, feed_dict=feed_dict)
                summary_writer.add_summary(val_summary, current_step)
            print("Mean accuracy=" + str(sum(accs)/len(accs)))
            new_saver.save(sess, save_path = save_summary_path, global_step=current_step)
            print("====training====")
end_time = time()
alarm.send_message('RN training is finished! Last validation acc: {}'.format(sum(accs)/len(accs)))

INFO:tensorflow:Restoring parameters from ./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-84400
====training====
In epoch >> 1
num batches per epoch is: 2812
step: 84500
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.920723157051
====training====
step: 84600
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.917067307692
====training====
step: 84700
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.917017227564
====training====
step: 84800
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.920272435897
====training====
step: 84900
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.917317708333
====training====
step: 85000
====validation start====
In epoch >> 1
num batches per epoch is: 312
Mean accuracy=0.916215945513
====training====
step: 85100
====validation start====
In epoch >> 1
num batches pe

KeyboardInterrupt: 

## Test

### number of questions per task
task 1, 2, 3: 995 <br>
task 4: 999 <br>
task 5, 6, 7, 8, 9, 10, 11, 12, 13, 14: 995 <br>
task 15: 996 <br>
task 16: 999 <br>
task 17: 992 <br>
task 18: 997 <br>
task 19: 999 <br>
task 20: 993 <br>
total: 19,910 <br>
num of batch: 19910/64 = 311.09375

In [132]:
answer_index = tf.argmax(prediction, axis=1)

In [133]:
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-194700.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./babi_result/lookup_table/RN2017-07-10_17:44:39/'))
    batch_test = batch_iter(test_c, test_q, test_l, test_a, test_c_real_len, test_q_real_len, num_epochs=1, shuffle=False, is_training=False)
    crcts = []
    idxs = []
    preds = []
    for test in batch_test:
        c_batch, q_batch, l_batch, a_batch, c_real_len_batch, q_real_len_batch = zip(*test)
        feed_dict={c:c_batch, q:q_batch, l:l_batch, a:a_batch, c_real_len:c_real_len_batch, q_real_len:q_real_len_batch, training_phase:False}
        idx = sess.run(answer_index, feed_dict=feed_dict)
        idxs.append(idx)
        crct = correct.eval(feed_dict=feed_dict)
        crcts.append(crct)
        pred = sess.run(predicton, feed_dict=feed_dict)
        preds.extend(pred)

INFO:tensorflow:Restoring parameters from ./babi_result/lookup_table/RN2017-07-10_17:44:39/model_summary-194800
In epoch >> 1
num batches per epoch is: 312


TypeError: Cannot interpret feed_dict key as Tensor: Can not convert a bool_ into a Tensor.

In [113]:
with open('./babi_preprocessd/answer_word_dict.pkl', 'rb') as f:
    answer_word_dict = pickle.load(f)
with open('./babi_preprocessd/test_context.pkl', 'rb') as f:
    test_context = pickle.load(f)
with open('./babi_preprocessd/test_question.pkl', 'rb') as f:
    test_question = pickle.load(f)
with open('./babi_preprocessd/test_answer.pkl', 'rb') as f:
    test_answer = pickle.load(f)

In [100]:
result_list = []
for idx, crct in zip(idxs, crcts):
    for i, c in zip(idx, crct):
        result_list.append([answer_word_dict[i], c])

In [101]:
test_result = pd.DataFrame(data= result_list, columns=['answer', 'score'])

In [117]:
task_7= test_result[: 995]
task_2= test_result[995: 995*2]
task_6= test_result[995*2: 995*3]
task_11= test_result[995*3: 995*4]
task_3= test_result[995*4: 995*5]
task_4= test_result[995*5: 995*5+999]
task_8= test_result[995*5+999: 995*6+999]
task_16= test_result[995*6+999: 995*6+999*2]
task_10= test_result[995*6+999*2: 995*7+999*2]
task_17= test_result[995*7+999*2: 995*7+999*2+992]
task_20= test_result[995*7+999*2+992: 995*7+999*2+992+993]
task_1= test_result[995*7+999*2+992+993: 995*8+999*2+992+993]
task_5= test_result[995*8+999*2+992+993: 995*9+999*2+992+993]
task_19= test_result[995*9+999*2+992+993: 995*9+999*3+992+993]
task_18= test_result[995*9+999*3+992+993: 995*9+999*3+992+993+997]
task_9= test_result[995*9+999*3+992+993+997: 995*10+999*3+992+993+997]
task_12= test_result[995*10+999*3+992+993+997: 995*11+999*3+992+993+997]
task_13= test_result[995*11+999*3+992+993+997: 995*12+999*3+992+993+997]
task_15= test_result[995*12+999*3+992+993+997: 995*12+999*3+992+993+997+996]
task_14= test_result[19968-995:]

In [128]:
task_19['real_answer'] = test_answer[995*9+999*2+992+993: 995*9+999*3+992+993]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [130]:
task_19.to_csv('./babi_result/lookup_table/RN2017-07-10_17:44:39/task_19.csv')

In [123]:
print("task 1 acc: {}".format(sum(task_1.score)/len(task_1)))
print("task 2 acc: {}".format(sum(task_2.score)/len(task_2)))
print("task 3 acc: {}".format(sum(task_3.score)/len(task_3)))
print("task 4 acc: {}".format(sum(task_4.score)/len(task_4)))
print("task 5 acc: {}".format(sum(task_5.score)/len(task_5)))
print("task 6 acc: {}".format(sum(task_6.score)/len(task_6)))
print("task 7 acc: {}".format(sum(task_7.score)/len(task_7)))
print("task 8 acc: {}".format(sum(task_8.score)/len(task_8)))
print("task 9 acc: {}".format(sum(task_9.score)/len(task_9)))
print("task 10 acc: {}".format(sum(task_10.score)/len(task_10)))
print("task 11 acc: {}".format(sum(task_11.score)/len(task_11)))
print("task 12 acc: {}".format(sum(task_12.score)/len(task_12)))
print("task 13 acc: {}".format(sum(task_13.score)/len(task_13)))
print("task 14 acc: {}".format(sum(task_14.score)/len(task_14)))
print("task 15 acc: {}".format(sum(task_15.score)/len(task_15)))
print("task 16 acc: {}".format(sum(task_16.score)/len(task_16)))
print("task 17 acc: {}".format(sum(task_17.score)/len(task_17)))
print("task 18 acc: {}".format(sum(task_18.score)/len(task_18)))
print("task 19 acc: {}".format(sum(task_19.score)/len(task_19)))
print("task 20 acc: {}".format(sum(task_20.score)/len(task_20)))

task 1 acc: 0.9979899497487437
task 2 acc: 0.9276381909547738
task 3 acc: 0.84321608040201
task 4 acc: 1.0
task 5 acc: 0.9959798994974874
task 6 acc: 0.9979899497487437
task 7 acc: 0.9809045226130654
task 8 acc: 0.9467336683417086
task 9 acc: 0.9979899497487437
task 10 acc: 0.992964824120603
task 11 acc: 0.9909547738693467
task 12 acc: 1.0
task 13 acc: 1.0
task 14 acc: 0.9989949748743718
task 15 acc: 1.0
task 16 acc: 0.45845845845845845
task 17 acc: 0.8951612903225806
task 18 acc: 0.9789368104312939
task 19 acc: 0.5735735735735735
task 20 acc: 1.0
