In [1]:
import numpy as np
import tensorflow as tf
#import maxout
import highway_maxout as hmn
import utils
import encoder as enc
import dataset as ds
import train as tr
import decoder as dec
import time
from itertools import islice


Using TensorFlow backend.


In [2]:
# ======= FLAGS ==========
# Every tunable of the notebook is registered as a TensorFlow flag and read back
# through the FLAGS object by the cells below.
_tf_flags = tf.app.flags
FLAGS = _tf_flags.FLAGS

# One row per flag: (registration function, flag name, default value, help text).
_FLAG_TABLE = (
    (_tf_flags.DEFINE_integer, 'maxout_layer_size', 200, 'Maxout layer size'),
    (_tf_flags.DEFINE_integer, 'max_sequence_length', 160, 'Max length of context'),
    (_tf_flags.DEFINE_integer, 'max_question_length', 40, 'Max question tokens length'),
    (_tf_flags.DEFINE_float, 'learning_rate', 0.001, 'Learning Rate'),
    (_tf_flags.DEFINE_integer, 'maxout_pooling_size', 8, 'Maxout pooling size'),
    (_tf_flags.DEFINE_integer, 'lstm_size', 200, 'LSTM cell internal size'),
    (_tf_flags.DEFINE_string, 'log_path', '/tmp/working/logs', 'logs location'),
    (_tf_flags.DEFINE_integer, 'acc_batch_size', 10, 'How many examples to use to calculate accuracy'),
    (_tf_flags.DEFINE_integer, 'train_batch_size', 10, 'Train Batch Size'),
    (_tf_flags.DEFINE_integer, 'max_decoder_iterations', 4, 'Decoder Iterations'),
    (_tf_flags.DEFINE_integer, 'max_epoch', 200, 'Max Train Epoch Count'),
)

# Register the flags in the order listed above.
for _define_flag, _flag_name, _flag_default, _flag_help in _FLAG_TABLE:
    _define_flag(_flag_name, _flag_default, _flag_help)


In [3]:
# Uncomment to drop every previously built op/variable before rebuilding the graph:
# tf.reset_default_graph()

# ---- Hyper-parameters: the embedding width is fixed here, the rest come from FLAGS ----
word_vector_size = 300
lstm_size = FLAGS.lstm_size
batch_size = FLAGS.train_batch_size
acc_batch_size = FLAGS.acc_batch_size
max_epoch = FLAGS.max_epoch
max_sequence_length = FLAGS.max_sequence_length
max_question_length = FLAGS.max_question_length
maxout_layer_size = FLAGS.maxout_layer_size
maxout_pooling_size = FLAGS.maxout_pooling_size
max_decoder_iterations = FLAGS.max_decoder_iterations


# ---- Graph inputs ----
# NOTE: despite its name, this placeholder is consumed as a *keep* probability:
# it is passed as `output_keep_prob` to every DropoutWrapper built below.
dropout_rate_ph = tf.placeholder(tf.float32)
question_ph = tf.placeholder(
    tf.float32,
    [batch_size, max_question_length, word_vector_size],
    name="q_input")
document_ph = tf.placeholder(
    tf.float32,
    [batch_size, max_sequence_length, word_vector_size],
    name="d_input")
doc_len_ph = tf.placeholder(tf.int32, [batch_size])
que_len_ph = tf.placeholder(tf.int32, [batch_size])

# Aliases under which the length placeholders are handed to the encoder.
document_size = doc_len_ph
question_size = que_len_ph


def _lstm_with_dropout(num_units, keep_prob):
    """Return an LSTMCell of `num_units` wrapped in output dropout driven by `keep_prob`."""
    cell = tf.nn.rnn_cell.LSTMCell(num_units)
    return tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=keep_prob)


# ---- Recurrent cells ----
with tf.name_scope('ENCODER'):
    # Cell shared by the document/question encoder.
    lstm = _lstm_with_dropout(lstm_size, dropout_rate_ph)

# Forward and backward cells of the Bi-LSTM used by the COATTENTION ENCODER.
with tf.name_scope('COATTENTION_ENCODER'):
    lstm_cenc_fw = _lstm_with_dropout(lstm_size, dropout_rate_ph)
    lstm_cenc_bw = _lstm_with_dropout(lstm_size, dropout_rate_ph)

# Cell for the DYNAMIC POINTING DECODER (no dropout wrapper on this one).
lstm_dec = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# Zero initial state, currently unused:
# lstm_dec_state = lstm_dec.zero_state(1, tf.float32)

# Initial span guesses; open question: draw them at random from (0, document_size-1)?
start_pos = 0
end_pos = 0

# ---- Sentinel vectors, one per encoding (question / document) ----
sentinel_q = tf.get_variable(
    "sentinel_q",
    [1, lstm_size],
    initializer=tf.random_normal_initializer())
sentinel_d = tf.get_variable(
    "sentinel_d",
    [1, lstm_size],
    initializer=tf.random_normal_initializer())

# Histogram summaries of each sentinel and of its maximum entry.
for _tag, _max_tag, _sentinel in (
        ('sentinel_q', 'sentinel_q_max', sentinel_q),
        ('sentinel_d', 'sentinel_d_max', sentinel_d)):
    tf.summary.histogram(_tag, _sentinel)
    tf.summary.histogram(_max_tag, tf.reduce_max(_sentinel))

# ---- Optimizer ----
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

In [4]:
# Run the encoder over the batch. The printed tensor has shape (batch, D, 2L).
U = enc.encoderBatch(
    # padded inputs
    document_ph,
    question_ph,
    # per-example lengths
    document_size,
    question_size,
    # recurrent cells: encoder LSTM, then forward/backward co-attention cells
    lstm,
    lstm_cenc_fw,
    lstm_cenc_bw,
    # sentinel vectors
    sentinel_d,
    sentinel_q,
    # remaining settings are read from the flags object
    FLAGS,
)
print(U)

Tensor("COATTENTION_ENCODER_1/Slice_2:0", shape=(10, 160, 400), dtype=float32)


In [5]:
# ===================== DYNAMIC POINTING DECODER =============
# The decoder returns one score tensor for the answer start and one for the end.
sum_start_scores, sum_end_scores = dec.decoderBatch(U, lstm_dec, dropout_rate_ph, FLAGS)

# Ground-truth start/end indices, one per example of the batch.
start_true = tf.placeholder(tf.int32, [batch_size])
end_true = tf.placeholder(tf.int32, [batch_size])

# ---- Loss ----
# Targets are one-hot encoded over max_sequence_length positions.
onehot_labels_start = tf.one_hot(start_true, max_sequence_length)
onehot_labels_end = tf.one_hot(end_true, max_sequence_length)


def _mean_cross_entropy(onehot_labels, logits):
    """Softmax cross-entropy between `onehot_labels` and `logits`, reduced with a mean."""
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels,
        logits=logits)
    return tf.reduce_mean(per_example)


with tf.name_scope('Loss'):
    loss_start = _mean_cross_entropy(onehot_labels_start, sum_start_scores)
    loss_end = _mean_cross_entropy(onehot_labels_end, sum_end_scores)
    # Total loss is the sum of the start and end terms.
    sum_loss = loss_start + loss_end


# ---- Summaries for the scores and the loss ----
tf.summary.histogram('sum_start_scores', sum_start_scores)
tf.summary.histogram('sum_end_scores', sum_end_scores)
# The same scalar is registered twice so the test and train collections can be
# merged and written independently.
for _tag, _collection in (('loss_test', "TEST_STAT"), ('loss_train', "TRAIN_STAT")):
    tf.summary.scalar(_tag, sum_loss, [_collection])

# ---- Accuracy ----
with tf.name_scope('Accuracy'):
    with tf.name_scope('Prediction'):
        # Predicted index = arg-max of the scores along axis 1.
        pr_start_idx = tf.to_int32(tf.argmax(sum_start_scores, 1))
        pr_end_idx = tf.to_int32(tf.argmax(sum_end_scores, 1))

    with tf.name_scope('Accuracy'):
        # The metric is computed in Python by utils.f1_score_int_avg via py_func.
        _f1_inputs = [pr_start_idx, pr_end_idx, start_true, end_true]
        accuracy_avg = tf.py_func(utils.f1_score_int_avg, _f1_inputs, tf.float64)


for _tag, _collection in (('avg_accuracy_test', "TEST_STAT"), ('avg_accuracy_train', "TRAIN_STAT")):
    tf.summary.scalar(_tag, accuracy_avg, [_collection])


# ---- Train op ----
with tf.name_scope('Train'):
    train_step = optimizer.minimize(sum_loss)


In [None]:
#=========== Training ==================
# Runs up to `max_epoch` epochs. Each epoch re-opens the training CSV, performs at
# most DATASET_LENGTH training steps, then evaluates on the next batch read from
# the test CSV.

# Alternative tf.data input pipeline, kept for reference:
#dataset = ds.getDataset(["./train_train_task_b.csv"], max_sequence_length)
#iterator = dataset.make_one_shot_iterator()
#next_element = iterator.get_next()

#dataset_validation = ds.getDataset(["./test_dataset_160.csv"], max_sequence_length)
#iterator_valid = dataset_validation.make_one_shot_iterator()
#next_element_valid = iterator_valid.get_next()

# Summaries split by collection: train-only, test-only, and the default collection.
summary_op_train = tf.summary.merge_all("TRAIN_STAT")
summary_op_test = tf.summary.merge_all("TEST_STAT")
summary_op = tf.summary.merge_all()

init = tf.global_variables_initializer()

# Upper bound on training batches per epoch; also the per-epoch stride used to
# build the step number passed to tr.trainStep.
DATASET_LENGTH = 5


def _read_test_batch(file_test):
    """Read the next evaluation feed dict from `file_test`; may return None.

    The trailing 1 mirrors the 0.5 passed for training batches; presumably it is
    the value fed to dropout_rate_ph -- TODO confirm in train.processLineBatch.
    """
    return tr.processLineBatch(file_test, batch_size, max_sequence_length, max_question_length,
                               question_ph, document_ph, dropout_rate_ph,
                               doc_len_ph, que_len_ph, start_true, end_true,
                               1)


with tf.Session() as sess:
    sess.run(init)
    h_param_str = utils.make_h_param_string(FLAGS.learning_rate, FLAGS.lstm_size, max_sequence_length, FLAGS.maxout_pooling_size)
    writer = tf.summary.FileWriter(FLAGS.log_path + "/4-" + h_param_str, sess.graph)
    try:
        # The test file is opened once, so successive epochs read successive batches.
        with open('test_dataset_160.csv') as file_test:
            # Fixed: the original `range(200 or max_epoch)` always evaluated to
            # range(200), so the max_epoch flag was silently ignored.
            for epoch_ in range(max_epoch):
                step_ = 0
                with open('train_dataset_160.csv') as file_train:
                    while step_ < DATASET_LENGTH:
                        feed_dict = tr.processLineBatch(file_train, batch_size,
                                                        max_sequence_length, max_question_length,
                                                        question_ph, document_ph, dropout_rate_ph,
                                                        doc_len_ph, que_len_ph, start_true, end_true,
                                                        0.5)
                        # None means no further batch could be read from the file.
                        if feed_dict is None:
                            break
                        tr.trainStep(sess,
                                     feed_dict,
                                     writer,
                                     train_step, sum_loss, accuracy_avg, summary_op, summary_op_train,
                                     epoch_ * DATASET_LENGTH + step_, profiling=False)
                        step_ += 1

                print('Epoch', epoch_, 'completed')

                test_params = _read_test_batch(file_test)
                if test_params is None:
                    # No batch left in the test file: rewind and try once more
                    # instead of handing None to the evaluation step.
                    file_test.seek(0)
                    test_params = _read_test_batch(file_test)
                if test_params is None:
                    print('Epoch', epoch_, 'no test batch available, evaluation skipped')
                    continue
                tr.accuracyTest(sess, test_params, writer,
                                accuracy_avg, summary_op, summary_op_test, pr_start_idx, pr_end_idx, epoch_)
            print('End')
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()
            

Epoch 0 completed
AVG accuracy 0.0
Train Error {<tf.Tensor 'q_input:0' shape=(10, 40, 300) dtype=float32>: [array([[ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       [-0.007306,  0.057647,  0.024153, ...,  0.03632 , -0.041262,
         0.029278],
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       ..., 
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ]]), array([[ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       [-0.005994, -0.041677,  0.002207, ..., -0.040899, -0.028839,
         0.029405],
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       ..., 
       [ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,  0.      ],
       [ 0.      ,  0.      ,  0.      , ...,  0

Epoch 1 completed
AVG accuracy 0.05
Epoch 2 completed
AVG accuracy 0.0
Epoch 3 completed
AVG accuracy 0.04
