In [1]:
import numpy as np
import tensorflow as tf
#import maxout
import highway_maxout as hmn
import utils
import encoder as enc
import dataset as ds

In [2]:
#======= FLAGS ==========
# Hyper-parameters and paths are registered as TensorFlow flags so that they can
# be read through the shared FLAGS object, which is also handed to the helper
# modules (encoder / highway maxout network).
FLAGS = tf.app.flags.FLAGS

# One row per flag: (definer, flag name, default value, help text).
# Rows are registered top to bottom.
_flag_specs = (
    (tf.app.flags.DEFINE_integer, 'maxout_layer_size', 20, 'Maxout layer size'),
    (tf.app.flags.DEFINE_integer, 'max_sequence_length', 160, 'Max length of context'),
    (tf.app.flags.DEFINE_integer, 'max_question_length', 40, 'Max question tokens length'),
    (tf.app.flags.DEFINE_float, 'learning_rate', 0.001, 'Learning Rate'),
    (tf.app.flags.DEFINE_integer, 'maxout_pooling_size', 8, 'Maxout pooling size'),
    (tf.app.flags.DEFINE_integer, 'lstm_size', 20, 'LSTM cell internal size'),
    (tf.app.flags.DEFINE_string, 'log_path', '/tmp/working/logs', 'logs location'),
    (tf.app.flags.DEFINE_integer, 'acc_batch_size', 5, 'How many examples to use to calculate accuracy'),
)
for _define_flag, _flag_name, _flag_default, _flag_help in _flag_specs:
    _define_flag(_flag_name, _flag_default, _flag_help)


In [3]:
# To start from an empty graph when re-running this cell, uncomment:
#tf.reset_default_graph();

# ---- sizes and limits: flag-backed values first, then fixed constants ----
lstm_size = FLAGS.lstm_size
acc_batch_size = FLAGS.acc_batch_size
maxout_pooling_size = FLAGS.maxout_pooling_size
maxout_layer_size = FLAGS.maxout_layer_size
max_sequence_length = FLAGS.max_sequence_length
max_question_length = FLAGS.max_question_length
word_vector_size = 300
max_decoder_iterations = 4
max_epoch = 100


def _dropout_lstm_cell(num_units, keep_prob):
    """Return an LSTMCell of `num_units` wrapped so its outputs are kept with probability `keep_prob`."""
    bare_cell = tf.nn.rnn_cell.LSTMCell(num_units)
    return tf.nn.rnn_cell.DropoutWrapper(cell=bare_cell, output_keep_prob=keep_prob)


# ---- graph inputs ----
# Despite its name, dropout_rate_ph is consumed as the *keep* probability of the
# dropout wrappers below.
dropout_rate_ph = tf.placeholder(tf.float32)
question_ph = tf.placeholder(
    tf.float32,
    shape=[1, max_question_length, word_vector_size],
    name="q_input")
document_ph = tf.placeholder(
    tf.float32,
    shape=[1, max_sequence_length, word_vector_size],
    name="d_input")
# Scalar lengths of the current document / question.
doc_len_ph = tf.placeholder(tf.int32, ())
que_len_ph = tf.placeholder(tf.int32, ())
document_size = doc_len_ph
question_size = que_len_ph

# ---- recurrent cells ----
# Cell shared by the document/question encoder.
with tf.name_scope('ENCODER'):
    lstm = _dropout_lstm_cell(lstm_size, dropout_rate_ph)

# Forward and backward cells of the Bi-LSTM used by the coattention encoder.
with tf.name_scope('COATTENTION_ENCODER'):
    lstm_cenc_fw = _dropout_lstm_cell(lstm_size, dropout_rate_ph)
    lstm_cenc_bw = _dropout_lstm_cell(lstm_size, dropout_rate_ph)

# Cell of the dynamic pointing decoder; its zero state is requested where the
# decoder loop is built.
lstm_dec = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# Initial answer-span guess (a random position inside the document was
# considered as an alternative).
start_pos = 0
end_pos = 0

# ---- trainable sentinel vectors, one for each encoding ----
sentinel_q = tf.get_variable(
    "sentinel_q", [lstm_size, 1],
    initializer=tf.random_normal_initializer())
sentinel_d = tf.get_variable(
    "sentinel_d", [lstm_size, 1],
    initializer=tf.random_normal_initializer())

# Optional diagnostics for the sentinels:
#tf.summary.histogram('sentinel_q', sentinel_q)
#tf.summary.histogram('sentinel_q_max', tf.reduce_max(sentinel_q))
#tf.summary.histogram('sentinel_d', sentinel_d)
#tf.summary.histogram('sentinel_d_max', tf.reduce_max(sentinel_d))

# ---- optimizer ----
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

In [4]:
# Build the encoder part of the graph from the document/question placeholders,
# their scalar length placeholders, the encoder LSTM cell, the two Bi-LSTM cells
# of the coattention encoder and the two sentinel variables. FLAGS is passed
# through so the helper can read the hyper-parameters itself.
# NOTE(review): the decoder slices U as tf.slice(U, [0, pos], [lstm_size*2, 1]),
# so U presumably has lstm_size*2 rows and one column per document position --
# confirm against encoder.py.
U = enc.encoder(
    document_ph, question_ph, 
    document_size, question_size, 
    lstm, lstm_cenc_fw, lstm_cenc_bw, 
    sentinel_d, sentinel_q, 
    FLAGS)

Tensor("Slice_1:0", shape=(?, ?), dtype=float32)
Tensor("COATTENTION_ENCODER_1/strided_slice:0", shape=(?, 40), dtype=float32)


In [5]:
# ===================== DYNAMIC POINTING DECODER =============

def decoderIteration(U, lstm_state, start_pos, end_pos, iter_number):
    """Run one refinement step of the pointing decoder.

    Start scores are computed by hmn.HMN2 from the current start/end columns of
    U; end scores are computed from the *new* start column and the current end
    column. The decoder LSTM is then advanced on the concatenation of the new
    start and new end columns.

    Args:
        U: encoder output; columns are addressed by position via tf.slice.
        lstm_state: current state of the decoder LSTM (its `.h` is used).
        start_pos: current start position estimate (scalar int tensor).
        end_pos: current end position estimate (scalar int tensor).
        iter_number: iteration counter forwarded to hmn.HMN2.

    Returns:
        Tuple (scores_start, scores_end, new_start_pos, new_end_pos,
        new_lstm_state).

    NOTE(review): argmax is taken over axis 0 here, whereas the final prediction
    on the summed scores uses axis 1 -- confirm the shape hmn.HMN2 returns.
    """
    column_extent = [lstm_size*2, 1]

    with tf.name_scope('Decoder_Iteration'):
        with tf.name_scope('Next_Start'):
            hidden_col = tf.transpose(lstm_state.h)
            u_cur_start = tf.slice(U, [0, start_pos], column_extent)
            u_cur_end = tf.slice(U, [0, end_pos], column_extent)
            scores_start = hmn.HMN2(
                U, hidden_col, u_cur_start, u_cur_end,
                document_size, 'start', FLAGS, dropout_rate_ph, iter_number)
            best_start = tf.argmax(scores_start, 0)
            new_start_pos = tf.to_int32(best_start)

        with tf.name_scope('Next_End'):
            hidden_col = tf.transpose(lstm_state.h)
            u_new_start = tf.slice(U, [0, new_start_pos], column_extent)
            u_cur_end = tf.slice(U, [0, end_pos], column_extent)
            scores_end = hmn.HMN2(
                U, hidden_col, u_new_start, u_cur_end,
                document_size, 'end', FLAGS, dropout_rate_ph, iter_number)
            best_end = tf.argmax(scores_end, 0)
            new_end_pos = tf.to_int32(best_end)

        with tf.name_scope('LSTM_State_Update'):
            # Stack the chosen start and end columns on top of each other and
            # feed them to the decoder LSTM as a single row.
            picked_start = tf.slice(U, [0, new_start_pos], column_extent, name='slice-5')
            picked_end = tf.slice(U, [0, new_end_pos], column_extent)
            lstm_input = tf.concat([picked_start, picked_end], axis=0)
            flat_input = tf.reshape(lstm_input, [1, lstm_size*4])
            _, new_lstm_state = lstm_dec(flat_input, lstm_state)

        return scores_start, scores_end, new_start_pos, new_end_pos, new_lstm_state




with tf.name_scope('DYNAMIC_POINTING_DECODER'):
    # Loop state carried through tf.while_loop:
    #   i                    -- number of decoder iterations already executed
    #   (prev_start, prev_end) -- span estimate before the last iteration
    #   (start, end)           -- current span estimate
    #   (sum_start, sum_end)   -- scores accumulated over all iterations
    #   lstm_dec_state         -- decoder LSTM state
    #   U                      -- encoder output (passed through unchanged)
    i = tf.constant(0)
    # The previous estimate starts at -1 so that it differs from the initial
    # estimate (0, 0) and the first iteration always runs.
    prev_start_pos = tf.constant(-1, shape=tf.TensorShape([]), dtype=tf.int32)
    prev_end_pos = tf.constant(-1, shape=tf.TensorShape([]), dtype=tf.int32)
    start_pos = tf.constant(0, shape=tf.TensorShape([]), dtype=tf.int32)
    end_pos = tf.constant(0, shape=tf.TensorShape([]), dtype=tf.int32)
    sum_start_scores = tf.zeros([1, document_size])
    sum_end_scores = tf.zeros([1, document_size])
    lstm_dec_state = lstm_dec.zero_state(1, tf.float32)
    while_data = (i, (prev_start_pos, prev_end_pos), (start_pos, end_pos), (sum_start_scores, sum_end_scores), lstm_dec_state, U)

    def while_cond(i, prev, cur, summ, state, U):
        """Continue while the iteration budget is not used up and the span estimate still changes.

        `i` counts completed iterations starting from 0, so a strict comparison
        is required to run at most `max_decoder_iterations` iterations
        (`<=` would allow one extra pass).
        """
        budget_left = tf.less(i, max_decoder_iterations)
        span_changed = tf.logical_or(tf.not_equal(prev[0], cur[0]), tf.not_equal(prev[1], cur[1]))
        return tf.logical_and(budget_left, span_changed)

    def while_body(i, prev, cur, summ, state, U):
        """Run one decoder iteration and accumulate its start/end scores."""
        scores_start, scores_end, new_start_pos, new_end_pos, lstm_dec_state = decoderIteration(U, state, cur[0], cur[1], tf.add(i, 1))
        # Pin the static shapes so the loop-carried tensors keep the same
        # shapes as the initial loop variables.
        scores_start.set_shape([1, None])
        scores_end.set_shape([1, None])
        new_start_pos.set_shape(tf.TensorShape([]))
        new_end_pos.set_shape(tf.TensorShape([]))
        sum_start_scores = tf.add(summ[0], scores_start)
        sum_end_scores   = tf.add(summ[1], scores_end)
        # The current estimate becomes the previous one for the next check.
        return (tf.add(i, 1), cur, (new_start_pos, new_end_pos), (sum_start_scores, sum_end_scores), lstm_dec_state, U)
    
    # Only the accumulated scores are needed downstream.
    _, _, _, (sum_start_scores, sum_end_scores), _, _ = tf.while_loop(while_cond, while_body, while_data)


# Ground-truth answer span, fed as [start, end].
start_end_true = tf.placeholder(tf.int32, [2])

# For debugging, the summed scores can be printed next to the labels:
#sum_start_scores = tf.Print(sum_start_scores, [start_end_true, sum_start_scores], 'sum_start_scores: ')
#sum_end_scores = tf.Print(sum_end_scores, [start_end_true, sum_end_scores], 'sum_start_scores: ')

# ---- loss ----
# One one-hot row per boundary (start, end), each document_size wide.
onehot_labels = tf.one_hot(start_end_true, document_size)
with tf.name_scope('Loss'):
    stacked_scores = tf.concat([sum_start_scores, sum_end_scores], axis=0)
    boundary_losses = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels,
        logits=stacked_scores)
    # NOTE(review): despite the name, the loss is the larger of the start and
    # end cross-entropies, not their sum -- confirm this is intended.
    sum_loss = tf.reduce_max(boundary_losses)

# ---- summaries: histograms go to the default collection, scalars to the
# train / test collections ----
for _hist_tag, _hist_values in (('sum_start_scores', sum_start_scores),
                                ('sum_end_scores', sum_end_scores)):
    tf.summary.histogram(_hist_tag, _hist_values)
tf.summary.scalar('loss_test', sum_loss, collections=["TEST_STAT"])
tf.summary.scalar('loss', sum_loss, collections=["TRAIN_STAT"])

# ---- prediction and per-example accuracy ----
with tf.name_scope('Accuracy'):
    with tf.name_scope('Prediction'):
        best_start = tf.argmax(sum_start_scores, 1)
        pr_start_idx = tf.to_int32(best_start)[0]
        best_end = tf.argmax(sum_end_scores, 1)
        pr_end_idx = tf.to_int32(best_end)[0]

    with tf.name_scope('Accuracy'):
        # The score is computed in Python by utils.f1_score_int from the
        # predicted and the true span boundaries.
        f1_inputs = [pr_start_idx, pr_end_idx, start_end_true[0], start_end_true[1]]
        accuracy = tf.py_func(utils.f1_score_int, f1_inputs, tf.float64)
        # Log the true span, the predicted span and the score on every evaluation.
        accuracy = tf.Print(
            accuracy,
            [start_end_true, [pr_start_idx, pr_end_idx], accuracy],
            'True and Predicted: ')

# The averaged accuracy is computed in Python and fed back in for logging.
avg_accuracy_ph = tf.placeholder(tf.float32, ())
tf.summary.scalar('avg_accuracy', avg_accuracy_ph, collections=["TEST_STAT"])
tf.summary.scalar('avg_accuracy_train', accuracy, collections=["TRAIN_STAT"])

# ---- training op ----
with tf.name_scope('Train'):
    train_step = optimizer.minimize(sum_loss)


In [None]:
#=========== Training ==================

# Most recent average validation accuracy. It is updated by accuracyValidation
# and fed into avg_accuracy_ph so that it can be written as a summary.
last_avg_accuracy = 0.0

def processLine(str, max_doc_length, max_que_length):
    """Parse one `start;end;document;question` record into model inputs.

    The record's line terminator is stripped first: lines obtained by iterating
    over a file keep their trailing newline, which would otherwise stay glued
    to the last question token.

    Args:
        str: one record; fields are separated by ';', tokens by single spaces.
            (The name shadows the builtin but is part of the signature.)
        max_doc_length: length limit passed to ds.sentence2Vectors_onstring
            for the document tokens.
        max_que_length: length limit passed to ds.sentence2Vectors_onstring
            for the question tokens.

    Returns:
        Tuple (start_pos, end_pos, document_tokens, question_tokens,
        document_vectors, question_vectors).

    Raises:
        ValueError: if the record does not have exactly four ';'-separated
            fields or a position is not an integer.
    """
    record = str.rstrip('\r\n')
    start_field, end_field, doc, que = record.split(';')
    start_pos = int(start_field)
    end_pos = int(end_field)
    document = doc.split(' ')
    question = que.split(' ')
    doc_v = ds.sentence2Vectors_onstring(document, max_doc_length)
    que_v = ds.sentence2Vectors_onstring(question, max_que_length)
    return start_pos, end_pos, document, question, doc_v, que_v


def accuracyValidation(acc_batch_size, step):
    """Average the accuracy over validation examples and log the test summaries.

    Pulls `acc_batch_size` examples from `next_element_valid`, evaluates
    `accuracy` on each with dropout disabled (keep probability 1.0), stores the
    mean in the global `last_avg_accuracy` and writes the "TEST_STAT" summaries
    for `step`.

    The merged summary op created once at module level (`summary_op_test`) is
    reused: calling tf.summary.merge_all here would add a new op to the graph on
    every invocation. The summaries are evaluated after the mean is known, so
    the logged `avg_accuracy` belongs to this validation round rather than to
    the previous one; `loss_test` is the loss of the last evaluated example.

    Relies on the module-level `sess`, `writer`, `next_element_valid` and
    `summary_op_test`.

    Args:
        acc_batch_size: number of validation examples to evaluate; nothing is
            done when it is not positive.
        step: global step under which the summaries are written.

    Raises:
        tf.errors.OutOfRangeError: propagated from the validation iterator when
            it runs out of examples.
    """
    global last_avg_accuracy
    if acc_batch_size <= 0:
        # Nothing to average; also avoids dividing by zero below.
        return

    acc_accum = 0
    feed_dict = None
    for _ in range(acc_batch_size):
        start_true, end_true, doc, que, doc_v, que_v = sess.run(next_element_valid)
        feed_dict = {
            question_ph: [que_v],
            document_ph: [doc_v],
            start_end_true: [start_true, end_true],
            doc_len_ph: len(doc),
            que_len_ph: len(que),
            dropout_rate_ph: 1.0
        }
        acc_accum += sess.run(accuracy, feed_dict=feed_dict)

    last_avg_accuracy = acc_accum/acc_batch_size

    # Re-use the feed of the last example and add the fresh average to it.
    feed_dict[avg_accuracy_ph] = last_avg_accuracy
    stat_test = sess.run(summary_op_test, feed_dict=feed_dict)
    writer.add_summary(stat_test,  step)
    print('AVG accuracy', last_avg_accuracy)

def trainStep(feed_dict, step, profiling = False):
    """Run one optimisation step and write its summaries.

    Examples whose true span lies outside the document window (start below 0 or
    end beyond `max_sequence_length - 1`) are skipped. The span is read from
    `feed_dict[start_end_true]`, i.e. from the very labels that are fed to the
    graph, instead of from variables that happen to exist in the calling loop.

    Relies on the module-level `sess`, `writer`, `summary_op` and
    `summary_op_train`.

    Args:
        feed_dict: complete feed for the graph; must contain `start_end_true`
            as a `[start, end]` pair.
        step: global step under which summaries (and run metadata) are written.
        profiling: when True, the step is run with full tracing and the run
            metadata is attached to the event file.
    """
    start_true, end_true = feed_dict[start_end_true]
    if start_true < 0 or end_true > max_sequence_length - 1: 
        print('Ignore step', start_true, end_true)
        return
    
    run_options = None
    run_metadata = None
    if profiling:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    # Loss and accuracy are fetched alongside the train op but only the
    # serialized summaries are used afterwards.
    _, _, _, stat, stat_train = sess.run(
        (train_step, sum_loss, accuracy,  summary_op, summary_op_train),
        feed_dict = feed_dict,
        options=run_options, run_metadata=run_metadata
    )
    if profiling: writer.add_run_metadata(run_metadata, 'step%d' % step)
    writer.add_summary(stat, step)
    writer.add_summary(stat_train, step)


# Disabled: dataset-based training input. The training loop below reads
# 'train_dataset_160.csv' line by line instead.
#dataset = ds.getDataset(["./train_train_task_b.csv"], max_sequence_length)
#iterator = dataset.make_one_shot_iterator()
#next_element = iterator.get_next()

# Validation examples come from a one-shot iterator over the validation CSV;
# accuracyValidation pulls one example per sess.run(next_element_valid).
# NOTE(review): a one-shot iterator cannot be rewound, so once the file is
# exhausted every further pull raises tf.errors.OutOfRangeError.
dataset_validation = ds.getDataset(["./valid_dataset_160.csv"], max_sequence_length)
iterator_valid = dataset_validation.make_one_shot_iterator()
next_element_valid = iterator_valid.get_next()

# Merged summary ops, built once: the "TRAIN_STAT" and "TEST_STAT" collections
# hold the scalar summaries, while merge_all() without a key merges the default
# summary collection (the score histograms).
summary_op_train = tf.summary.merge_all("TRAIN_STAT")
summary_op_test = tf.summary.merge_all("TEST_STAT")
summary_op = tf.summary.merge_all()

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    h_param_str = utils.make_h_param_string(FLAGS.learning_rate, FLAGS.lstm_size, max_sequence_length, FLAGS.maxout_pooling_size)
    writer = tf.summary.FileWriter(FLAGS.log_path + "/1-" + h_param_str, sess.graph)

    # NOTE(review): the number of epochs is hard-coded; max_epoch from the setup
    # cell is not used here.
    n_epochs = 200
    # Only the first records of the training file are used in every epoch:
    # steps 0..last_step_in_epoch inclusive.
    last_step_in_epoch = 50
    # Size of one epoch on the summary axis, so that the global step of the last
    # step of an epoch never equals that of the first step of the next epoch.
    steps_per_epoch = last_step_in_epoch + 1

    try:
        for epoch_ in range(n_epochs):
            step_ = 0
            with open('train_dataset_160.csv') as inf:
                for line in inf:
                    try: 
                        start_true, end_true, doc, que, doc_v, que_v = processLine(line, max_sequence_length, max_question_length)
                        feed_dict = {
                            question_ph: [que_v], 
                            document_ph: [doc_v], 
                            start_end_true: [start_true, end_true],
                            doc_len_ph: len(doc),
                            que_len_ph: len(que),
                            avg_accuracy_ph: last_avg_accuracy,
                            dropout_rate_ph: 0.5
                        }
                        # Tracing is switched off; set this to
                        # (step_ % 51 == 0) to profile periodically.
                        profiling = False
                        trainStep(feed_dict, epoch_*steps_per_epoch + step_, profiling=profiling)

                        # NOTE(review): with the per-epoch cap above, step_
                        # never reaches 100, so validation is currently never
                        # triggered.
                        if step_ > 0 and step_ % 100 == 0:
                            # --------- ACCURACY -------------
                            accuracyValidation(acc_batch_size, epoch_*9430 + step_)

                        step_+=1
                    except tf.errors.OutOfRangeError:
                        # Raised when the validation iterator is exhausted.
                        print("End of dataset")  # ==> "End of dataset"
                        break
                    except Exception as err:
                        # Skip the faulty record but say why. Unlike a bare
                        # `except:`, this lets KeyboardInterrupt stop training.
                        print('Error', "skip", repr(err))
                        step_+=1
                    if (step_ > last_step_in_epoch): break
            print('Epoch', epoch_, 'completed')
    finally:
        # Flush pending events and release the event file even if training is
        # interrupted.
        writer.close()
    print('End')


Epoch 0 completed
