In [1]:
import numpy as np
import tensorflow as tf
import utils
import encoder as enc
import dataset as ds
import train as tr
import decoder as dec
import dataset
import modeltrainer as mt


Using TensorFlow backend.


In [2]:
# ---------------------------------------------------------------------------
# Hyper-parameters, registered through TensorFlow's flag module and read back
# everywhere else in the notebook as FLAGS.<name>.
# ---------------------------------------------------------------------------
flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_integer('maxout_layer_size', 40, 'Maxout layer size')
flags.DEFINE_integer('max_sequence_length', 160, 'Max length of context')
flags.DEFINE_integer('max_question_length', 40, 'Max question tokens length')
flags.DEFINE_float('learning_rate', 0.0005, 'Learning Rate')
flags.DEFINE_integer('maxout_pooling_size', 8, 'Maxout pooling size')
flags.DEFINE_integer('lstm_size', 40, 'LSTM cell internal size')
flags.DEFINE_string('log_path', '/tmp/working/logs', 'logs location')
flags.DEFINE_integer('acc_batch_size', 10, 'How many examples to use to calculate accuracy')
# Disabled flag, kept for reference:
# flags.DEFINE_integer('train_batch_size', 20, 'Train Batch Size')
flags.DEFINE_integer('max_decoder_iterations', 4, 'Decoder Iterations')
flags.DEFINE_integer('max_epoch', 100, 'Max Train Epoch Count')


In [3]:
# Graph reset is disabled; un-comment to start from an empty default graph.
#tf.reset_default_graph()

# --- model dimensions: all read from FLAGS except the word-vector width ---
word_vector_size = 300
lstm_size = FLAGS.lstm_size
maxout_layer_size = FLAGS.maxout_layer_size
maxout_pooling_size = FLAGS.maxout_pooling_size
max_decoder_iterations = FLAGS.max_decoder_iterations
max_sequence_length = FLAGS.max_sequence_length
max_question_length = FLAGS.max_question_length
max_epoch = FLAGS.max_epoch
acc_batch_size = FLAGS.acc_batch_size

# --- scalar feeds ---
batch_size = tf.placeholder(dtype=tf.int32, shape=())
learning_rate_ph = tf.placeholder(dtype=tf.float32, shape=())

# Declared without a static shape. Despite the name, this notebook passes it
# to the DropoutWrapper cells as `output_keep_prob`.
dropout_rate_ph = tf.placeholder(dtype=tf.float32)

# --- per-example feeds; the leading dimension is left open (None) ---
question_ph = tf.placeholder(
    dtype=tf.float32,
    shape=[None, max_question_length, word_vector_size],
    name="q_input")
document_ph = tf.placeholder(
    dtype=tf.float32,
    shape=[None, max_sequence_length, word_vector_size],
    name="d_input")
doc_len_ph = tf.placeholder(dtype=tf.int32, shape=[None])
que_len_ph = tf.placeholder(dtype=tf.int32, shape=[None])
start_true = tf.placeholder(dtype=tf.int32, shape=[None])
end_true = tf.placeholder(dtype=tf.int32, shape=[None])

# Aliases: the same placeholder objects under the names the encoder call uses.
document_size = doc_len_ph
question_size = que_len_ph

# Encoder cell: an LSTM whose outputs pass through dropout. The keep
# probability is supplied at run time via dropout_rate_ph.
with tf.name_scope('ENCODER'):
    lstm = tf.nn.rnn_cell.DropoutWrapper(
        cell=tf.nn.rnn_cell.LSTMCell(lstm_size),
        output_keep_prob=dropout_rate_ph)


# Forward and backward cells for the Bi-LSTM of the COATTENTION ENCODER,
# each wrapped with the same output dropout.
with tf.name_scope('COATTENTION_ENCODER'):
    lstm_cenc_fw = tf.nn.rnn_cell.DropoutWrapper(
        cell=tf.nn.rnn_cell.LSTMCell(lstm_size),
        output_keep_prob=dropout_rate_ph)
    lstm_cenc_bw = tf.nn.rnn_cell.DropoutWrapper(
        cell=tf.nn.rnn_cell.LSTMCell(lstm_size),
        output_keep_prob=dropout_rate_ph)

# Cell for the DYNAMIC POINTING DECODER; no dropout wrapper is applied here.
lstm_dec = tf.nn.rnn_cell.LSTMCell(lstm_size)


# Sentinel vectors, one for the question encoding and one for the document
# encoding. Each is a [1, lstm_size] variable drawn from a random-normal
# initializer (tf.get_variable creates trainable variables by default).
sentinel_q = tf.get_variable(
    "sentinel_q", [1, lstm_size], initializer=tf.random_normal_initializer())
sentinel_d = tf.get_variable(
    "sentinel_d", [1, lstm_size], initializer=tf.random_normal_initializer())

# TensorBoard summaries. The whole vector is logged as a histogram; its
# maximum is a single number, so it is logged as a scalar (a histogram of one
# value carries no distribution information).
tf.summary.histogram('sentinel_q', sentinel_q)
tf.summary.scalar('sentinel_q_max', tf.reduce_max(sentinel_q))
tf.summary.histogram('sentinel_d', sentinel_d)
tf.summary.scalar('sentinel_d_max', tf.reduce_max(sentinel_d))

# Adam optimizer; its learning rate is fed per run through learning_rate_ph
# rather than fixed at graph-construction time.
optimizer = tf.train.AdamOptimizer(learning_rate_ph)

In [4]:
# Run the encoder module on the document / question placeholders.
# Per the original note the result is shaped (batch, D, 2L) -- TODO confirm
# against enc.encoderBatch.
U = enc.encoderBatch(
    document_ph,
    question_ph,
    document_size,   # same object as doc_len_ph
    question_size,   # same object as que_len_ph
    lstm,
    lstm_cenc_fw,
    lstm_cenc_bw,
    sentinel_d,
    sentinel_q,
    batch_size,
    FLAGS,
)

In [5]:
# ---------------------------------------------------------------------------
# DYNAMIC POINTING DECODER, loss and training op
# ---------------------------------------------------------------------------

# The decoder returns one tensor of start scores and one of end scores.
iter_start_scores, iter_end_scores = dec.decoderBatch(
    U, lstm_dec, dropout_rate_ph, batch_size, doc_len_ph, FLAGS)

# Scores summed over axis 2. In this cell they are referenced only by the
# disabled tr.loss_and_accuracy call below.
sum_start_scores = tf.reduce_sum(iter_start_scores, axis=2)
sum_end_scores = tf.reduce_sum(iter_end_scores, axis=2)

# Earlier loss variant, disabled:
# sum_loss, accuracy, pr_start_idx, pr_end_idx = tr.loss_and_accuracy(
#     start_true, end_true, batch_size, sum_start_scores, sum_end_scores, max_sequence_length)

# Active variant: takes the un-summed scores plus the decoder iteration count.
sum_loss, accuracy, pr_start_idx, pr_end_idx = tr.loss_and_accuracy_v2(
    start_true,
    end_true,
    batch_size,
    iter_start_scores,
    iter_end_scores,
    max_sequence_length,
    FLAGS.max_decoder_iterations)

# Expose the training loss to TensorBoard.
tf.summary.scalar(name='loss_train', tensor=sum_loss)

# One optimizer step on the summed loss.
with tf.name_scope('Train'):
    train_step = optimizer.minimize(loss=sum_loss)


In [None]:
# Input files used by the training driver, named once instead of repeating
# the string literals.
EMBEDDINGS_PATH = './english/glove.840B.300d.w2vformat.bin'
TRAIN_CSV = './english/train_160.csv'
TEST_CSV = './english/test_160.csv'

# Placeholders and sequence limits handed to the trainer, keyed by the names
# passed to trainer.set_variables.
variables = {
    'max_sequence_length': max_sequence_length,
    'max_question_length': max_question_length,
    'question_ph': question_ph,
    'document_ph': document_ph,
    'dropout_rate_ph': dropout_rate_ph,
    'doc_len_ph': doc_len_ph,
    'que_len_ph': que_len_ph,
    'start_true_ph': start_true,
    'end_true_ph': end_true,
    'batch_size_ph': batch_size,
    'learning_rate_ph': learning_rate_ph
}

# Graph ops handed to the trainer via trainer.set_ops.
ops = {
    'train_step_op': train_step,
    'sum_loss_op': sum_loss,
    'accuracy_op': accuracy,
    'pr_start_idx_op': pr_start_idx,
    'pr_end_idx_op': pr_end_idx
}

# Hyper-parameter space with a single candidate per dimension.
hps = mt.HyperParamsSpace(dropouts = [0.7], batches = [10], lrates=[0.001])
with mt.ModelTrainer(EMBEDDINGS_PATH, FLAGS.log_path) as trainer:
    trainer.set_variables(variables)
    trainer.set_ops(ops)
    # Single hyper-parameter trial.
    for i_ in range(0, 1):
        hp = hps.getRand()
        trainer.reset(hp)
        step = 0
        # Epoch count comes from the max_epoch flag (default 100) instead of a
        # hard-coded literal, so the flag actually controls training length.
        for epoch_ in range(0, FLAGS.max_epoch):
            # NOTE(review): h_param_str is not read anywhere in this cell; the
            # call is kept in place in case the helper has side effects -- confirm.
            h_param_str = utils.make_h_param_string_2(hp)
            # NOTE(review): the meaning of the literals 500 and 20 is defined by
            # modeltrainer -- confirm before naming them.
            step = trainer.train(hp, TRAIN_CSV, 500, step)
            trainer.accuracy(hp, TRAIN_CSV, epoch_, 500, 'train', 20)
            # The test CSV is reported under the 'valid' label.
            trainer.accuracy(hp, TEST_CSV, epoch_, 500, 'valid', 20)
        

Embeddings are loaded to memory
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; lrate= 0.001
dropout =  0.7 ; batch_size =  10 ; l