In [1]:
import numpy as np
import tensorflow as tf
#import maxout
import highway_maxout as hmn
import utils
import dataset as ds

In [2]:
# ======= Command-line flags =======
# Short alias for the flags module; FLAGS is the object the rest of the
# notebook reads the parsed values from.
flags = tf.app.flags
FLAGS = flags.FLAGS

# Model sizes.
flags.DEFINE_integer('maxout_layer_size', 50, 'Maxout layer size')
flags.DEFINE_integer('maxout_pooling_size', 4, 'Maxout pooling size')
flags.DEFINE_integer('lstm_size', 50, 'LSTM cell internal size')

# Logging and evaluation.
flags.DEFINE_string('log_path', '/tmp/dcn', 'logs location')
flags.DEFINE_integer('acc_batch_size', 5, 'How many examples to use to calculate accuracy')


In [3]:
# Clear the default graph (removes all previously created variables/ops).
tf.reset_default_graph()

# ---- Hyper-parameters ----------------------------------------------------
# Values that come from command-line flags.
lstm_size = FLAGS.lstm_size
maxout_layer_size = FLAGS.maxout_layer_size
maxout_pooling_size = FLAGS.maxout_pooling_size
acc_batch_size = FLAGS.acc_batch_size
# Values fixed in the notebook.
word_vector_size = 300
max_sequence_length = 80
max_decoder_iterations = 4
max_epoch = 1000

# ---- Inputs --------------------------------------------------------------
# One example per run: batch size 1, padded to max_sequence_length word vectors.
_input_shape = [1, max_sequence_length, word_vector_size]
question_ph = tf.placeholder(tf.float32, _input_shape, name="q_input")
document_ph = tf.placeholder(tf.float32, _input_shape, name="d_input")


def _lstm_with_dropout(num_units, keep_prob=0.5):
    """Return an LSTMCell of `num_units` wrapped so its outputs go through dropout."""
    # NOTE(review): keep_prob is a Python constant, so dropout is applied on
    # every run of the graph that uses these cells, validation included.
    return tf.nn.rnn_cell.DropoutWrapper(
        cell=tf.nn.rnn_cell.LSTMCell(num_units),
        output_keep_prob=keep_prob)


# Single cell used for both the question and the document encoding.
with tf.name_scope('ENCODER'):
    lstm = _lstm_with_dropout(lstm_size)

# Forward / backward cells of the Bi-LSTM in the COATTENTION ENCODER.
with tf.name_scope('COATTENTION_ENCODER'):
    lstm_cenc_fw = _lstm_with_dropout(lstm_size)
    lstm_cenc_bw = _lstm_with_dropout(lstm_size)

# LSTM cell of the DYNAMIC POINTING DECODER (no dropout wrapper).
lstm_dec = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# Initial start/end position estimates.
# ? could be generated randomly in (0, document_size - 1)
start_pos, end_pos = 0, 0

# ---- Sentinel vectors ----------------------------------------------------
# One trainable column vector per encoding; appended to the encodings later.
_sentinel_shape = [lstm_size, 1]
sentinel_q = tf.get_variable("sentinel_q", _sentinel_shape, initializer=tf.random_normal_initializer())
sentinel_d = tf.get_variable("sentinel_d", _sentinel_shape, initializer=tf.random_normal_initializer())

# Summaries: the full sentinel and its maximum entry, for each sentinel.
for _tag, _sentinel in (('sentinel_q', sentinel_q), ('sentinel_d', sentinel_d)):
    tf.summary.histogram(_tag, _sentinel)
    tf.summary.histogram(_tag + '_max', tf.reduce_max(_sentinel))

# ---- Optimizer -----------------------------------------------------------
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)

In [4]:

# r = lstm(inputs = tf.convert_to_tensor([[1,2], [2,3]], dtype=tf.float32), state = zero_state_q)

def length(sequence):
  """Count the non-zero steps of every sequence in the batch.

  A step counts as used when at least one of its entries along axis 2 is
  non-zero; all-zero steps (the padding) contribute nothing.

  Args:
    sequence: 3-D float tensor; axis 1 is summed over and axis 2 is reduced
      with max (the callers in this notebook pass
      [1, max_sequence_length, word_vector_size] placeholders).

  Returns:
    int32 tensor holding one count per entry along axis 0.
  """
  # 1.0 where the step has any non-zero component, 0.0 for all-zero steps.
  step_is_used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
  return tf.cast(tf.reduce_sum(step_is_used, 1), tf.int32)

# ===================  DOCUMENT AND QUESTION ENCODER ===================
# Both placeholders hold one example (batch size 1) of shape
# [1, max_sequence_length, word_vector_size] and are fed to dynamic_rnn as-is.

# Number of non-padding steps per example; built once and reused below instead
# of re-creating the same ops for every use.
question_lengths = length(question_ph)
document_lengths = length(document_ph)
question_size = question_lengths[0]
document_size = document_lengths[0]

# we use the same LSTM for both encodings to share weights
with tf.name_scope('ENCODER'):
    with tf.name_scope('Q_ENC'):
        outputs_q, state_q = tf.nn.dynamic_rnn(
            lstm, inputs=question_ph, sequence_length=question_lengths, dtype=tf.float32)
    with tf.name_scope('D_ENC'):
        outputs_d, state_d = tf.nn.dynamic_rnn(
            lstm, inputs=document_ph, sequence_length=document_lengths, dtype=tf.float32)

# Paddings of the form [0, max_sequence_length - size], i.e. "pad on the right
# up to max_sequence_length".
doc_padding = tf.subtract([0, max_sequence_length], [0, document_size])
que_padding = tf.subtract([0, max_sequence_length], [0, question_size])

# dynamic_rnn returns [1, max_sequence_length, lstm_size]. Index the batch axis
# explicitly rather than tf.squeeze(), which would also drop any other axis
# that happens to have size 1.
que_enc = tf.transpose(outputs_q[0])
# Keep only the columns of real (non-padding) question words.
que_enc = tf.slice(que_enc, [0, 0], [lstm_size, question_size])
# Append the sentinel, pad back to a static width so the projection sees a
# fully defined shape, project, then cut the padding off again.
que_enc_sentinel = tf.concat([que_enc, sentinel_q], axis=1)
que_enc_sentinel = tf.pad(que_enc_sentinel, [[0, 0], que_padding])
que_enc_sentinel.set_shape([lstm_size, max_sequence_length + 1])
que_enc_sentinel = utils.non_linear_projection(que_enc_sentinel)
que_enc_sentinel = tf.slice(que_enc_sentinel, [0, 0], [lstm_size, question_size + 1])

doc_enc = tf.transpose(outputs_d[0])
doc_enc = tf.slice(doc_enc, [0, 0], [lstm_size, document_size])

tf.summary.histogram('QUE_enc', que_enc)
tf.summary.histogram('DOC_enc', doc_enc)
tf.summary.histogram('DOC_enc_max', tf.reduce_max(doc_enc))
tf.summary.histogram('QUE_enc_max', tf.reduce_max(que_enc))
tf.summary.histogram('Document_size', document_size)
tf.summary.histogram('Question_size', question_size)

# append sentinel vector to the document encoding
doc_enc_sentinel = tf.concat([doc_enc, sentinel_d], axis=1)
print(que_enc_sentinel)

# ===================  COATTENTION ENCODER ===================
with tf.name_scope('COATTENTION_ENCODER'):
    # Affinity matrix, L \in R^{(doc_size + 1) x (que_size + 1)}
    L = tf.matmul(doc_enc_sentinel, que_enc_sentinel, transpose_a=True)
    # A_Q: for every question word (column) a distribution over document words.
    A_Q = tf.nn.softmax(L, 0)
    # A_D: for every document word (column) a distribution over question words.
    # transpose(L) is (que_size + 1) x (doc_size + 1), so the question axis is
    # axis 0. Normalising over axis 1 instead would only reproduce
    # transpose(A_Q) and never attend over the question.
    A_D = tf.nn.softmax(tf.transpose(L), 0)
    # C_Q \in R^{lstm_size x (que_size + 1)}
    C_Q = tf.matmul(doc_enc_sentinel, A_Q)
    # C_D \in R^{2*lstm_size x (doc_size + 1)}
    C_D = tf.matmul(tf.concat([que_enc_sentinel, C_Q], axis=0), A_D)

    # TODO Q: would we use single cell of two different
    # Stack document encoding and C_D per document position and
    # present them as one sequence of 3*lstm_size features.
    bi_lstm_input = tf.concat([doc_enc_sentinel, C_D], axis=0)
    bi_lstm_input = tf.transpose(bi_lstm_input)
    bi_lstm_input = tf.reshape(bi_lstm_input, [1, document_size + 1, 3*lstm_size])

    tf.summary.histogram('bi_lstm_input', bi_lstm_input)

    # No sequence_length is needed: the input is already cut to
    # document_size + 1 steps.
    outputs_bi, output_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_cenc_fw,
        cell_bw=lstm_cenc_bw,
        inputs=bi_lstm_input,
        dtype=tf.float32
    )

    # Join forward and backward outputs; take element 0 because only one
    # sentence is fed to the bi-RNN.
    outputs_bi = tf.concat(outputs_bi, axis=2)[0]
    print(outputs_bi)
    # Drop the last row (the sentinel position) and put positions on columns:
    # U is 2*lstm_size x document_size.
    U = tf.slice(outputs_bi, [0, 0], [document_size, 2*lstm_size])
    U = tf.transpose(U)

tf.summary.histogram('U', U)
tf.summary.histogram('U_max', tf.reduce_max(U))

Tensor("Slice_1:0", shape=(?, ?), dtype=float32)
Tensor("COATTENTION_ENCODER_1/strided_slice:0", shape=(?, 100), dtype=float32)


<tf.Tensor 'U_max:0' shape=() dtype=string>

In [5]:
# ===================== DYNAMIC POINTING DECODER =============


#scope = tf.get_variable_scope()
#u_t = get_scope_variable(scope, 'hmn_u_t', [2*lstm_size, 1]) 
#h_i = get_scope_variable(scope, 'hmn_h_i', [lstm_size, 1 ]) 
#u_s_i = get_scope_variable(scope, 'hmn_u_s_i', [2*lstm_size, 1])
#u_e_i = get_scope_variable(scope, 'hmn_u_e_i', [2*lstm_size, 1])


#m_3 = HMN(U, h_i, u_s_i, u_e_i)
#print(m_3)

# returns tuple (scores_start, scores_end, new_start_pos, new_end_pos, new_lstm_state)
def decoderIteration(U, lstm_state, start_pos, end_pos, iter_number):
    """Add the ops for one iteration of the dynamic pointing decoder.

    Args:
        U: encoding matrix whose columns are sliced by position (rows are
            taken as 2*lstm_size).
        lstm_state: current state of the decoder LSTM (`lstm_dec`); its `.h`
            is passed to the HMN.
        start_pos: previous start position estimate (int or scalar tensor).
        end_pos: previous end position estimate (int or scalar tensor).
        iter_number: iteration index, forwarded to `hmn.HMN`.

    Returns:
        Tuple (scores_start, scores_end, new_start_pos, new_end_pos,
        new_lstm_state): the two HMN outputs, the argmax (over axis 0) of
        each, and the decoder LSTM state after consuming the two newly
        selected columns of U.
    """
    encoding_rows = lstm_size*2

    def column_at(position, name=None):
        # Column `position` of U, kept 2-D with shape [2*lstm_size, 1].
        return tf.slice(U, [0, position], [encoding_rows, 1], name=name)

    with tf.name_scope('Decoder_Iteration'):
        with tf.name_scope('Next_Start'):
            # Start scores use the previous start and previous end columns.
            hidden_for_start = tf.transpose(lstm_state.h)
            prev_start_col = column_at(start_pos)
            prev_end_col = column_at(end_pos)
            scores_start = hmn.HMN(
                U, hidden_for_start, prev_start_col, prev_end_col,
                document_size, 'start', FLAGS, iter_number)
            new_start_pos = tf.to_int32(tf.argmax(scores_start, 0))

        with tf.name_scope('Next_End'):
            # End scores use the freshly chosen start and the previous end.
            hidden_for_end = tf.transpose(lstm_state.h)
            chosen_start_col = column_at(new_start_pos)
            last_end_col = column_at(end_pos)
            scores_end = hmn.HMN(
                U, hidden_for_end, chosen_start_col, last_end_col,
                document_size, 'end', FLAGS, iter_number)
            new_end_pos = tf.to_int32(tf.argmax(scores_end, 0))

        with tf.name_scope('LSTM_State_Update'):
            # Feed the columns at the new start and end to the decoder LSTM
            # as a single [1, 4*lstm_size] input.
            start_col = column_at(new_start_pos, name='slice-5')
            end_col = column_at(new_end_pos)
            lstm_input = tf.concat([start_col, end_col], axis=0)
            output, new_lstm_state = lstm_dec(
                tf.reshape(lstm_input, [1, lstm_size*4]), lstm_state)

        return scores_start, scores_end, new_start_pos, new_end_pos, new_lstm_state



#print(lstm_dec_state)

with tf.name_scope('DYNAMIC_POINTING_DECODER'):
    # Initial position estimates and score accumulators.
    start_pos = 0
    end_pos = 0
    sum_start_scores = tf.zeros([1, document_size])
    sum_end_scores = tf.zeros([1, document_size])
    lstm_dec_state = lstm_dec.zero_state(1, tf.float32)

    # The decoder is unrolled a fixed number of times while the graph is
    # built. There is deliberately no early `break` on
    # "positions did not change": the positions are graph tensors, so a Python
    # `==` here runs at graph-construction time and never sees the values the
    # session computes. It could never stop the loop and only suggested
    # convergence detection that does not exist.
    for step in range(max_decoder_iterations):
        scores_start, scores_end, new_start_pos, new_end_pos, lstm_dec_state = decoderIteration(
            U, lstm_dec_state, start_pos, end_pos, step + 1)
        # Accumulate the scores of every iteration.
        sum_start_scores = tf.add(sum_start_scores, scores_start)
        sum_end_scores = tf.add(sum_end_scores, scores_end)
        start_pos, end_pos = new_start_pos, new_end_pos


# loss and train step
# Ground truth as [start_index, end_index].
start_end_true = tf.placeholder(tf.int32, [2])
# Row 0: one-hot start label, row 1: one-hot end label.
onehot_labels = tf.one_hot(start_end_true, document_size)
with tf.name_scope('Loss'):
    # Softmax cross entropy of the accumulated start scores (row 0) and
    # accumulated end scores (row 1) against the one-hot labels.
    sum_loss = tf.losses.softmax_cross_entropy(
        onehot_labels,
        tf.concat([sum_start_scores, sum_end_scores], axis=0))


with tf.name_scope('Accuracy'):
    with tf.name_scope('Prediction'):
        # Predicted positions: argmax of the accumulated scores.
        pr_start_idx = tf.to_int32(tf.argmax(sum_start_scores, 1))[0]
        pr_end_idx = tf.to_int32(tf.argmax(sum_end_scores, 1))[0]
    with tf.name_scope('Accuracy'):
        # The metric is computed in Python by utils.f1_score_int from the
        # predicted and the true (start, end) pair.
        accuracy = tf.py_func(
            utils.f1_score_int,
            [pr_start_idx, pr_end_idx, start_end_true[0], start_end_true[1]],
            tf.float64)
tf.summary.scalar('accuracy', accuracy)

print(sum_start_scores.get_shape())

tf.summary.scalar('loss', sum_loss)
with tf.name_scope('Train'):
    train_step = optimizer.minimize(sum_loss)


<unknown>


In [None]:
#=========== Training ==================


def accuracyValidation(acc_batch_size, step):
    """Evaluate `accuracy` on the next `acc_batch_size` validation examples.

    Reads the module-level `sess`, `next_element_valid`, `accuracy`,
    `summary_op`, `pr_start_idx`, `pr_end_idx`, `writer` and the input
    placeholders. Every evaluated example also writes one summary at step
    `step * 10 + i`, where `i` is the example's index within this call.

    Args:
        acc_batch_size: number of validation examples to evaluate.
        step: training step the validation belongs to; used only to place
            the summaries.

    Returns:
        The average accuracy over the evaluated examples (also printed), or
        0.0 when `acc_batch_size` is not positive.
    """
    if acc_batch_size <= 0:
        # Nothing to evaluate; the average below would divide by zero.
        print('AVG accuracy', 0.0)
        return 0.0

    acc_accum = 0
    for step_accuracy_ in range(acc_batch_size):
        start_true, end_true, doc, que, doc_v, que_v = sess.run(next_element_valid)
        acc, stat, s, e = sess.run(
            (accuracy, summary_op, pr_start_idx, pr_end_idx),
            feed_dict={question_ph: [que_v], document_ph: [doc_v], start_end_true: [start_true, end_true]}
        )
        writer.add_summary(stat, step * 10 + step_accuracy_)
        acc_accum += acc

    avg_accuracy = acc_accum / acc_batch_size
    print('AVG accuracy', avg_accuracy)
    return avg_accuracy

def trainStep(step):
    """Fetch the next training example and run one optimisation step on it.

    Uses the module-level `sess`, `next_element`, `train_step`, `sum_loss`,
    `summary_op`, `writer` and the input placeholders. Examples whose start
    index is negative or whose end index lies beyond the last position
    (`max_sequence_length - 1`) are reported and skipped.

    Args:
        step: step number under which the summary of this update is written.
    """
    example = sess.run(next_element)
    start_true, end_true, doc, que, doc_v, que_v = example

    last_valid_index = max_sequence_length - 1
    if start_true < 0 or end_true > last_valid_index:
        print('Ignore step', start_true, end_true)
        return

    feeds = {
        question_ph: [que_v],
        document_ph: [doc_v],
        start_end_true: [start_true, end_true],
    }
    fetches = (train_step, sum_loss, summary_op)
    _, loss, stat = sess.run(fetches, feed_dict=feeds)
    writer.add_summary(stat, step)


# ---- Input pipelines -------------------------------------------------------
dataset = ds.getDataset(["./train_train_task_b.csv"], max_sequence_length)
# A single re-initialisable iterator for the whole run. Creating a new
# iterator (and get_next op) inside the epoch loop would add nodes to the graph
# on every epoch, so the graph would keep growing during training.
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

dataset_validation = ds.getDataset(["./valid_train_task_b.csv"], max_sequence_length)
iterator_valid = dataset_validation.make_one_shot_iterator()
next_element_valid = iterator_valid.get_next()

summary_op = tf.summary.merge_all()

# ---- Schedule --------------------------------------------------------------
steps_per_epoch = 2000       # loop iterations per epoch
validation_interval = 50     # every N-th iteration validates instead of training

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    writer = tf.summary.FileWriter(FLAGS.log_path + "/9", sess.graph)
    try:
        for epoch_ in range(max_epoch):
            # Rewind the training data at the start of every epoch.
            sess.run(iterator.initializer)
            for step_ in range(steps_per_epoch):
                # Step counter that keeps increasing across epochs, so the
                # summaries of a later epoch are not written at the same step
                # values as those of an earlier one.
                global_step_ = epoch_ * steps_per_epoch + step_
                if step_ > 0 and step_ % validation_interval == 0:
                    # --------- ACCURACY -------------
                    accuracyValidation(acc_batch_size, global_step_)
                else:
                    try:
                        trainStep(global_step_)
                    except tf.errors.OutOfRangeError:
                        # Training data ran out before steps_per_epoch
                        # iterations: finish this epoch early.
                        break

            print('Epoch', epoch_, 'completed')
    finally:
        # Flush and close the event file even when training is interrupted.
        writer.close()
    print('End')


acc 41 8 68 69
acc 15 13 40 41
acc 25 61 43 45
acc 17 4 12 13
acc 17 32 61 63
AVG accuracy 0.03
acc 55 3 58 62
acc 19 19 25 37
acc 25 61 9 10
acc 53 30 53 57
acc 41 8 30 33
AVG accuracy 0.0
acc 41 8 54 56
acc 28 19 54 56
acc 57 60 7 8
acc 15 8 7 9
acc 55 3 61 66
AVG accuracy 0.0
acc 5 3 7 7
acc 63 30 30 30
acc 64 61 49 50
acc 5 3 9 10
acc 42 16 67 74
AVG accuracy 0.0
acc 64 61 0 0
acc 15 28 21 25
acc 41 76 5 7
acc 35 4 7 9
acc 56 17 29 32
AVG accuracy 0.105263157895
acc 15 18 16 16
acc 56 17 21 23
acc 37 3 24 25
acc 37 3 66 70
acc 53 3 9 14
AVG accuracy 0.08
acc 11 12 71 71
acc 57 60 45 51
acc 13 72 33 36
acc 17 8 31 33
acc 41 40 33 35
AVG accuracy 0.025
acc 17 8 33 33
acc 53 52 6 8
acc 17 8 48 49
acc 17 8 11 14
acc 11 12 26 26
AVG accuracy 0.0
acc 41 8 16 23
acc 41 44 23 24
acc 61 4 38 40
acc 13 72 74 78
acc 41 44 4 7
AVG accuracy 0.0
acc 17 8 75 77
acc 13 72 47 48
acc 35 4 1 1
acc 53 18 54 56
acc 53 38 35 37
AVG accuracy 0.0129032258065
acc 53 18 0 8
acc 41 67 47 50
acc 19 74 29 32
a

acc 5 0 44 47
acc 0 8 17 22
acc 7 9 0 1
AVG accuracy 0.0
acc 5 28 19 21
acc 49 7 42 47
acc 5 28 18 27
acc 24 7 40 41
acc 45 9 21 21
AVG accuracy 0.162091503268
acc 0 3 46 51
acc 8 9 40 45
acc 53 2 30 30
acc 8 1 13 23
acc 0 14 54 54
AVG accuracy 0.0
acc 0 8 63 63
acc 0 7 1 3
acc 0 60 26 30
acc 2 2 27 29
acc 3 0 24 24
AVG accuracy 0.139393939394
acc 28 47 20 21
acc 0 60 67 68
acc 65 2 16 22
acc 0 23 0 4
acc 0 60 0 0
AVG accuracy 0.0754171301446
acc 0 71 50 51
acc 9 5 48 61
acc 0 9 77 77
acc 0 2 31 34
acc 0 27 14 15
AVG accuracy 0.0374774774775
acc 3 42 17 20
acc 0 9 60 62
acc 0 33 31 41
acc 0 8 14 17
acc 0 14 7 11
AVG accuracy 0.16303030303
acc 0 5 57 58
acc 41 32 10 12
acc 0 24 50 53
acc 7 7 23 29
acc 0 0 48 50
AVG accuracy 0.0
acc 0 33 39 44
acc 7 4 56 58
acc 0 0 10 11
acc 18 47 50 60
acc 0 26 55 61
AVG accuracy 0.0
acc 0 60 54 54
acc 0 7 27 29
acc 0 8 59 66
acc 2 2 8 10
acc 0 0 31 33
AVG accuracy 0.00645161290323
acc 0 2 11 13
acc 0 9 24 25
acc 8 1 30 32
acc 0 16 25 29
acc 0 8 36 39
A

KeyboardInterrupt: 