In [63]:
from collections import namedtuple, defaultdict
import json
import numpy as np
from load_dataset import get_preprocessed_dataset, load_json_file
import tensorflow as tf
from tensorflow import identity
from tensorflow.nn import conv2d, dropout, relu, sigmoid, softmax, bidirectional_dynamic_rnn, softmax_cross_entropy_with_logits
from tensorflow.nn.rnn_cell import LSTMCell,DropoutWrapper
from main import get_batch_dict, get_init_charEmbVec
from layer import CharEmbLayer,WordEmbLayer,DenseLayer,HighwayLayer,\
    ContextualEmbLayer,AttentionLayer,TwoLSTMs_ModelingLayer,OutputLayer
import time

In [64]:
# --- Dataset loading and global hyper-parameters ---------------------------
# `mode` picks the split file that is read AND is forwarded to preprocessing,
# so changing it in this one place keeps the file name, the preprocessing
# call and the `is_training` flag in agreement.
mode = 'train'
# NOTE(review): machine-specific absolute path; point it at the local data dir.
path = 'D:/Wisdom/git/tmp/BIDAF/data/squad/'
version = 'small'
json_file = path + mode + '-' + version + '.json'
data = load_json_file(json_file)
config, full_dataset, sub_info_dataset, index_dataset, word2vec_dataset = \
    get_preprocessed_dataset(data, mode=mode)
# Flag handed to every layer constructor further down; derived from `mode`
# instead of being hard-coded so the two cannot drift apart.
is_training = (mode == 'train')

# Sizes reported by preprocessing; they fix the placeholder shapes below.
n_sample = len(full_dataset['q_dataset'])
max_word_num_x = config['max_word_num_x']
max_word_num_q = config['max_word_num_q']
max_word_len = config['max_word_len']
word_vec_dim = config['word_vec_dim']

# Hidden size `d` and the mini-batch size are also recorded in `config`,
# which is later passed to get_batch_dict.
d = 175
config['hidden_size'] = d
batch_size = 30
config['batch_size'] = batch_size

In [65]:
# Character-vector dimensionality; recorded in `config` and used for the
# char-level placeholder shapes and the CNN input channel count.
char_vec_dim = 10
config['char_vec_dim'] = char_vec_dim

# Character list collected during preprocessing.
tot_char_list = sub_info_dataset['tot_char_list']

# Initial character vectors, drawn with np.random.normal as the initializer.
# NOTE(review): no random seed is set, so these values differ between runs.
char2vec_dataset = get_init_charEmbVec(
    index_dataset,
    tot_char_list,
    char_vec_dim,
    initializer=np.random.normal,
)

In [66]:
# Clear the default graph before (re)declaring the model inputs.
tf.reset_default_graph()

# Context ("x") inputs: word vectors and per-word character vectors.
xw = tf.placeholder(tf.float32, [batch_size, max_word_num_x, word_vec_dim], name='x_word')
xc = tf.placeholder(tf.float32, [batch_size, max_word_num_x, max_word_len, char_vec_dim], name='x_char')

# Question ("q") inputs, laid out the same way.
qw = tf.placeholder(tf.float32, [batch_size, max_word_num_q, word_vec_dim], name='q_word')
qc = tf.placeholder(tf.float32, [batch_size, max_word_num_q, max_word_len, char_vec_dim], name='q_char')

# One length value per sample for context and question.
len_x = tf.placeholder(tf.int32, [batch_size], name='word_len_x')
len_q = tf.placeholder(tf.int32, [batch_size], name='word_len_q')

# Boolean masks over context positions for the answer start / end.
# Every placeholder has a fixed leading batch_size dimension, so only full
# batches can be fed.
y1 = tf.placeholder(tf.bool, [batch_size, max_word_num_x], name='y_start')
y2 = tf.placeholder(tf.bool, [batch_size, max_word_num_x], name='y_end')

In [67]:
# Three separate truncated-normal initializers (stddev 0.5), one each for the
# char-CNN filters, the dense weights and the biases.
cnn_init, weight_init, bias_init = (
    tf.truncated_normal_initializer(stddev=0.5) for _ in range(3)
)

In [68]:
# Convolution settings for the character-level embedding layer.
stride = [1, 1, 1, 1]
padding = 'VALID'
input_channel_num = char_vec_dim
output_channel_num = 100
# NOTE(review): `height` fills the second slot of filter_shape; if CharEmbLayer
# passes this shape to conv2d unchanged, that slot is the filter *width*
# (the span over characters) -- confirm against layer.py.
height = 5
filter_shape = [1, height, input_channel_num, output_channel_num]

# Context and question go through a layer with the same name 'CNN'; the
# question-side call sets reuse=True (presumably sharing the filters -- confirm).
cnn_xc = CharEmbLayer(
    'CNN', xc, filter_shape, output_channel_num, stride, padding, cnn_init,
    is_training=is_training)
cnn_qc = CharEmbLayer(
    'CNN', qc, filter_shape, output_channel_num, stride, padding, cnn_init,
    reuse=True, is_training=is_training)

In [69]:
# Word embedding stage: combines the word vectors with the char-CNN outputs.
emb_x = WordEmbLayer('WordEmb_x', xw, cnn_xc)
emb_q = WordEmbLayer('WordEmb_q', qw, cnn_qc)

# Highway stage: same layer name for both sides, reuse=True on the question.
highway_x = HighwayLayer(
    'Highway', emb_x, weight_init, bias_init, is_training=is_training)
highway_q = HighwayLayer(
    'Highway', emb_q, weight_init, bias_init, reuse=True, is_training=is_training)

# Contextual embedding stage with hidden size d and the per-sample lengths.
h = ContextualEmbLayer(
    'ContEmb', highway_x, d, len_x, is_training=is_training)
u = ContextualEmbLayer(
    'ContEmb', highway_q, d, len_q, reuse=True, is_training=is_training)

# Attention over the context (h) and question (u) representations.
G = AttentionLayer(
    'Attention', [h, u], d, weight_init, is_training=is_training)

# Modeling stage on top of the attention output.
M = TwoLSTMs_ModelingLayer(
    'Modeling', G, d, len_x, is_training=is_training)

# Output stage: logits and probabilities for answer start (1) and end (2).
logit1, logit2, p1, p2 = OutputLayer(
    'Output', [G, M], d, len_x, weight_init, is_training=is_training)

In [70]:
# Batch-averaged cross-entropy for the start and end predictions.
# NOTE(review): y1 / y2 are bool placeholders; this relies on
# softmax_cross_entropy_with_logits accepting them as labels (it ran, per the
# recorded output) -- an explicit float cast would make the intent clearer.
loss1 = tf.reduce_mean(
    softmax_cross_entropy_with_logits(logits=logit1, labels=y1))
loss2 = tf.reduce_mean(
    softmax_cross_entropy_with_logits(logits=logit2, labels=y2))

# Total objective is the sum of both terms.
loss = loss1 + loss2

# Despite its name, `optimizer` is the op returned by minimize(); running it
# applies one Adam update with the optimizer's default settings.
optimizer = tf.train.AdamOptimizer().minimize(loss)

In [71]:
#tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

In [72]:
def get_feed_dict(batch_dict):
    """Map one batch dictionary onto the graph's input placeholders.

    Args:
        batch_dict: mapping providing the entries 'xw', 'xc', 'qw', 'qc',
            'x_len', 'q_len', 'y_start' and 'y_end'.

    Returns:
        dict keyed by the module-level placeholders (xw, xc, qw, qc, len_x,
        len_q, y1, y2), ready to be passed as ``feed_dict`` to ``sess.run``.
    """
    # (placeholder, batch_dict key) pairs, in the order they are inserted.
    bindings = (
        (xw, 'xw'),
        (xc, 'xc'),
        (qw, 'qw'),
        (qc, 'qc'),
        (len_x, 'x_len'),
        (len_q, 'q_len'),
        (y1, 'y_start'),
        (y2, 'y_end'),
    )
    return {placeholder: batch_dict[key] for placeholder, key in bindings}

In [73]:
# Optional GPU setup (disabled): let the session grow GPU memory on demand.
# NOTE: keep a distinct name such as `sess_config` here -- `config` already
# holds the dataset configuration dict that is passed to get_batch_dict.
# sess_config = tf.ConfigProto()
# sess_config.gpu_options.allow_growth = True
# sess = tf.Session(config=sess_config)

# Build the variable-initialisation op, open a session on the default graph
# and run the initialiser once.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


In [74]:
# --- Short training run ----------------------------------------------------
# Shuffles the sample indices once and trains on consecutive mini-batches.
# NOTE(review): no random seed is set, so the batch order differs per run.
print('Batch size: ' + str(batch_size))
print('-------- Start --------')
sample_index = np.arange(n_sample)
np.random.shuffle(sample_index)
# Number of full batches in one pass; the remainder is dropped because the
# placeholders are declared with a fixed batch_size.
batch_time = n_sample // batch_size

# Debug cap on the number of training steps; set to None for a full pass.
# The cap is applied *before* a step runs, so every executed step is reported
# (previously a fourth batch was trained and then discarded without logging).
max_steps = 3
n_steps = batch_time if max_steps is None else min(batch_time, max_steps)

try:
    for i in range(n_steps):
        t_start = time.time()
        index_range = sample_index[i * batch_size:(i + 1) * batch_size]
        feed = get_feed_dict(get_batch_dict(
            index_range, config, full_dataset, word2vec_dataset, char2vec_dataset))
        # One optimisation step; `l` is the loss value for this batch.
        l, _ = sess.run([loss, optimizer], feed_dict=feed)
        t_end = time.time()
        print('%d/%d complete.\tLoss: %.4f\tTime: %.4f[s]' % (i + 1, batch_time, l, t_end - t_start))
finally:
    # Release the session even when a step raises.
    sess.close()

Batch size: 30
-------- Start --------
1/109 complete.	Loss: 18.9098	Time: 17.1553[s]
2/109 complete.	Loss: 14.5220	Time: 16.0002[s]
3/109 complete.	Loss: 13.5909	Time: 19.3116[s]
