In [None]:
import os
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras

from utils.write import training_data, test_data, glove


In [None]:
# run params
SECTION = 'write'
RUN_ID = '0002'
DATA_NAME = 'qa'
RUN_FOLDER = 'run/{}/'.format(SECTION)
RUN_FOLDER += '_'.join([RUN_ID, DATA_NAME])

# Create every output sub-folder independently of whether RUN_FOLDER already
# exists: a run folder left over from an interrupted or partial setup would
# otherwise be missing e.g. 'weights', and the later save calls would fail.
# makedirs also creates RUN_FOLDER itself as a parent when needed.
for subfolder in ('viz', 'images', 'weights'):
    os.makedirs(os.path.join(RUN_FOLDER, subfolder), exist_ok=True)

# The commented value is the alternative setting noted by the author.
mode = 'build'  # 'load'

In [None]:
# Instantiate one generator per data split; the test generator is kept around
# so later cells can keep pulling validation batches from it.
training_data_gen = training_data()
test_data_gen = test_data()

# Pull a single training batch and show example number `idx` from it.
t = next(training_data_gen)
idx = 0

# Fields of the batch to preview, in display order.
preview_fields = (
    'document_tokens',
    'question_input_tokens',
    'answer_masks',
    'answer_labels',
    'question_output_tokens',
)
for position, field in enumerate(preview_fields):
    if position > 0:
        # Separator between consecutive fields (none after the last one).
        print('\n')
    print(field + '\n', t[field][idx])


In [None]:
# GloVe
# Rows of the GloVe matrix give the vocabulary size, columns the embedding width.
VOCAB_SIZE, EMBEDDING_DIMENS = glove.shape[0], glove.shape[1]

print('GLOVE')
for label, value in (('VOCAB_SIZE: ', VOCAB_SIZE),
                     ('EMBEDDING_DIMENS: ', EMBEDDING_DIMENS)):
    print(label, value)

In [None]:
# Build the joint training graph: an answer tagger over the document plus a
# question decoder whose initial state is derived from the tagged document.
GRU_UNITS = 100
# None leaves the corresponding sequence dimension unspecified in the Input shapes below.
MAX_DOC_SIZE = None
MAX_ANSWER_SIZE = None
MAX_Q_SIZE = None

# Document token ids, one sequence per example.
document_tokens = keras.Input(shape=(MAX_DOC_SIZE,), name='document_tokens')

# Embedding layer initialised from the GloVe matrix; the same layer instance is
# applied to both the document tokens and the decoder inputs below.
# mask_zero=True makes token id 0 a masked (padding) position.
embedding = keras.layers.Embedding(
    input_dim=VOCAB_SIZE,
    output_dim=EMBEDDING_DIMENS,
    weights=[glove],
    mask_zero=True,
    name='embedding'
)
document_emb = embedding(document_tokens)

# Bidirectional GRU over the document, returning one output per token.
answer_outputs = keras.layers.Bidirectional(
    keras.layers.GRU(GRU_UNITS, return_sequences=True),
    name='answer_outputs'
)(document_emb)
# Per-token 2-way softmax (trained against batch['answer_labels'] in the training cell).
answer_tags = keras.layers.Dense(
    2, activation='softmax',
    name='answer_tags'
)(answer_outputs)

# Second-to-last dim is the answer length, last dim the document length;
# fed from batch['answer_masks'] in the training cell.
encoder_input_mask = keras.Input(
    shape=(MAX_ANSWER_SIZE, MAX_DOC_SIZE),
    name='encoder_input_mask'
)
# Matrix-multiply the mask with the per-token document outputs.
# NOTE(review): presumably each mask row is one-hot over document positions, so
# this gathers the outputs at the answer tokens -- confirm against utils.write.
encoder_inputs = keras.layers.Lambda(
    lambda x: tf.matmul(x[0], x[1]),
    name="encoder_inputs"
)([encoder_input_mask, answer_outputs])
# GRU without return_sequences: only its final output is kept. Its width
# (2 * GRU_UNITS) equals decoder_cell's, so it can seed the decoder state.
encoder_cell = keras.layers.GRU(
    2 * GRU_UNITS, name='encoder_cell'
)(encoder_inputs)

# Question tokens fed to the decoder (batch['question_input_tokens'] in training).
decoder_inputs = keras.Input(shape=(MAX_Q_SIZE,), name='decoder_inputs')
decoder_emb = embedding(decoder_inputs)
# NOTE(review): this sets an attribute on the embedding *output tensor*, not on
# the `embedding` layer, so it most likely does not freeze any weights. If the
# GloVe vectors are meant to stay fixed, `embedding.trainable = False` (set
# before compile) would be needed -- confirm intent before changing.
decoder_emb.trainable = False
# The layer object is kept in its own variable so it can be re-applied to
# other inputs (weights are shared wherever the same layer instance is called).
decoder_cell = keras.layers.GRU(
    2 * GRU_UNITS,
    return_sequences=True,
    name='decoder_cell'
)
decoder_states = decoder_cell(decoder_emb, initial_state=[encoder_cell])

# Softmax over the whole vocabulary for every decoder step, without a bias term.
decoder_projection = keras.layers.Dense(
    VOCAB_SIZE, name='decoder_projection',
    activation='softmax', use_bias=False
)
decoder_outputs = decoder_projection(decoder_states)

# Training model. Input order: document tokens, decoder inputs, answer mask.
# Output order: answer tags, decoder word distributions.
total_model = keras.Model([document_tokens, decoder_inputs, encoder_input_mask],
                          [answer_tags, decoder_outputs])
# Writes the architecture diagram to 'model.png' relative to the current working directory.
keras.utils.plot_model(total_model, to_file='model.png', show_shapes=True)

print(decoder_emb.shape)
total_model.summary()

In [None]:
# Sub-models that reuse tensors of the training graph (and therefore its weights).
# answer_model: document tokens -> per-token answer tags.
answer_model = keras.Model(inputs=document_tokens, outputs=[answer_tags])
# decoder_initial_state_model: document tokens + answer mask -> encoder_cell output,
# which is what the training graph passes to the decoder as its initial state.
decoder_initial_state_model = keras.Model(
    inputs=[document_tokens, encoder_input_mask],
    outputs=[encoder_cell],
)

In [None]:
#### INFERENCE MODEL ####
# One-step decoder built from the already-created `embedding`, `decoder_cell`
# and `decoder_projection` layers, so it shares their weights.

# A single token id per example (sequence length 1).
decoder_inputs_dynamic = keras.Input(name="decoder_inputs_dynamic", shape=(1,))
decoder_emb_dynamic = embedding(decoder_inputs_dynamic)

# GRU state supplied from outside and used as the decoder's initial state for
# this step (presumably the state carried over from the previous step, or the
# output of decoder_initial_state_model for the first step -- confirm in the
# generation code).
decoder_init_state_dynamic = keras.Input(
    name='decoder_init_state_dynamic',
    shape=(2 * GRU_UNITS,),
)

decoder_states_dynamic = decoder_cell(
    decoder_emb_dynamic,
    initial_state=[decoder_init_state_dynamic],
)
decoder_outputs_dynamic = decoder_projection(decoder_states_dynamic)

# Returns both the word distribution and the GRU output for the step.
question_model = keras.Model(
    inputs=[decoder_inputs_dynamic, decoder_init_state_dynamic],
    outputs=[decoder_outputs_dynamic, decoder_states_dynamic],
)


In [None]:
#### COMPILE TRAINING MODEL ####
# Use the `learning_rate` keyword: the legacy `lr` alias is deprecated in
# tf.keras and is not accepted by newer Keras optimizers.
opti = keras.optimizers.Adam(learning_rate=0.001)
# One sparse categorical cross-entropy per output (answer tags, decoder words),
# weighted equally in the total loss.
total_model.compile(loss=['sparse_categorical_crossentropy','sparse_categorical_crossentropy'],
                    optimizer=opti, loss_weights=[1, 1])


In [None]:
# Per-batch loss records; one entry is appended to each list for every training batch.
training_loss_history = []
test_loss_history = []

EPOCHS = 2000
start_epoch = 1


def _split_batch(batch):
    """Convert one data-generator batch into ``(inputs, targets)`` for ``total_model``.

    Inputs follow the model's input order (document tokens, question input
    tokens, answer masks). Targets follow its output order (answer labels,
    question output tokens), each with a trailing axis of size 1 added.
    """
    inputs = [batch['document_tokens'], batch['question_input_tokens'], batch['answer_masks']]
    targets = [np.expand_dims(batch['answer_labels'], axis=-1),
               np.expand_dims(batch['question_output_tokens'], axis=-1)]
    return inputs, targets


# Run exactly EPOCHS epochs, numbered start_epoch .. start_epoch + EPOCHS - 1
# (the previous upper bound of `+ EPOCHS + 1` ran one epoch too many).
for epoch in range(start_epoch, start_epoch + EPOCHS):
    print("Epoch {0}".format(epoch))

    # A fresh training generator is created for every epoch.
    for i, batch in enumerate(training_data()):
        # Pair each training batch with the next test batch, restarting the
        # test generator once it is exhausted.
        val_batch = next(test_data_gen, None)

        if val_batch is None:
            test_data_gen = test_data()
            val_batch = next(test_data_gen, None)
            if val_batch is None:
                # A brand-new generator produced nothing: fail with a clear
                # message instead of a TypeError when indexing None below.
                raise RuntimeError("test_data() yielded no batches")

        train_inputs, train_targets = _split_batch(batch)
        training_loss = total_model.train_on_batch(train_inputs, train_targets)

        val_inputs, val_targets = _split_batch(val_batch)
        test_loss = total_model.test_on_batch(val_inputs, val_targets)

        training_loss_history.append(training_loss)
        test_loss_history.append(test_loss)

        print("{}: Train Loss: {} | Test Loss: {}".format(
            i, training_loss, test_loss))

    # Checkpoint the weights once per epoch.
    total_model.save_weights(os.path.join(RUN_FOLDER, 'weights/weights_{}.h5'.format(epoch)))

In [None]:
#### SHOW LOSSES ####
# Plot column 0 of every recorded loss entry, one point per training batch.
# NOTE(review): column 0 is presumably the combined loss of both outputs -- confirm
# against total_model.metrics_names.
plt.plot(np.array(training_loss_history)[:,0], label='train')
plt.plot(np.array(test_loss_history)[:,0], label='test')
plt.xlabel('batch')
plt.ylabel('loss')
plt.legend()
plt.show()

# Persist both histories; the context manager guarantees the file is flushed
# and closed even if pickling fails.
with open(os.path.join(RUN_FOLDER, 'weights/histories.pkl'), 'wb') as history_file:
    pkl.dump([training_loss_history, test_loss_history], history_file)