In [15]:
import tensorflow as tf
import tensorlayer as tl
import numpy as np
from tensorlayer.cost import cross_entropy_seq, cross_entropy_seq_with_mask
from tqdm import tqdm
from sklearn.utils import shuffle
from data.cornell_corpus import data
from tensorlayer.models.seq2seq import Seq2seq
from tensorlayer.models.seq2seq_with_attention import Seq2seqLuongAttention
import os

# Load saved model and required vectorizer for data preprocessing

In [16]:
def initial_setup(data_corpus):
    metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format(data_corpus))
    (trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)
    trainX = tl.prepro.remove_pad_sequences(trainX.tolist())
    trainY = tl.prepro.remove_pad_sequences(trainY.tolist())
    testX = tl.prepro.remove_pad_sequences(testX.tolist())
    testY = tl.prepro.remove_pad_sequences(testY.tolist())
    validX = tl.prepro.remove_pad_sequences(validX.tolist())
    validY = tl.prepro.remove_pad_sequences(validY.tolist())
    return metadata, trainX, trainY, testX, testY, validX, validY


data_corpus = "cornell_corpus"
#data preprocessing
metadata, trainX, trainY, testX, testY, validX, validY = initial_setup(data_corpus)


batch_size = 32
src_vocab_size = len(metadata['idx2w']) # 8002 (0~8001)
emb_dim = 1024

word2idx = metadata['w2idx']   # dict  word 2 index
idx2word = metadata['idx2w']   # list index 2 word

unk_id = word2idx['unk']   # 1
pad_id = word2idx['_']     # 0

start_id = src_vocab_size  # 8002
end_id = src_vocab_size + 1  # 8003

word2idx.update({'start_id': start_id})
word2idx.update({'end_id': end_id})
idx2word = idx2word + ['start_id', 'end_id']

src_vocab_size = tgt_vocab_size = src_vocab_size + 2

num_epochs = 1
vocabulary_size = src_vocab_size
decoder_seq_length = 20


def inference(seed, top_n):
    model_.eval()
    seed_id = [word2idx.get(w, unk_id) for w in seed.split(" ")]
    sentence_id = model_(inputs=[[seed_id]], seq_length=20, start_token=start_id, top_n = top_n)
    sentence = []
    for w_id in sentence_id[0]:
        w = idx2word[w_id]
        if w == 'end_id':
            break
        sentence = sentence + [w]
    return sentence

# create model object
model_ = Seq2seq(
        decoder_seq_length = decoder_seq_length,
        cell_enc=tf.keras.layers.GRUCell,
        cell_dec=tf.keras.layers.GRUCell,
        n_layer=3,
        n_units=256,
        embedding_layer=tl.layers.Embedding(vocabulary_size=vocabulary_size, embedding_size=emb_dim),
        )
    
optimizer = tf.optimizers.Adam(learning_rate=0.001)

# Load the pretrained model
load_weights = tl.files.load_npz(name='model-cornell_corpus.npz')
tl.files.assign_weights(load_weights, model_)


[TL] Embedding embedding_4: (8004, 1024)
[TL] RNN rnn_19: cell: GRUCell, n_units: 256
[TL] RNN rnn_20: cell: GRUCell, n_units: 256
[TL] RNN rnn_21: cell: GRUCell, n_units: 256
[TL] RNN rnn_22: cell: GRUCell, n_units: 256
[TL] RNN rnn_23: cell: GRUCell, n_units: 256
[TL] RNN rnn_24: cell: GRUCell, n_units: 256
[TL] Reshape reshape_10
[TL] Dense  dense_4: 8004 No Activation
[TL] Reshape reshape_11
[TL] Reshape reshape_12


[<tf.Variable 'UnreadVariable' shape=(1024, 768) dtype=float32, numpy=
 array([[ 0.03625779,  0.07043416, -0.25960258, ..., -0.15348257,
          0.07799313, -0.10307632],
        [ 0.13279444,  0.18888839, -0.03702531, ..., -0.33829084,
          0.0587705 ,  0.3022898 ],
        [ 0.02307981,  0.3312318 ,  0.27248263, ..., -0.16591193,
         -0.15828541,  0.0649059 ],
        ...,
        [-0.5302294 ,  0.12245173,  0.23501758, ..., -0.10676858,
          0.19127461, -0.01364994],
        [-0.01588898, -0.10458633,  0.18273528, ...,  0.02057895,
         -0.09938489,  0.10874543],
        [ 0.14374542,  0.18621746, -0.19984797, ...,  0.06402951,
          0.18506476, -0.08928435]], dtype=float32)>,
 <tf.Variable 'UnreadVariable' shape=(256, 768) dtype=float32, numpy=
 array([[ 0.15325473,  0.18282092, -0.21736751, ...,  0.0871022 ,
         -0.2144231 ,  0.2054066 ],
        [-0.05306917, -0.00787334,  0.15866536, ..., -0.13309243,
          0.05455199, -0.00949783],
        [-0.

# Test the model on some basic sentences

In [17]:
test_questions = ["hey happy birthday have a nice day", "the important ones are still here"]

In [18]:
 for question in test_questions:
    print("Query >", question)
    top_n = 3 # number of top answers needed
    for i in range(top_n):
        sentence = inference(question, top_n)
        print(" >", ' '.join(sentence))

Query > shall i leave
 > do it as well go to unk
 > do it not trust you
 > do we know why
Query > youd better be quiet sandy
 > meeting im afraid im doing
 > meeting im here
 > meeting you
