In [3]:
import tensorflow as tf
import tensorlayer as tl
import numpy as np
from tensorlayer.cost import cross_entropy_seq, cross_entropy_seq_with_mask
from tqdm import tqdm
from sklearn.utils import shuffle
from data.cornell_corpus import data
from tensorlayer.models.seq2seq import Seq2seq
from tensorlayer.models.seq2seq_with_attention import Seq2seqLuongAttention
import os

# Load saved model and required vectorizer for data preprocessing

In [4]:
def initial_setup(data_corpus):
    metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format(data_corpus))
    (trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)
    trainX = tl.prepro.remove_pad_sequences(trainX.tolist())
    trainY = tl.prepro.remove_pad_sequences(trainY.tolist())
    testX = tl.prepro.remove_pad_sequences(testX.tolist())
    testY = tl.prepro.remove_pad_sequences(testY.tolist())
    validX = tl.prepro.remove_pad_sequences(validX.tolist())
    validY = tl.prepro.remove_pad_sequences(validY.tolist())
    return metadata, trainX, trainY, testX, testY, validX, validY


data_corpus = "cornell_corpus"
#data preprocessing
metadata, trainX, trainY, testX, testY, validX, validY = initial_setup(data_corpus)


batch_size = 32
src_vocab_size = len(metadata['idx2w']) # 8002 (0~8001)
emb_dim = 1024

word2idx = metadata['w2idx']   # dict  word 2 index
idx2word = metadata['idx2w']   # list index 2 word

unk_id = word2idx['unk']   # 1
pad_id = word2idx['_']     # 0

start_id = src_vocab_size  # 8002
end_id = src_vocab_size + 1  # 8003

word2idx.update({'start_id': start_id})
word2idx.update({'end_id': end_id})
idx2word = idx2word + ['start_id', 'end_id']

src_vocab_size = tgt_vocab_size = src_vocab_size + 2

num_epochs = 1
vocabulary_size = src_vocab_size
decoder_seq_length = 20


def inference(seed, top_n):
    model_.eval()
    seed_id = [word2idx.get(w, unk_id) for w in seed.split(" ")]
    sentence_id = model_(inputs=[[seed_id]], seq_length=20, start_token=start_id, top_n = top_n)
    sentence = []
    for w_id in sentence_id[0]:
        w = idx2word[w_id]
        if w == 'end_id':
            break
        sentence = sentence + [w]
    return sentence

# create model object
model_ = Seq2seq(
        decoder_seq_length = decoder_seq_length,
        cell_enc=tf.keras.layers.GRUCell,
        cell_dec=tf.keras.layers.GRUCell,
        n_layer=3,
        n_units=256,
        embedding_layer=tl.layers.Embedding(vocabulary_size=vocabulary_size, embedding_size=emb_dim),
        )
    
optimizer = tf.optimizers.Adam(learning_rate=0.001)

# Load the pretrained model
load_weights = tl.files.load_npz(name='model-cornell_corpus.npz')
tl.files.assign_weights(load_weights, model_)


[TL] Embedding embedding_1: (8004, 1024)
[TL] RNN rnn_1: cell: GRUCell, n_units: 256
[TL] RNN rnn_2: cell: GRUCell, n_units: 256
[TL] RNN rnn_3: cell: GRUCell, n_units: 256
[TL] RNN rnn_4: cell: GRUCell, n_units: 256
[TL] RNN rnn_5: cell: GRUCell, n_units: 256
[TL] RNN rnn_6: cell: GRUCell, n_units: 256
[TL] Reshape reshape_1
[TL] Dense  dense_1: 8004 No Activation
[TL] Reshape reshape_2
[TL] Reshape reshape_3


[<tf.Variable 'UnreadVariable' shape=(1024, 768) dtype=float32, numpy=
 array([[-0.03148898,  0.13925529, -0.29645008, ..., -0.11974213,
          0.03786283, -0.1295784 ],
        [ 0.17386591,  0.06829999, -0.0285179 , ..., -0.37609744,
          0.04056163,  0.23004293],
        [-0.02074117,  0.27572682,  0.32532418, ..., -0.09201859,
         -0.14367212, -0.08592128],
        ...,
        [-0.5577352 ,  0.18776865,  0.17842697, ..., -0.06257067,
          0.16745389,  0.07865053],
        [-0.01389502, -0.16177991,  0.0844833 , ..., -0.01332104,
         -0.11778299,  0.08262827],
        [ 0.132198  ,  0.16552937, -0.2364373 , ...,  0.08745446,
          0.1203819 , -0.06019237]], dtype=float32)>,
 <tf.Variable 'UnreadVariable' shape=(256, 768) dtype=float32, numpy=
 array([[ 0.2109836 ,  0.22751983, -0.20228454, ...,  0.06346867,
         -0.22863933,  0.2537897 ],
        [-0.07739688,  0.00729488,  0.17323415, ..., -0.07905708,
          0.08564501,  0.11764973],
        [-0.

# Test the model on some basic sentences

In [5]:
test_questions = ["you can do it", "youre terrible", "nah i dont think so"]

In [6]:
 for question in test_questions:
    print("Query >", question)
    top_n = 3 # number of top answers needed
    for i in range(top_n):
        sentence = inference(question, top_n)
        print(" >", ' '.join(sentence))

Query > you can do it
 > yes only
 > yes jabez lovely now hurry there alone
 > yes jabez i think youll keep her alone
Query > youre terrible
 > some arent getting on going home to unk
 > some goddamn guy what do i say look what the hell is out now
 > some day why shouldnt it be back why
Query > nah i dont think so
 > if anything meant asking
 > if anything didnt answer the unk
 > if you dont hear that duffy or hear you
