In [1]:
import tensorlayer as tl
import tensorflow as tf
from tensorlayer.layers import *

import numpy as np
import time
import pickle

In [2]:
# Hyper-parameters read by the graph-building cells further down.
batch_size = 32         # leading dimension of the training placeholders
xvocab_size = 40_004    # vocabulary size used for the embedding table and the output layer
emb_dim = 100           # embedding width; also passed as n_hidden to the Seq2Seq layer

In [3]:
# Build the vocabulary (word -> index) and the matching embedding matrix from
# the pre-trained GloVe vectors. Index 0 is reserved for padding and the last
# index for unknown words; everything in between comes from the GloVe file.
PAD_TOKEN = 0
glove_data_directory = '.'
MAX_GLOVE_TERMS = 40_000  # Limit vocabulary to the first 40k terms of the file

word2idx = { 'PAD': PAD_TOKEN } # dict so we can lookup indices for tokenising our text later from string to sequence of integers
weights = []

# The encoding is given explicitly: without it open() falls back to the
# platform's default encoding, which is not UTF-8 everywhere, and the GloVe
# vocabulary is not restricted to ASCII.
with open(glove_data_directory + '/' + 'glove.6B.100d.txt', 'r', encoding='utf-8') as file:
    for index, line in enumerate(file):
        values = line.split() # Word and weights separated by space
        word = values[0] # Word is first symbol on each line
        word_weights = np.asarray(values[1:], dtype=np.float32) # Remainder of line is weights for word
        word2idx[word] = index + 1 # PAD is our zeroth index so shift by one
        weights.append(word_weights)

        if index + 1 == MAX_GLOVE_TERMS:
            break

EMBEDDING_DIMENSION = len(weights[0])
# Insert the PAD weights at index 0 now we know the embedding dimension
weights.insert(0, np.random.randn(EMBEDDING_DIMENSION))

# Append the unknown-word token to the end of the vocab, randomly initialised
# (PAD was already placed at index 0 above).
UNKNOWN_TOKEN = len(weights)
word2idx['UNK'] = UNKNOWN_TOKEN
weights.append(np.random.randn(EMBEDDING_DIMENSION))


# Construct our final embedding matrix: one float32 row per vocabulary entry
weights = np.asarray(weights, dtype=np.float32)

VOCAB_SIZE = weights.shape[0]



In [4]:
# Register the decoder's start/end markers and derive the reverse lookup.
#
# Everything here is derived from word2idx with setdefault(), so running this
# cell a second time yields the same ids and sizes. The previous version
# recomputed the ids from len(word2idx) after the markers had already been
# added, so a re-run shifted start_id/end_id and grew xvocab_size.
unk_id = word2idx['UNK']
pad_id = word2idx['PAD']

# On the first run the markers receive the next two free indices; on later
# runs the existing indices are returned unchanged.
start_id = word2idx.setdefault('start_id', len(word2idx))
end_id = word2idx.setdefault('end_id', len(word2idx))

# Relies on dict insertion order matching index order (as the original did):
# PAD, the GloVe words, UNK, start_id, end_id.
idx2word = list(word2idx.keys())
xvocab_size = len(idx2word)

VOCAB_LENGTH = len(word2idx)


In [5]:
def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
    """Assemble the seq2seq network and return its output and recurrent layers.

    Args:
        encode_seqs: token ids looked up by the encoder-side embedding layer.
        decode_seqs: token ids looked up by the decoder-side embedding layer.
        is_train: when true the Seq2Seq layer is built with ``dropout=0.5``,
            otherwise with ``dropout=None``.
        reuse: forwarded to the outer ``tf.variable_scope("model", ...)``.

    Returns:
        A ``(net_out, net_rnn)`` pair: the final ``DenseLayer`` with
        ``xvocab_size`` units and identity activation, and the ``Seq2Seq``
        layer it is stacked on.

    Reads the module-level ``xvocab_size`` and ``emb_dim`` at call time.
    """
    # Both embedding layers are created with identical settings and the same name.
    shared_embedding_args = dict(
        vocabulary_size=xvocab_size,
        embedding_size=emb_dim,
        name='seq_embedding',
    )
    dropout_setting = 0.5 if is_train else None

    with tf.variable_scope("model", reuse=reuse):
        # A chatbot can share one embedding between encoder and decoder;
        # for translation you may want two separate embedding layers.
        with tf.variable_scope("embedding") as embedding_scope:
            encoder_embedding = EmbeddingInputlayer(inputs=encode_seqs, **shared_embedding_args)
            # Switch the scope to reuse mode before building the second layer.
            # NOTE(review): the original author's note says
            # tl.layers.set_name_reuse(True) was needed here on TensorLayer
            # releases before 1.8.0 - confirm if targeting an older version.
            embedding_scope.reuse_variables()
            decoder_embedding = EmbeddingInputlayer(inputs=decode_seqs, **shared_embedding_args)

        seq2seq_layer = Seq2Seq(
            encoder_embedding,
            decoder_embedding,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=emb_dim,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=dropout_setting,
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq',
        )
        output_layer = DenseLayer(
            seq2seq_layer,
            n_units=xvocab_size,
            act=tf.identity,
            name='output',
        )
    return output_layer, seq2seq_layer

In [6]:
# Training-time graph: four int64 placeholders of shape [batch_size, None],
# created on the first GPU, followed by the first (reuse=False) build of the model.
# Per the original note, target_mask is meant to come from
# tl.prepro.sequences_get_mask().
with tf.device('/device:GPU:0'):
    encode_seqs, decode_seqs, target_seqs, target_mask = [
        tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name=placeholder_name)
        for placeholder_name in ("encode_seqs", "decode_seqs", "target_seqs", "target_mask")
    ]
net_out, _ = model(encode_seqs, decode_seqs, is_train=True, reuse=False)

[TL] EmbeddingInputlayer model/embedding/seq_embedding: (40004, 100)
[TL] EmbeddingInputlayer model/embedding/seq_embedding: (40004, 100)
[TL] [*] Seq2Seq model/seq2seq: n_hidden: 100 cell_fn: BasicLSTMCell dropout: 0.5 n_layer: 3
[TL] DynamicRNNLayer model/seq2seq/encode: n_hidden: 100, in_dim: 3 in_shape: (32, ?, 100) cell_fn: BasicLSTMCell dropout: 0.5 n_layer: 3
[TL]        batch_size (concurrent processes): 32
[TL] DynamicRNNLayer model/seq2seq/decode: n_hidden: 100, in_dim: 3 in_shape: (32, ?, 100) cell_fn: BasicLSTMCell dropout: 0.5 n_layer: 3
[TL]        batch_size (concurrent processes): 32
[TL] DenseLayer  model/output: 40004 No Activation


In [7]:
# Inference-time graph: the same model rebuilt with reuse=True and dropout off,
# fed by placeholders whose first dimension is fixed to 1.
with tf.device('/device:GPU:0'):
    encode_seqs2, decode_seqs2 = [
        tf.placeholder(dtype=tf.int64, shape=[1, None], name=placeholder_name)
        for placeholder_name in ("encode_seqs", "decode_seqs")
    ]
net, net_rnn = model(encode_seqs2, decode_seqs2, is_train=False, reuse=True)
# Softmax over the output layer's raw scores; this is what the decoding loop samples from.
y = tf.nn.softmax(net.outputs)

[TL] EmbeddingInputlayer model/embedding/seq_embedding: (40004, 100)
[TL] EmbeddingInputlayer model/embedding/seq_embedding: (40004, 100)
[TL] [*] Seq2Seq model/seq2seq: n_hidden: 100 cell_fn: BasicLSTMCell dropout: None n_layer: 3
[TL] DynamicRNNLayer model/seq2seq/encode: n_hidden: 100, in_dim: 3 in_shape: (1, ?, 100) cell_fn: BasicLSTMCell dropout: None n_layer: 3
[TL]        batch_size (concurrent processes): 1
[TL] DynamicRNNLayer model/seq2seq/decode: n_hidden: 100, in_dim: 3 in_shape: (1, ?, 100) cell_fn: BasicLSTMCell dropout: None n_layer: 3
[TL]        batch_size (concurrent processes): 1
[TL] DenseLayer  model/output: 40004 No Activation


In [8]:
# Open a session, initialise every variable, then overwrite the variables of
# `net` with the values stored in the n2.npz checkpoint.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))
# tl.layers.initialize_global_variables() is deprecated in favour of
# tf.global_variables_initializer (see the warning it emits), so run the
# TensorFlow initializer op directly.
sess.run(tf.global_variables_initializer())
tl.files.load_and_assign_npz(sess=sess, name='n2.npz', network=net)

Instructions for updating: This API is deprecated in favor of `tf.global_variables_initializer`

[TL] [*] Load n2.npz SUCCESS!


<tensorlayer.layers.dense.base_dense.DenseLayer at 0x7f3020369eb8>

In [9]:
def answer_me_bot(inputs="", max_length=30):
    """Print a reply to ``inputs``, generated one word at a time.

    The prompt is tokenised on whitespace and encoded once; the decoder is then
    run step by step, starting from ``start_id`` and feeding each chosen word
    and the returned state back in, until ``end_id`` is chosen or the length
    limit is reached.

    Args:
        inputs: the prompt text. Words missing from ``word2idx`` are encoded
            as ``unk_id``.
        max_length: number of decoding steps allowed after the first one, so a
            reply holds at most ``max_length + 1`` words (the default keeps the
            original limit of 1 + 30).

    Returns:
        None. The reply is printed, prefixed with ``" >"``.

    Raises:
        ValueError: if ``inputs`` contains no words.
    """
    # split() with no argument collapses runs of whitespace, so repeated spaces
    # no longer produce '' tokens; unknown words fall back to unk_id instead of
    # raising KeyError.
    seed_id = [word2idx.get(token, unk_id) for token in inputs.split()]
    if not seed_id:
        raise ValueError("answer_me_bot() needs a prompt with at least one word")

    # 1. encode the prompt and keep the encoder's final state
    state = sess.run(net_rnn.final_state_encode,
                     {encode_seqs2: [seed_id]})

    # 2. decode, feeding the state and the previously chosen word back in
    w_id = start_id
    sentence = []
    for step in range(max_length + 1):
        o, state = sess.run([y, net_rnn.final_state_decode],
                            {net_rnn.initial_state_decode: state,
                             decode_seqs2: [[w_id]]})
        # The first word is picked with top_k=3, every later one with top_k=2.
        w_id = tl.nlp.sample_top(o[0], top_k=3 if step == 0 else 2)
        # Checked on every step, including the first, so the end marker itself
        # never ends up in the reply.
        if w_id == end_id:
            break
        sentence.append(idx2word[w_id])
    print(" >", ' '.join(sentence))

In [47]:
# Example query against the restored model; the reply printed below is from one run.
answer_me_bot("you are such a boring guy")

 > yes .
