# Building a Chatbot

In this project, we will build a chatbot using conversations from Cornell University's [Movie Dialogue Corpus](https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html). The main features of our model are LSTM cells, a bidirectional dynamic RNN, and decoders with attention. 

The conversations will be cleaned rather extensively to help the model to produce better responses. As part of the cleaning process, punctuation will be removed, rare words will be replaced with "UNK" (our "unknown" token), longer sentences will not be used, and all letters will be in the lowercase. 

With a larger amount of data, it would be more practical to keep features, such as punctuation. However, I am using FloydHub's GPU services and I don't want to get carried away with too training for too long.

In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
import time

from corpus import Corpus

tf.__version__

'1.8.0'

Most of the code to load the data is courtesy of https://github.com/suriyadeepan/practical_seq2seq/blob/master/datasets/cornell_corpus/data.py.

### Inspect and Load the Data

In [454]:
cornell_corpus = Corpus("movie_lines.txt", "movie_conversations.txt", max_vocab=8100, max_line_length=30)
questions_text = cornell_corpus.prompts
answers_text = cornell_corpus.answers
questions_int = cornell_corpus.prompts_int
answers_int = cornell_corpus.answers_int

UNK = cornell_corpus.unk
vocab2int = cornell_corpus.vocab2int

METATOKEN_INDEX = len(vocab2int)
EOS = "<EOS>"
PAD = "<PAD>"
GO = "<GO>"
METATOKENS = [EOS, PAD, GO]
for metatoken in METATOKENS:
    vocab2int[metatoken] = METATOKEN_INDEX #Map all metatokens to same index (and therefore embedding)
    
GENERIC_META = "<META>"
int2vocab = {index:word for (word, index) in vocab2int.items()}
int2vocab[METATOKEN_INDEX] = GENERIC_META

source_vocab_size = len(vocab2int)
dest_vocab_size = len(vocab2int)

vocab_dicts = (vocab2int, int2vocab)
(questions_vocab_to_int, questions_int_to_vocab) = vocab_dicts
(answers_vocab_to_int, answers_int_to_vocab) = vocab_dicts
source_vocab_size = len(questions_vocab_to_int)
dest_vocab_size = len(answers_vocab_to_int)

In [455]:
#Add EOS tokens to target data now
for i in range(len(answers_int)):
    answers_text[i] += " " + EOS
    answers_int[i].append(answers_vocab_to_int[EOS])


In [456]:
# Sort questions and answers by the length of questions.
# This will reduce the amount of padding during training
# Which should speed up training and help to reduce the loss

max_source_line_length = max( [len(sentence) for sentence in questions_int])
max_targ_line_length = max([len(sentence) for sentence in answers_int])
max_line_length = max(max_source_line_length, max_targ_line_length)

sorted_questions = []
sorted_answers = []

for length in range(1, max_line_length+1):
    for i in enumerate(questions_int):
        if len(i[1]) == length:
            sorted_questions.append(questions_int[i[0]])
            sorted_answers.append(answers_int[i[0]])

print(len(sorted_questions))
print(len(sorted_answers))
print()
for i in range(3):
    print(sorted_questions[i])
    print(sorted_answers[i])
    print()

196649
196649

[4709]
[2, 117, 3, 4709, 1, 18, 57, 2, 2980, 13, 6, 2348, 8100, 5082, 13, 1919, 27, 588, 1, 68, 615, 328, 44, 95, 8101]

[56]
[8100, 2604, 44, 276, 4, 28, 99, 2349, 78, 44, 557, 515, 313, 87, 7, 26, 31, 36, 44, 48, 460, 13, 7, 84, 86, 8101]

[54]
[66, 8101]



<H1>Word2Vec</H1>

In [457]:
import nltk

In [458]:
combined_corpus=[]

In [459]:
combined_corpus.extend(questions_text)
combined_corpus.extend(answers_text)

In [460]:
len(combined_corpus)

393700

In [474]:
tok_corp = [nltk.word_tokenize(sent) for sent in combined_corpus]

In [475]:
len(tok_corp)

393700

In [462]:
for i in range(len(tok_corp)):
    tok_corp[i] = [word.lower() for word in tok_corp[i] if re.match('^[a-zA-Z]+', word)]

In [463]:
tok_corp

[['can',
  'we',
  'make',
  'this',
  'quick',
  'unk',
  'unk',
  'and',
  'andrew',
  'barrett',
  'are',
  'having',
  'an',
  'incredibly',
  'unk',
  'public',
  'break',
  'up',
  'on',
  'the',
  'unk',
  'again'],
 ['well',
  'i',
  'thought',
  'we',
  'would',
  'start',
  'with',
  'unk',
  'if',
  'that',
  'is',
  'okay',
  'with',
  'you'],
 ['not', 'the', 'hacking', 'and', 'unk', 'and', 'spitting', 'part', 'please'],
 ['you',
  'are',
  'asking',
  'me',
  'out',
  'that',
  'is',
  'so',
  'cute',
  'what',
  'is',
  'your',
  'name',
  'again'],
 ['no',
  'no',
  'it',
  'is',
  'my',
  'fault',
  'we',
  'did',
  'not',
  'have',
  'a',
  'proper',
  'introduction'],
 ['cameron'],
 ['the',
  'thing',
  'is',
  'cameron',
  'i',
  'am',
  'at',
  'the',
  'mercy',
  'of',
  'a',
  'particularly',
  'unk',
  'breed',
  'of',
  'loser',
  'my',
  'sister',
  'i',
  'can',
  'not',
  'date',
  'until',
  'she',
  'does'],
 ['why'],
 ['unk',
  'mystery',
  'she',
  'used'

In [464]:
from gensim.models import Word2Vec
model = Word2Vec(sentences=tok_corp, size=1024, window=5, min_count=1, workers=4, sg=0)

In [465]:
model.wv['well']

array([-0.27309787,  0.15266134, -0.2306928 , ...,  0.07636208,
        0.2967297 ,  0.10900455], dtype=float32)

In [466]:
wordVecs = model.wv

In [467]:
len(model.wv.vocab)

7901

In [468]:
word_vecs = np.zeros((len(model.wv.vocab),1024))
for i,word in enumerate(model.wv.index2word):
        word_vecs[i] = model[word]
      

  This is separate from the ipykernel package so we can avoid doing imports until


In [469]:
len(word_vecs)

7901

In [472]:
len(questions_vocab_to_int)

8104

In [471]:
import numpy
np.save('word_Vecs.npy',word_vecs)

In [253]:
#FIXME: This really should be something like "preprocess_targets"
def process_decoding_input(target_data, vocab_to_int, batch_size):
    '''Remove the last word id from each batch and concat the <GO> to the begining of each batch'''
    ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    dec_input = tf.concat([tf.fill([batch_size, 1], vocab_to_int[GO]), ending], 1)
    return dec_input


In [254]:
def dropout_cell(rnn_size, keep_prob):
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    return tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

def multi_dropout_cell(rnn_size, keep_prob, num_layers):    
    return tf.contrib.rnn.MultiRNNCell( [dropout_cell(rnn_size, keep_prob) for _ in range(num_layers)] )

In [255]:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_lengths):
    """
    Create the encoding layer
    
    Returns a tuple `(outputs, output_states)` where
      outputs is a 2-tuple of vectors of dimensions [sequence_length, rnn_size] for the forward and backward passes
      output_states is a 2-tupe of the final hidden states of the forward and backward passes
    
    """
    forward_cell = multi_dropout_cell(rnn_size, keep_prob, num_layers)
    backward_cell = multi_dropout_cell(rnn_size, keep_prob, num_layers)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw = forward_cell,
                                                   cell_bw = backward_cell,
                                                   sequence_length = sequence_lengths,
                                                   inputs = rnn_inputs, 
                                                   dtype=tf.float64)
    return outputs, states

## Decoding

In [404]:
def decoding_layer(enc_state, enc_outputs, dec_embed_input, dec_embeddings, #Inputs
                        rnn_size, num_layers, output_layer, #Architecture
                        keep_prob, beam_width, #Hypeparameters
                        target_lengths, batch_size,
                        vocab_to_int): 
    
    with tf.variable_scope("decoding", reuse=tf.AUTO_REUSE) as decoding_scope:
        dec_cell = multi_dropout_cell(rnn_size, keep_prob, num_layers)
        init_dec_state_size = batch_size
        print(dec_cell.output_size)
        #TRAINING
        train_attn = tf.contrib.seq2seq.BahdanauAttention(num_units=dec_cell.output_size, memory=enc_outputs,dtype=tf.float64)
        
        train_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, train_attn,
                                                    attention_layer_size=dec_cell.output_size)
        
        
        helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, target_lengths, time_major=False)
        train_decoder = tf.contrib.seq2seq.BasicDecoder(train_cell, helper,
                            train_cell.zero_state(init_dec_state_size, tf.float64).clone(cell_state=enc_state),
                            output_layer = output_layer)
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(train_decoder, scope=decoding_scope)
        logits = outputs.rnn_output

        #INFERENCE
        #Tile inputs
        enc_state = tf.contrib.seq2seq.tile_batch(enc_state, beam_width)
        enc_outputs = tf.contrib.seq2seq.tile_batch(enc_outputs, beam_width)
        init_dec_state_size *= beam_width
        
        infer_attn = tf.contrib.seq2seq.BahdanauAttention(num_units=dec_cell.output_size, memory=enc_outputs,dtype=tf.float64)
        infer_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, infer_attn,
                                                    attention_layer_size=dec_cell.output_size)
        
        start_tokens = tf.tile([vocab_to_int["<GO>"]], [batch_size])
        end_token = vocab_to_int["<EOS>"]
        
        decoder = tf.contrib.seq2seq.BeamSearchDecoder(cell = infer_cell,
            embedding = dec_embeddings,
            start_tokens = start_tokens, #Not by batch_size*beam_width, strangely
            end_token = end_token,
            beam_width = beam_width,
            initial_state = infer_cell.zero_state(init_dec_state_size, tf.float64).clone(cell_state=enc_state),
            output_layer = output_layer
        )    
        final_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, scope=decoding_scope)
        
        ids = final_decoder_output.predicted_ids
        beams = ids
                
    return logits, beams

In [412]:
def seq2seq_model(wordVecs,input_data, target_data, keep_prob, batch_size,
                  source_lengths, target_sequence_lengths,
                  answers_vocab_size, questions_vocab_size, enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, beam_width, 
                  questions_vocab_to_int):
    
    '''   
    enc_embed_input = tf.contrib.layers.embed_sequence(input_data, 
                                                       questions_vocab_size, 
                                                       enc_embedding_size,
                                                       initializer = tf.random_uniform_initializer(0,1))
    '''
    W = tf.Variable(wordVecs,trainable=False,name="W")
    enc_embed_input = tf.nn.embedding_lookup(W, input_data)
    print(enc_embed_input.shape)
    enc_outputs, enc_states = encoding_layer(enc_embed_input, rnn_size, num_layers, keep_prob, source_lengths)    
    concatenated_enc_output = tf.concat(enc_outputs, -1)
    init_dec_state = enc_states[0]    
    
    
    dec_input = process_decoding_input(target_data, questions_vocab_to_int, batch_size)
    #print(dec_input.shape)
    dec_embeddings = W 
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
    print(dec_embed_input.shape)
    
    output_layer = tf.layers.Dense(answers_vocab_size,bias_initializer=tf.zeros_initializer(),activation=tf.nn.relu)
    logits, beams = decoding_layer(init_dec_state,
                            concatenated_enc_output,
                            dec_embed_input,
                            dec_embeddings,
                            rnn_size, 
                            num_layers,
                            output_layer,
                            keep_prob,
                            beam_width,
                            target_sequence_lengths, 
                            batch_size,
                            answers_vocab_to_int,
                            )
    
    
    return logits, beams

In [413]:
# Set the Hyperparameters

#Network Architecture
rnn_size = 128
num_layers = 2
encoding_embedding_size = 1024
decoding_embedding_size = 1024

#Training
epochs = 100
batch_size = 128
learning_rate = 0.001
learning_rate_decay = 0.3
min_learning_rate = 0.00001
keep_probability = 0.75
vocab_size = len(answers_vocab_to_int)
#Decoding
beam_width = 10
wordVecs = np.load('word_Vecs.npy')

In [414]:
vocab_size

8104

In [415]:
len(questions_vocab_to_int)

8104

In [416]:
type(wordVecs)

numpy.ndarray

In [417]:
def model_inputs(batch_size):
    '''Create palceholders for inputs to the model'''
    input_data = tf.placeholder(tf.int32, [batch_size, None], name='input')
    targets = tf.placeholder(tf.int32, [batch_size, None], name='targets')
    lr = tf.placeholder(tf.float64, name='learning_rate')
    keep_prob = tf.placeholder(tf.float64, name='keep_prob')

    return input_data, targets, lr, keep_prob

In [418]:
# Reset the graph to ensure that it is ready for training
tf.reset_default_graph()


# Placeholders for feed_dict    
input_data, targets, lr, keep_prob = model_inputs(batch_size)

source_lengths = tf.placeholder(tf.int32, [batch_size], name="source_lengths")
max_sequence_length_batch = tf.placeholder(tf.int32)

input_shape = tf.shape(input_data)
target_sequence_lengths = tf.fill([input_shape[0]], max_sequence_length_batch)

# Create the training and inference logits
#FIXME: Change "batch_size" to input_shape[0]?
train_logits, beams = \
seq2seq_model(wordVecs,input_data,
              #tf.reverse(input_data, [-1]),
              targets, keep_prob, batch_size,
              source_lengths,
    target_sequence_lengths, 
    len(answers_vocab_to_int), len(questions_vocab_to_int),
    encoding_embedding_size, decoding_embedding_size, rnn_size, num_layers, beam_width, questions_vocab_to_int)

# Find the shape of the input data for sequence_loss
with tf.name_scope("optimization"):

    cost = tf.losses.huber_loss(
                   train_logits,
                   tf.one_hot(targets,len(answers_vocab_to_int),axis=-1),
                   delta=1.0,
                   scope=None,
                   loss_collection=tf.GraphKeys.LOSSES,
                   reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)

   # cost=tf.reduce_mean(tf.nn.l2_loss(train_logits - tf.one_hot(targets,vocab_size,axis=-1)))
    #cost = tf.reduce_mean(tf.square(tf.subtract(train_logits,targets)))
    #cost = losses * tf.ones([batch_size, max_sequence_length_batch])
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Gradient Clipping
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
    train_op = optimizer.apply_gradients(capped_gradients)


(128, ?, 1024)
(128, ?, 1024)
128


In [419]:
def pad_sentence_batch(sentence_batch, vocab_to_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence_length = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [vocab_to_int[PAD]] * (max_sentence_length - len(sentence)) for sentence in sentence_batch]

In [420]:
def batch_data(questions, answers, batch_size):
    """Batch questions and answers together"""
    for batch_i in range(0, len(questions)//batch_size):
        start_i = batch_i * batch_size
        questions_batch = questions[start_i:start_i + batch_size]
        answers_batch = answers[start_i:start_i + batch_size]
        
        source_lengths = np.array( [len(sentence) for sentence in questions_batch] )
        
        pad_questions_batch = np.array(pad_sentence_batch(questions_batch, questions_vocab_to_int))
        pad_answers_batch = np.array(pad_sentence_batch(answers_batch, answers_vocab_to_int))
        yield source_lengths, pad_questions_batch, pad_answers_batch

In [421]:
# Validate the training with 10% of the data
train_valid_split = int(len(sorted_questions)*0.15)

# Split the questions and answers into training and validating data
train_questions = sorted_questions[train_valid_split:]
train_answers = sorted_answers[train_valid_split:]

valid_questions = sorted_questions[:train_valid_split]
valid_answers = sorted_answers[:train_valid_split]

print(len(train_questions))
print(len(valid_questions))

167152
29497


In [422]:
#TRAINING
display_step = 100 # Check training loss after every 100 batches
total_train_loss = 0 # Record the training loss for each display step

#VALIDATION
stop_early = 0 
stop = 5 # If the validation loss does decrease in 5 consecutive checks, stop training
validation_check = ((len(train_questions))//batch_size//2)-1 #Check validation loss every half-epoch
summary_valid_loss = [] # Record the validation loss for saving improvements in the model


checkpoint = "./checkpoints/best_model.ckpt" 

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(1, epochs+1):
        for batch_i, (q_lengths, questions_batch, answers_batch) in enumerate(
                batch_data(train_questions, train_answers, batch_size)):
            #print(answers_batch)
            start_time = time.time()
            _, loss = sess.run(
                [train_op, cost],
                {input_data: questions_batch,
                 targets: answers_batch,
                 source_lengths: q_lengths, #NEW
                 lr: learning_rate,
                 max_sequence_length_batch: answers_batch.shape[1],
                 keep_prob: keep_probability})

            total_train_loss += loss
            end_time = time.time()
            batch_time = end_time - start_time
            
            total_train_loss += loss
            end_time = time.time()
            batch_time = end_time - start_time

            if batch_i % display_step == 0:
                print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>9.6f}, Seconds: {:>4.2f}'
                      .format(epoch_i,
                              epochs, 
                              batch_i, 
                              len(train_questions) // batch_size, 
                              total_train_loss / display_step, 
                              batch_time*display_step),
                         flush=True)
                total_train_loss = 0

            if batch_i % validation_check == 0 and batch_i > 0:
                total_valid_loss = 0
                start_time = time.time()
                for batch_ii, (q_lengths, questions_batch, answers_batch) in \
                        enumerate(batch_data(valid_questions, valid_answers, batch_size)):
                    valid_loss = sess.run(
                    cost, {input_data: questions_batch,
                           targets: answers_batch,
                           lr: learning_rate,
                           source_lengths: q_lengths, #NEW
                           max_sequence_length_batch: answers_batch.shape[1],
                           keep_prob: 1})
                    total_valid_loss += valid_loss
                end_time = time.time()
                batch_time = end_time - start_time
                avg_valid_loss = total_valid_loss / (len(valid_questions) / batch_size)
                print('Valid Loss: {:>9.6f}, Seconds: {:>5.2f}'.format(avg_valid_loss, batch_time), flush=True)

                # Reduce learning rate, but not below its minimum value
                learning_rate *= learning_rate_decay
                if learning_rate < min_learning_rate:
                    learning_rate = min_learning_rate

                summary_valid_loss.append(avg_valid_loss)
                if avg_valid_loss <= min(summary_valid_loss):
                    print('New Record!') 
                    stop_early = 0
                    saver = tf.train.Saver() 
                    saver.save(sess, checkpoint)

                else:
                    print("No Improvement.")
                    stop_early += 1
                    if stop_early == stop:
                        break
    
        if stop_early == stop:
            print("Stopping Training.")
            break


InvalidArgumentError: indices[15,30] = 8101 is not in [0, 7901)
	 [[Node: embedding_lookup_1 = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W/read, concat_1, embedding_lookup_1/axis)]]

Caused by op 'embedding_lookup_1', defined at:
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-418-7cc5a8cb3578>", line 22, in <module>
    encoding_embedding_size, decoding_embedding_size, rnn_size, num_layers, beam_width, questions_vocab_to_int)
  File "<ipython-input-412-8cd018efcf6d>", line 24, in seq2seq_model
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/embedding_ops.py", line 308, in embedding_lookup
    transform_fn=None)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/embedding_ops.py", line 131, in _embedding_lookup_and_transform
    result = _clip(array_ops.gather(params[0], ids, name=name),
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 2736, in gather
    return gen_array_ops.gather_v2(params, indices, axis, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3065, in gather_v2
    "GatherV2", params=params, indices=indices, axis=axis, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): indices[15,30] = 8101 is not in [0, 7901)
	 [[Node: embedding_lookup_1 = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W/read, concat_1, embedding_lookup_1/axis)]]


In [17]:
def question_to_seq(question, vocab_to_int, int_to_vocab):
    '''Prepare the question for the model'''
        
    cleaned_question = [Corpus.clean_sequence(int_to_vocab[word]) for word in question]
    return [vocab_to_int.get(word, vocab_to_int[UNK]) for word in cleaned_question]

In [19]:
# Create your own input question
#input_question = 'How are you?'

# Use a question from the data as your input
random = np.random.choice(len(sorted_questions))
input_question = sorted_questions[random]

# Prepare the question
input_question = question_to_seq(input_question, questions_vocab_to_int, questions_int_to_vocab)

# Pad the questions until it equals the max_line_length
input_question = input_question + [questions_vocab_to_int[PAD]] * (max_line_length - len(input_question))
# Add empty questions so the the input_data is the correct shape
batch_shell = np.zeros((batch_size, max_line_length))
# Set the first question to be out input question
batch_shell[0] = input_question 

# Remove the padding from the Question and Answer
pad_q = questions_vocab_to_int[PAD]
pad_a = questions_vocab_to_int[EOS]
#pad_q = questions_vocab_to_int["<PAD>"]
#pad_a = answers_vocab_to_int["<EOS>"]    
print('Question')
print('  Word Ids:      {}'.format([i for i in input_question if i != pad_q]))
print('  Input Words: {}'.format([questions_int_to_vocab[i] for i in input_question if i != pad_q]))

saver = tf.train.Saver()
with tf.Session() as sess:
    # Run the model with the input question
    saver.restore(sess, checkpoint)
    beam_output = sess.run(beams, {input_data: batch_shell,
                                   source_lengths: [len(input_question)] * batch_size,
                                                keep_prob: 1.0})[0]


for i in range(beam_width):
    beam = beam_output[:, i]
    print('\nAnswer', i)
    print('  Word Ids:      {}'.format([i for i in beam if i != pad_a]))
    print('  Response Words: {}'.format([answers_int_to_vocab[i] for i in beam if i != pad_a]))

Question
  Word Ids:      [666, 21, 3, 4558, 7018, 12, 8100]
  Input Words: ['major', 'this', 'is', 'deeply', 'offensive', 'and', '<UNK>']
INFO:tensorflow:Restoring parameters from ./checkpoints/best_model.ckpt

Answer 0
  Word Ids:      [1, 8, 5, 22]
  Response Words: ['i', 'do', 'not', 'know']

Answer 1
  Word Ids:      [1, 8, 5, 22, 8100]
  Response Words: ['i', 'do', 'not', 'know', '<UNK>']

Answer 2
  Word Ids:      [1, 8, 5, 22, 7]
  Response Words: ['i', 'do', 'not', 'know', 'it']

Answer 3
  Word Ids:      [1, 8, 5, 8, 5]
  Response Words: ['i', 'do', 'not', 'do', 'not']

Answer 4
  Word Ids:      [1, 8, 5, 8, 5, 22]
  Response Words: ['i', 'do', 'not', 'do', 'not', 'know']

Answer 5
  Word Ids:      [1, 8, 5, 8, 5, 22, 7]
  Response Words: ['i', 'do', 'not', 'do', 'not', 'know', 'it']

Answer 6
  Word Ids:      [1, 8, 5, 8, 5, 22, 1, 18, 5]
  Response Words: ['i', 'do', 'not', 'do', 'not', 'know', 'i', 'am', 'not']

Answer 7
  Word Ids:      [1, 8, 5, 8, 5, 22, 7, 3, 6, 8100]
