In [1]:
import numpy as np
import tensorflow as tf
import re ## to clean the text
import time ## to measure the training time
print (tf.VERSION)

1.0.0


# Part 1 : Data Pre-Processing

In [2]:
# Importing the dataset
lines = open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
conversations = open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')

In [3]:
print(lines[0:10])
print("\n")
print(conversations[0:10])

['L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!', 'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!', 'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.', 'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?', "L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.", 'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow', "L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.", 'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No', 'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?', 'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?']


["u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']", "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']", "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']", "u0 +++$+++ u2 +++$+++

In [4]:
# Creating a dictionary that maps each line and its id
id2line = {}
for line in lines:
    _line = line.split(' +++$+++ ') ## _line= temporary variable, underscore specifies that it is temporary
    if len(_line) == 5:
        id2line[_line[0]] = _line[4] ## mapping of key value. Example: L1045: They do not 
id2line['L1045']

'They do not!'

In [5]:
# Creating a list of all of the conversations
conversations_ids = []
for conversation in conversations[:-1]: ## Python excludes the last row
    # -1 gives you ['L194', 'L195', 'L196', 'L197']
    #1:-1 removes "[ ]"
    # "replace" replaces quotes and whitespaces
    _conversation = conversation.split(' +++$+++ ')[-1][1:-1].replace("'", "").replace(" ", "")
    conversations_ids.append(_conversation.split(','))
conversations_ids[0:3]

[['L194', 'L195', 'L196', 'L197'],
 ['L198', 'L199'],
 ['L200', 'L201', 'L202', 'L203']]

In [6]:
# Getting separately the questions and the answers {L194: question and L195: answer; L198:question and L199:answer}
questions = []
answers = []
for conversation in conversations_ids:
    for i in range(len(conversation) - 1):
        questions.append(id2line[conversation[i]]) ## text of question
        answers.append(id2line[conversation[i+1]]) ## text of answer
print(questions[0:5])
print("\n")
print(answers[0:5])

['Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.', "Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "You're asking me out.  That's so cute. What's your name again?", "No, no, it's my fault -- we didn't have a proper introduction ---"]


["Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "Okay... then how 'bout we try out some French cuisine.  Saturday?  Night?", 'Forget it.', 'Cameron.']


In [7]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"[-()\"#/@;:<>{}+=~|.?,]", "", text)
    return text

In [8]:
# Cleaning the questions
clean_questions = []
for question in questions:
    clean_questions.append(clean_text(question))


# Cleaning the answers
clean_answers = []
for answer in answers:
    clean_answers.append(clean_text(answer))

In [9]:
# Creating a dictionary that maps each word to its number of occurrences
word2count = {}
for question in clean_questions:
    for word in question.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
            
for answer in clean_answers:
    for word in answer.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

print(word2count['marcia'])
print(word2count['wade'])
word2count.items()

25
133




In [10]:
# Creating two dictionaries that map the questions words and the answers words to a unique integer
threshold = 20 ## Remove Bottom 5%
questionswords2int = {}
word_number = 0

for word, count in word2count.items():
    if count >= threshold: ## Only include words occuring more than 20 times
        questionswords2int[word] = word_number
        word_number += 1

answerswords2int = {}
word_number = 0
        

for word, count in word2count.items():
    if count >= threshold:
        answerswords2int[word] = word_number
        word_number += 1

In [11]:
print(questionswords2int['marcia'])
print(questionswords2int.items())

1720


In [12]:
print(answerswords2int.items())



In [13]:
# Adding the last tokens to these two dictionaries
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
for token in tokens:
    questionswords2int[token] = len(questionswords2int) + 1
    
for token in tokens:
    answerswords2int[token] = len(answerswords2int) + 1

In [14]:
# Creating the inverse dictionary of the answerswords2int dictionary
## w=words, w_i=integers obtained from answerswords2int.items()
answersints2word = {w_i: w for w, w_i in answerswords2int.items()} 
print(answersints2word[19])
print("\n")
print(answerswords2int['hoop'])
print("\n")
#print(answersints2word.items())

# Adding the End Of String token to the end of every answer. It is required for Seq2Seq Decoding
for i in range(len(clean_answers)):
    clean_answers[i] += ' <EOS>'
clean_answers[0:5]

night


1029




['well i thought we would start with pronunciation if that is okay with you <EOS>',
 'not the hacking and gagging and spitting part  please <EOS>',
 "okay then how 'bout we try out some french cuisine  saturday  night <EOS>",
 'forget it <EOS>',
 'cameron <EOS>']

In [15]:
# Translating all the questions and the answers into integers
# and Replacing all the words that were filtered out by <OUT> 
questions_into_int = []
for question in clean_questions:
    ints = []
    
    for word in question.split():
        if word not in questionswords2int:
            ints.append(questionswords2int['<OUT>']) ## <'OUT'> has an unique integer as well
        else:
            ints.append(questionswords2int[word])
            
    questions_into_int.append(ints)

print(questions_into_int[0:5])
print("\n")
    
answers_into_int = []


for answer in clean_answers:
    ints = []
    for word in answer.split():
        if word not in answerswords2int:
            ints.append(answerswords2int['<OUT>'])
            
        else:
            ints.append(answerswords2int[word])
    answers_into_int.append(ints)
print(answers_into_int[0:5])

[[7662, 4342, 8351, 5289, 4979, 8824, 8824, 7287, 6440, 8824, 5287, 8146, 6508, 3131, 8824, 3643, 4631, 5096, 4031, 8269, 8824, 7938], [8528, 3731, 7202, 4342, 2110, 592, 5309, 8824, 6227, 7645, 5155, 6337, 5309, 6251], [2897, 8269, 8824, 7287, 8824, 7287, 8824, 5508, 1347], [6251, 5287, 7805, 6880, 2109, 7645, 5155, 7743, 1985, 826, 5155, 1406, 5970, 7938], [3566, 3566, 4348, 2394, 8614, 4342, 3150, 4821, 6164, 3568, 8824]]


[[8528, 3731, 7202, 4342, 2110, 592, 5309, 8824, 6227, 7645, 5155, 6337, 5309, 6251, 8823], [2897, 8269, 8824, 7287, 8824, 7287, 8824, 5508, 1347, 8823], [6337, 8765, 8796, 5184, 4342, 2764, 2109, 1620, 5943, 8824, 2025, 19, 8823], [3528, 4357, 8823], [6460, 8823]]


In [16]:
# Sorting questions and answers by the length of questions
sorted_clean_questions = []
sorted_clean_answers = []

for length in range(1, 25 + 1): ## length of question is up to 25 words
    for i in enumerate(questions_into_int): ## index and the question i[0]=index, i[1]=question
        if len(i[1]) == length:
            sorted_clean_questions.append(questions_into_int[i[0]])
            sorted_clean_answers.append(answers_into_int[i[0]])

# PART 2 - BUILDING THE SEQ2SEQ MODEL 

In [17]:
# Creating placeholders for the inputs and the targets. They will be used to train the model later
## Tensors are advanced arrays
def model_inputs():
    inputs = tf.placeholder(tf.int32, [None, None], name = 'input') ## sorted Clean questions, list of integers. With Padding its a 2D matrix
    targets = tf.placeholder(tf.int32, [None, None], name = 'target')
    lr = tf.placeholder(tf.float32, name = 'learning_rate')
    keep_prob = tf.placeholder(tf.float32, name = 'keep_prob') ## used to control drop outs, ie, deactivate neurons
    return inputs, targets, lr, keep_prob
    


# Preprocessing the targets (sorted clean answers)
## Targets must be sent in batches to the neural network, not one by one
## the batch should start with SOS token
## In order to keep the same size, we will remove the last column and add <SOS> token at the beginning
## last column is the token identifier of the answer
def preprocess_targets(targets, word2int, batch_size):## word2int dictionary provides unique identifier of the SOS token
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])## batch_size=number of rows
    ## drop the last token, extracts subset of tensorflow
    ## start the extraction from (0,0)
    ## Extract till batch_size,-1.Batch_size=rows, -1= last column -1
    ## [1,1]= slide one cell by one cell
    right_side = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1]) 
    preprocessed_targets = tf.concat([left_side, right_side], 1) # 1=horizontal concat
    return preprocessed_targets
    

 
# Creating the Encoder RNN
## rnn_size=no of input tensors in the layers
## rnn_inputs=model_inputs
## sequence_length=length of the each question in the batch
def encoder_rnn(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
    encoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers) ## stacked lstm layers with dropout applied
    encoder_output, encoder_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = encoder_cell,
                                                                 cell_bw = encoder_cell,
                                                                   sequence_length = sequence_length,
                                                                 inputs = rnn_inputs,
                                                                   dtype = tf.float32)
    return encoder_state

http://www.wildml.com/2016/01/attention-and-memory-in-deep-learning-and-nlp/

https://www.tensorflow.org/versions/r1.0/api_docs/python/tf/contrib/seq2seq

In [18]:
# Decoding the training set
## decoder_embedded_input=embedding of words into vectors of real numbers. Decoder expects the input in this format
## decoding_scope=advanced tensorflow data structure which will wrap the tensorflow variables



def decode_training_set(encoder_state, decoder_cell, decoder_embedded_input, 
                        sequence_length, decoding_scope, output_function, keep_prob, batch_size):
    ## batch_size=number of lines
    ## 1=number of columns
    ## number of elements=decoder_cell.output_size
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    # attention_keys= keys to be compared to target state
    # attention_values= values to construct the context. Context is returned by the encoder
    ## attention_score=used to compute similarity between keys and target set
    ## attention_construct_function=used to build the attention set
    
    
    
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = "bahdanau", num_units = decoder_cell.output_size)
    training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(encoder_state[0],
                                                                             attention_keys,
                                                                             attention_values,
                                                                             attention_score_function,
                                                                             attention_construct_function,
                                                                             name = "attn_dec_train") ## name scope of the decoder
    decoder_output, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                                                            training_decoder_function,
                                                                                                             decoder_embedded_input,
                                                                                                             sequence_length,
                                                                                                             scope = decoding_scope)
    decoder_output_dropout = tf.nn.dropout(decoder_output, keep_prob)
    return output_function(decoder_output_dropout)

In [19]:
# Decoding the test/validation set
## This function will logically answer the question used during testing based on the training it underwent
## This function will also be used during cross validation
## max_length= longest answer you can find in the batch
## num_words=total number of words for all the answers
def decode_test_set(encoder_state, decoder_cell, decoder_embeddings_matrix, sos_id, eos_id, 
                    maximum_length, num_words, decoding_scope, output_function, keep_prob, batch_size):
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = "bahdanau", num_units = decoder_cell.output_size)
    test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(output_function,
                                                                             encoder_state[0],
                                                                              attention_keys,
                                                                              attention_values,
                                                                              attention_score_function,
                                                                             attention_construct_function,
                                                                              decoder_embeddings_matrix,
                                                                              sos_id,
                                                                              eos_id,
                                                                            maximum_length,
                                                                             num_words,
                                                                            name = "attn_dec_inf") ## decoder is in inference mode
    test_predictions, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                                                               test_decoder_function,
                                                                                                               scope = decoding_scope)
    return test_predictions

In [20]:
# Creating the Decoder RNN
def decoder_rnn(decoder_embedded_input, decoder_embeddings_matrix, encoder_state, num_words, 
                sequence_length, rnn_size, num_layers, word2int, keep_prob, batch_size):
    with tf.variable_scope("decoding") as decoding_scope:
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
        decoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers) ## stacked lstm layers with dropout applied
        weights = tf.truncated_normal_initializer(stddev = 0.1) ## initialize weights, truncated normal distribution of weights
        biases = tf.zeros_initializer() ## initialize biases
        
        ## x is the parameter passed to the function and it returnd fully connected layer
        output_function = lambda x: tf.contrib.layers.fully_connected(x,
                                                                      num_words,
                                                                      None,
                                                                      scope = decoding_scope,
                                                                     weights_initializer = weights,
                                                                      biases_initializer = biases)
        training_predictions = decode_training_set(encoder_state,
                                                  decoder_cell,
                                                   decoder_embedded_input,
                                                  sequence_length,
                                                   decoding_scope,
                                                  output_function,
                                                  keep_prob,
                                                   batch_size)
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(encoder_state, ## used for cross validation
                                          decoder_cell,
                                           decoder_embeddings_matrix,
                                          word2int['<SOS>'],
                                           word2int['<EOS>'],
                                          sequence_length - 1,
                                           num_words,
                                          decoding_scope,
                                           output_function,
                                           keep_prob,
                                           batch_size)
           
    return training_predictions, test_predictions

In [21]:
# Building the seq2seq model

# inputs = questions in the dataset
# targets = answers in the dataset
# encoder_embedding_size=dimension of the encoder
def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, 
                  questions_num_words, encoder_embedding_size, decoder_embedding_size, rnn_size, num_layers, questionswords2int):
    
    encoder_embedded_input = tf.contrib.layers.embed_sequence(inputs,
                                                              answers_num_words + 1,
                                                              encoder_embedding_size,
                                                             initializer = tf.random_uniform_initializer(0, 1))# 0 to 1 is the random scale
    
    encoder_state = encoder_rnn(encoder_embedded_input, rnn_size, num_layers, keep_prob, sequence_length)
    preprocessed_targets = preprocess_targets(targets, questionswords2int, batch_size)
    
    ## initiliase with random uniform numbers
    ## no of lines=questions_num_words
    decoder_embeddings_matrix = tf.Variable(tf.random_uniform([questions_num_words + 1, decoder_embedding_size], 0, 1))
    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_targets)
    
    training_predictions, test_predictions = decoder_rnn(decoder_embedded_input,
                                                      decoder_embeddings_matrix,
                                                         encoder_state,
                                                         questions_num_words,
                                                         sequence_length,
                                                         rnn_size,
                                                         num_layers,
                                                         questionswords2int,
                                                         keep_prob,
                                                         batch_size)
    return training_predictions, test_predictions    

# Training the Seq2Seq Model

In [22]:
#Setting the Hyperparameters
epochs = 100
batch_size = 64
rnn_size = 512
num_layers = 3 ## decoder and encoder layers
encoding_embedding_size = 512 ## no of column in encoding matrix
decoding_embedding_size = 512
learning_rate = 0.01 
learning_rate_decay = 0.9 ## to reduce the speed of learning rate
min_learning_rate = 0.0001
keep_probability = 0.5 ## drop_out for hidden unit. This is the most optimal value

In [32]:
# Defining a session
tf.reset_default_graph() ## reset the graph before starting TF session
session = tf.InteractiveSession()

# Loading the model inputs
inputs, targets, lr, keep_prob = model_inputs()
print(inputs) # questions
print(targets) # targets
print(lr) # learning rate
print(keep_prob) # keep probability

# Setting the sequence length
sequence_length = tf.placeholder_with_default(25, None, name = 'sequence_length') # max sequence_length=25  
print(sequence_length)

# Getting the shape of the inputs tensor
input_shape = tf.shape(inputs)
print(input_shape)

Tensor("input:0", shape=(?, ?), dtype=int32)
Tensor("target:0", shape=(?, ?), dtype=int32)
Tensor("learning_rate:0", dtype=float32)
Tensor("keep_prob:0", dtype=float32)
Tensor("sequence_length:0", dtype=int32)
Tensor("Shape:0", shape=(2,), dtype=int32)


In [33]:
# Getting the training and test predictions
training_predictions, test_predictions = seq2seq_model(tf.reverse(inputs, [-1]), ## reverse the dimensions
                                                       targets,
                                                       keep_prob,
                                                       batch_size,
                                                       sequence_length,
                                                       len(answerswords2int),
                                                       len(questionswords2int),
                                                       encoding_embedding_size,
                                                       decoding_embedding_size,
                                                       rnn_size,
                                                       num_layers,
                                                       questionswords2int)
print(training_predictions)
print(test_predictions)

Tensor("decoding/Reshape_1:0", shape=(64, ?, 8825), dtype=float32)
Tensor("decoding/dynamic_rnn_decoder_1/transpose:0", shape=(?, ?, 8825), dtype=float32)


In [23]:
## gardient_clipping=cap the gradient to a min and max value to avoid exploding or vanishing gradient problem
with tf.name_scope("optimization"):
    loss_error = tf.contrib.seq2seq.sequence_loss(training_predictions,
                                                  targets,
                                                  tf.ones([input_shape[0], sequence_length])) ## initiliaze weights to 1,[number of lines, columns]
    

    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss_error)
    clipped_gradients = [(tf.clip_by_value(grad_tensor, -5., 5.), grad_variable) 
                         for grad_tensor, grad_variable in gradients if grad_tensor is not None]
    optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)

In [24]:
# Padding the sequences with the <PAD> token so that length of question and answer sequence is same
#Question : ['Who','are','you?',<PAD>,<PAD>,<PAD>]
#Answer: [<SOS>,'I','am','a','bot',<'EOS'>]
def apply_padding(batch_of_sequences, word2int):
    max_sequence_length = max([len(sequence) for sequence in batch_of_sequences])
    ## return list of sequences
    return [sequence + [word2int['<PAD>']] * (max_sequence_length - len(sequence)) for sequence in batch_of_sequences]

In [25]:
# Splitting the data into batches of questions and answers
def split_into_batches(questions, answers, batch_size): ## batch_size=64
    for batch_index in range(0, len(questions) // batch_size): ## "//" to get an integer
        start_index = batch_index * batch_size ## 
        questions_in_batch = questions[start_index : start_index + batch_size]
        answers_in_batch = answers[start_index : start_index + batch_size]
        padded_questions_in_batch = np.array(apply_padding(questions_in_batch, questionswords2int))
        padded_answers_in_batch = np.array(apply_padding(answers_in_batch, answerswords2int))
        yield padded_questions_in_batch, padded_answers_in_batch

In [None]:
# Splitting the questions and answers into training and validation sets (Cross Validation)
# validation set is to validate during the training
training_validation_split = int(len(sorted_clean_questions) * 0.15)
training_questions = sorted_clean_questions[training_validation_split:]
training_answers = sorted_clean_answers[training_validation_split:]
validation_questions = sorted_clean_questions[:training_validation_split]
validation_answers = sorted_clean_answers[:training_validation_split]

In [None]:
# Training
batch_index_check_training_loss = 100 ## Check training_loss every 100 batches
batch_index_check_validation_loss = ((len(training_questions)) // batch_size // 2) - 1 ## Check validation loss half number of batches
total_training_loss_error = 0 ## initialized
list_validation_loss_error = [] ## initialize as an empty list
early_stopping_check = 0
early_stopping_stop = 1000 ## if the loss goes below this, we will stop the training. This is high. Usually it is 100
checkpoint = "chatbot_weights.ckpt"
session.run(tf.global_variables_initializer())

for epoch in range(1, epochs + 1):# for each epoch
    # for each batch in the epoch
    # fetch batch_index, padded questions and answers
    for batch_index, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(training_questions, training_answers, batch_size)):
        starting_time = time.time()
        _, batch_training_loss_error = session.run([optimizer_gradient_clipping, loss_error], {inputs: padded_questions_in_batch,
                                                                                               targets: padded_answers_in_batch,
                                                                                               lr: learning_rate,
                                                                                               sequence_length: padded_answers_in_batch.shape[1],
                                                                                               keep_prob: keep_probability})
        total_training_loss_error += batch_training_loss_error
        ending_time = time.time()
        batch_time = ending_time - starting_time
        
        if batch_index % batch_index_check_training_loss == 0: ## Check if batch_index=100 
             print('Epoch: {:>3}/{}, Batch: {:>4}/{}, Training Loss Error: {:>6.3f}, Training Time on 100 Batches: {:d} seconds'.format(epoch, epochs,batch_index,len(training_questions) , total_training_loss_error / batch_index_check_training_loss,int(batch_time * batch_index_check_training_loss)))
                  
             total_training_loss_error = 0 ## re-initialize after 100 batches

                                                                                                                                                                                                                                                                         
        ## Perform cross validation at halfway
        if batch_index % batch_index_check_validation_loss == 0 and batch_index > 0:
             total_validation_loss_error = 0
             starting_time = time.time()
             for batch_index_validation, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(validation_questions, validation_answers, batch_size)):
                    batch_validation_loss_error = session.run(loss_error, 
                                                              {inputs: padded_questions_in_batch,targets: padded_answers_in_batch,lr: learning_rate,sequence_length: padded_answers_in_batch.shape[1],keep_prob: 1})
                    total_validation_loss_error += batch_validation_loss_error
              ## End of For Loop                   
             ending_time = time.time()
             batch_time = ending_time - starting_time
             average_validation_loss_error = total_validation_loss_error / (len(validation_questions) / batch_size)
             print('Validation Loss Error: {:>6.3f}, Batch Validation Time: {:d} seconds'.format(average_validation_loss_error, int(batch_time)))
             learning_rate *= learning_rate_decay ## learning_rate=learning_rate*learning_rate_decay
         
             if learning_rate < min_learning_rate:
                learning_rate = min_learning_rate ##  if the learning_rate decayed, then set to minm learning rate
        
             list_validation_loss_error.append(average_validation_loss_error)
         
             if average_validation_loss_error <= min(list_validation_loss_error): ## There is an improvement 
                print('I speak better now!!')
                early_stopping_check = 0
                saver = tf.train.Saver() ## Save the model
                saver.save(session, checkpoint) ## Save the checkpoint with weights
             else:
                print("Sorry I do not speak better, I need to practice more.")
                early_stopping_check += 1
            
             if early_stopping_check == early_stopping_stop:
                break
                
    if early_stopping_check == early_stopping_stop:
        print("My apologies, I cannot speak better anymore. This is the best I can do.")
        break
print("Game Over")

Epoch:   1/100, Batch:    0/173357, Training Loss Error:  0.093, Training Time on 100 Batches: 1829 seconds
Epoch:   1/100, Batch:  100/173357, Training Loss Error:  2.395, Training Time on 100 Batches: 1912 seconds


# Testing the Model

In [None]:
# Loading the weights and Running the session
checkpoint = "./chatbot_weights.ckpt"
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(session, checkpoint)

In [None]:
# Converting the questions (asked in the chat) from strings to lists of encoding integers
def convert_string2int(question, word2int):
    question = clean_text(question)
    ## covert each word in the question into string. Filter out non frequent words
    return [word2int.get(word, word2int['<OUT>']) for word in question.split()]

In [None]:
# Setting up the chat
while(True):
    question = input("You: ")
    if question == 'Goodbye': ## if we say GoodBye, chat is ended
        break
    question = convert_string2int(question, questionswords2int)
    ## Making sure that the length of the question asked to the chatbot and the ones used to train the chatbot 
    ## is of same length. Add remaining <PAD> tokens
    question = question + [questionswords2int['<PAD>']] * (25 - len(question))
    ## the model only accepts batch, so creating a fake batch.
    ## The fake batch will be created using zero's and merged with the actual question
    fake_batch = np.zeros((batch_size, 25))
    # insert question in the first row of the fake batch
    fake_batch[0] = question
    
    ## Use the first predicted answer
    predicted_answer = session.run(test_predictions, {inputs: fake_batch, keep_prob: 0.5})[0]
    answer = ''
    
    ## Cleaning the response
    for i in np.argmax(predicted_answer, 1):
        if answersints2word[i] == 'i':
            token = ' I'
        elif answersints2word[i] == '<EOS>':
            token = '.'
        elif answersints2word[i] == '<OUT>':
            token = 'out'
        else:
            token = ' ' + answersints2word[i]
        answer += token
        
        if token == '.': ## end of chatbot answer
            break
            
    print('ChatBot: ' + answer)