# Building a Chatbot with Deep Learning

![image](https://user-images.githubusercontent.com/35156624/126909072-47c9be9e-549c-420f-ac4b-f9bbd2a4de22.png)


In [114]:
import numpy as np
import tensorflow as tf
import re 
import time 

## We need to import the dataset for data preprocessing

In [115]:
# Read both corpus files and split their content into lines.
# The `with` blocks make sure each file handle is closed as soon as its
# content has been read (the bare open(...).read() form never closed them).
with open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore') as lines_file:
    movie_lines = lines_file.read().split('\n')
with open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore') as conversations_file:
    conversations = conversations_file.read().split('\n')

In [116]:
# Show entries 1 through 4 of the raw movie-lines list under a heading.
print("\nRaw movie lines:\n")
movie_lines[1:5]


Raw movie lines:



['L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!',
 'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.',
 'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?',
 "L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go."]

In [117]:
# Show entries 1 through 4 of the raw conversations list under a heading.
print("\nRaw Conversations:\n")
conversations[1:5]


Raw Conversations:



["u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L204', 'L205', 'L206']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L207', 'L208']"]

## Create a dictionary to map movie line and id

In [118]:
# Lookup table: line id (first field) -> utterance text (fifth field).
# Only records that split into exactly five fields are kept.
_split_records = (record.split(" +++$+++ ") for record in movie_lines)
id_2_movieline = {
    fields[0]: fields[4]
    for fields in _split_records
    if len(fields) == 5
}

In [119]:
# Print a small slice (items 1 through 9) of the id -> line lookup table.
print("\nMovie Lines of data set:\n")
print(dict(list(id_2_movieline.items())[1:10]))


Movie Lines of data set:

{'L236278': 'They went crazy.', 'L328290': "I'm not with the Agency, Mr. Garrison, and I assume if you've come this far, what I have to say interests you.  But I'm not going to name names, or tell you who or what I represent. Except to say - you're close, you're closer than you think...", 'L278820': "Oh, fine.  She's gone away for a little while and when she comes back I've sort of resolved to really tell her how much I care for her.", 'L366322': "I think it's fantastic.", 'L506979': 'Can I tell you kids something?', 'L625440': "The MCP is the most efficient way of handling what we do. I can't sit and worry about every little user request that --", 'L395818': "You're Mabel - her sister - aren't you?", 'L236405': 'Did you bring that tape?', 'L633419': "Didn't have a choice.  My car overheated up the road."}


## Create a list of all the conversations

In [120]:
# The last field of each record is a stringified list such as
# "['L194', 'L195']". Drop the surrounding brackets, the quotes and the
# spaces, then split on commas to obtain the list of line ids.
# The final entry of `conversations` is skipped (presumably the empty string
# left after the file's trailing newline - TODO confirm).
conversations_ids = []
for record in conversations[:-1]:
    id_field = record.split(" +++$+++ ")[-1]
    bare_ids = id_field[1:-1].replace("'", "").replace(" ", "")
    conversations_ids.append(bare_ids.split(","))
print("\nList of conversations:\n")
conversations_ids[:10]


List of conversations:



[['L194', 'L195', 'L196', 'L197'],
 ['L198', 'L199'],
 ['L200', 'L201', 'L202', 'L203'],
 ['L204', 'L205', 'L206'],
 ['L207', 'L208'],
 ['L271', 'L272', 'L273', 'L274', 'L275'],
 ['L276', 'L277'],
 ['L280', 'L281'],
 ['L363', 'L364'],
 ['L365', 'L366']]

In [121]:
print("Split the questions and answers\n")
# Every pair of consecutive lines in a conversation yields one
# (question, answer) example: line i is the question, line i + 1 the answer.
questions = []
answers = []
for convs in conversations_ids:
    for asked_id, replied_id in zip(convs, convs[1:]):
        questions.append(id_2_movieline[asked_id])
        answers.append(id_2_movieline[replied_id])
print("Questions:\n")
print(questions[:10])
print("\nAnswers:\n")
print(answers[:10])

Split the questions and answers

Questions:

['Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.', "Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "You're asking me out.  That's so cute. What's your name again?", "No, no, it's my fault -- we didn't have a proper introduction ---", 'Cameron.', "The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does.", 'Why?', 'Unsolved mystery.  She used to be really popular when she started high school, then it was just like she got sick of it or something.', 'Gosh, if only we could find Kat a boyfriend...']

Answers:

["Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "Okay... then how 'bout we try out some French cuisine.  Saturday?

## Now we need to clean the text

In [122]:
def clean(text):
    """
    function: clean
    params: String text
    does: lower-cases the text, expands a fixed list of English contractions
    and strips a fixed set of punctuation characters. The substitutions are
    applied one after another, in the order listed below.
    returns: String clean text
    """
    # (pattern, replacement) pairs; order matters because each substitution
    # runs on the result of the previous one.
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        # punctuation is deleted outright (not replaced by a space)
        (r"[-()\"#/@;:<>{}+=~|.?,]", ""),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

In [123]:
# Run every question and every answer through clean(), preserving order.
clean_ques = [clean(question) for question in questions]
clean_answ = [clean(answer) for answer in answers]
print("\nCleaned Questions:")
print(clean_ques[:10])
print("\nCleaned Answers:")
print(clean_answ[:10])


Cleaned Questions:
['can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again', 'well i thought we would start with pronunciation if that is okay with you', 'not the hacking and gagging and spitting part  please', 'you are asking me out  that is so cute what is your name again', "no no it's my fault  we didn't have a proper introduction ", 'cameron', 'the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i cannot date until she does', 'why', 'unsolved mystery  she used to be really popular when she started high school then it was just like she got sick of it or something', 'gosh if only we could find kat a boyfriend']

Cleaned Answers:
['well i thought we would start with pronunciation if that is okay with you', 'not the hacking and gagging and spitting part  please', "okay then how 'bout we try out some french cuisine  saturday  night", 'forget it', 'cameron', 'the thing is

## Remove less frequent words

Find the number of occurrences of each word and remove the lowest 5%. This speeds up training of the neural network and focuses it on the most impactful words in the corpus.

In [124]:
# Tally how often each whitespace-separated word occurs, first across the
# cleaned questions and then across the cleaned answers.
count_words = {}
for sentences in (clean_ques, clean_answ):
    for sentence in sentences:
        for word in sentence.split():
            count_words[word] = count_words.get(word, 0) + 1
print("\nWord count hash table:\n")
print(dict(list(count_words.items())[1:10]))


Word count hash table:

{'marston': 1, 'usewe': 1, 'paw': 9, 'amscray': 2, 'started': 853, "minutes'": 3, 'hereor': 4, 'alleys': 11, 'motherfucker!!': 1}


## Tokenize and create a threshold 

Tokenize to get all words and filter out words that do not meet the threshold. The threshold is a minimum count of 20 occurrences; this hyperparameter can be tuned to improve the model. Each remaining word is mapped to a unique number.

In [125]:
# Words that occur at least `threshold` times receive consecutive integer
# ids (starting at 0) in the order they appear in count_words.
threshold = 20
frequent_question_words = [word for word, count in count_words.items() if count >= threshold]
questions_mapping = {word: index for index, word in enumerate(frequent_question_words)}

# The answers vocabulary is built the same way, from the same word counts.
threshold_answ = 20
frequent_answer_words = [word for word, count in count_words.items() if count >= threshold_answ]
answers_mapping = {word: index for index, word in enumerate(frequent_answer_words)}

print("\nQuestions Mapping:\n")
print(dict(list(questions_mapping.items())[1:10]))
print("\nAnswers Mapping\n")
print(dict(list(answers_mapping.items())[1:10]))


Questions Mapping:

{'intellectual': 6723, "man's": 5513, 'insight': 1143, 'prize': 6489, 'started': 0, 'actions': 1144, 'sang': 4345, 'reconsider': 4344, 'studied': 7889}

Answers Mapping

{'intellectual': 6723, "man's": 5513, 'insight': 1143, 'prize': 6489, 'started': 0, 'actions': 1144, 'sang': 4345, 'reconsider': 4344, 'studied': 7889}


In [126]:
## TODO: LEFT OFF HERE, WORKS ABOVE.
# Special tokens added to both vocabularies: padding, end of sentence,
# out-of-vocabulary word, start of sentence.
tokens = ['<PAD>', '<EOS>', '<OUT>','<SOS>']

for mapping in (questions_mapping, answers_mapping):
    for token in tokens:
        # Each token gets (current dictionary size + 1) as its id, so the id
        # equal to the original vocabulary size is left unused.
        mapping[token] = len(mapping) + 1

In [127]:
# Reverse lookup of answers_mapping: integer id -> word.
inverse_answers = dict((index, word) for word, index in answers_mapping.items())

Now we need to add the `<EOS>` token to the end of every answer.

In [128]:
# Append the end-of-sentence marker to every cleaned answer.
# Note: this rewrites clean_answ in place, so running it twice adds two markers.
for i, answer_text in enumerate(clean_answ):
    clean_answ[i] = answer_text + ' <EOS>'

In [129]:
# Show the first ten answers, now carrying the <EOS> marker.
print("\nEOS token at the end of each answer, this is used for the decoding part of the seq2seq model:\n")
clean_answ[:10]


EOS token at the end of each answer, this is used for the decoding part of the seq2seq model:



['well i thought we would start with pronunciation if that is okay with you <EOS>',
 'not the hacking and gagging and spitting part  please <EOS>',
 "okay then how 'bout we try out some french cuisine  saturday  night <EOS>",
 'forget it <EOS>',
 'cameron <EOS>',
 'the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i cannot date until she does <EOS>',
 'seems like she could get a date easy enough <EOS>',
 'unsolved mystery  she used to be really popular when she started high school then it was just like she got sick of it or something <EOS>',
 'that is a shame <EOS>',
 'let me see what i can do <EOS>']

## Map the questions and answers for the RNN

We need to map the questions and answers to integers in order to train the RNN. This is required because the network operates on numbers and cannot be trained on raw categorical (text) data.

In [130]:
# Replace every word by its integer id; words that are not in the
# vocabulary are replaced by the id of the <OUT> token.
quest_mapping_ints = []
for ques in clean_ques:
    quest_mapping_ints.append(
        [questions_mapping.get(word, questions_mapping['<OUT>']) for word in ques.split()]
    )

answ_mapping_ints = []
for answ in clean_answ:
    answ_mapping_ints.append(
        [answers_mapping.get(word, answers_mapping['<OUT>']) for word in answ.split()]
    )


In [131]:
# Print the first ten integer-encoded questions and answers.
print("\nQuestions Map to integers:\n")
print(quest_mapping_ints[:10])
print("\nAnswers Map to integers:\n")
print(answ_mapping_ints[:10])


Questions Map to integers:

[[7605, 3245, 6962, 984, 4142, 8824, 8824, 7071, 4137, 8824, 1948, 2507, 6739, 5454, 8824, 1882, 7415, 4290, 6632, 8247, 8824, 2082], [8556, 2487, 1274, 3245, 8429, 7266, 841, 8824, 8480, 5956, 5359, 3113, 841, 761], [4253, 8247, 8824, 7071, 8824, 7071, 8824, 8196, 8537], [761, 1948, 4787, 159, 5479, 5956, 5359, 5167, 1128, 615, 5359, 2647, 1081, 2082], [6895, 6895, 5244, 4015, 2517, 3245, 4184, 1737, 1828, 6338, 8824], [2774], [8247, 2796, 5359, 2774, 2487, 5435, 6903, 8247, 928, 2663, 1828, 6197, 8824, 5079, 2663, 7060, 4015, 8332, 2487, 5971, 697, 7668, 1838, 1693], [8345], [8824, 3775, 1838, 1142, 205, 6459, 1591, 702, 6423, 1838, 0, 5181, 959, 6932, 1044, 3398, 7794, 7792, 1838, 4745, 4871, 2663, 1044, 7991, 3623], [8021, 8480, 1954, 3245, 7778, 5670, 7767, 1828, 2296]]

Answers Map to integers:

[[8556, 2487, 1274, 3245, 8429, 7266, 841, 8824, 8480, 5956, 5359, 3113, 841, 761, 8823], [4253, 8247, 8824, 7071, 8824, 7071, 8824, 8196, 8537, 8823], [3113,

Now we need to sort the questions and answers by the length of the questions. This will speed up training during the optimization stage. We use 25 words as the maximum question length: questions longer than that (and their answers) are left out.

In [132]:
# Keep only the pairs whose question has between 1 and 25 tokens, ordered by
# question length. list.sort is stable, so pairs with equally long questions
# stay in their original relative order.
kept_indices = [indx for indx, val in enumerate(quest_mapping_ints) if 1 <= len(val) <= 25]
kept_indices.sort(key = lambda indx: len(quest_mapping_ints[indx]))
sorted_questions = [quest_mapping_ints[indx] for indx in kept_indices]
sorted_answers = [answ_mapping_ints[indx] for indx in kept_indices]

print("\nSorted Questions: \n")
print(sorted_questions[:10])
print("\nSorted Answers: \n")
print(sorted_answers[:10])


Sorted Questions: 

[[2774], [8345], [2521], [2450], [367], [6895], [3344], [6895], [1666], [3072]]

Sorted Answers: 

[[8247, 2796, 5359, 2774, 2487, 5435, 6903, 8247, 928, 2663, 1828, 6197, 8824, 5079, 2663, 7060, 4015, 8332, 2487, 5971, 697, 7668, 1838, 1693, 8823], [8824, 3775, 1838, 1142, 205, 6459, 1591, 702, 6423, 1838, 0, 5181, 959, 6932, 1044, 3398, 7794, 7792, 1838, 4745, 4871, 2663, 1044, 7991, 3623, 8823], [8162, 8823], [6965, 7792, 612, 6537, 5479, 1184, 4605, 8823], [761, 6363, 7858, 984, 8019, 8823], [3113, 761, 1948, 4387, 1313, 205, 7255, 5977, 205, 3442, 8823], [191, 2441, 8823], [761, 5577, 2813, 7021, 1988, 1044, 8823], [5638, 8823], [8429, 761, 2352, 3691, 159, 1828, 76, 2774, 8823]]


## Building the Seq2Seq Model

In [133]:
def model_inputs():
    """
    function: model_inputs
    params: none
    does: creates the four placeholders of the graph
    returns: inputs and targets (int32 placeholders with two unspecified dimensions),
    learning_rate and drop_out (float32 placeholders)
    """
    def int_matrix_placeholder(name):
        return tf.placeholder(tf.int32, [None, None], name = name)

    def float_placeholder(name):
        return tf.placeholder(tf.float32, name = name)

    # Tuple elements are evaluated left to right, so the placeholders are
    # created in the order input, target, learning_rate, drop_out.
    return (int_matrix_placeholder('input'),
            int_matrix_placeholder('target'),
            float_placeholder('learning_rate'),
            float_placeholder('drop_out'))

In [134]:
def lstm_targets(targets, hash_words, batch_size):
    """
    function: lstm_targets
    params: targets tensor, hash_words hash table (must contain '<SOS>'), batch_size int
    does: drops the last column of targets and prepends a column filled with
    the <SOS> id
    returns: tensors targets
    """
    sos_column = tf.fill([batch_size, 1], hash_words['<SOS>'])
    # the end index -1 leaves out the final column of every row
    without_last_column = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
    return tf.concat([sos_column, without_last_column], 1)

In [135]:
def encoder_layer(rnn_inputs, rnn_size, rnn_num_layers, drop_out, seq_len):
    """
    function: encoder_layer
    params: rnn_inputs inputs of the encoder, rnn_size int passed to BasicLSTMCell,
    rnn_num_layers int number of stacked layers, drop_out value used as input_keep_prob,
    seq_len passed as sequence_length to the dynamic RNN
    returns: encoder state (second value returned by tf.nn.bidirectional_dynamic_rnn)
    """
    # one LSTM cell wrapped with dropout on its inputs
    base_cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    cell_with_dropout = tf.contrib.rnn.DropoutWrapper(base_cell, input_keep_prob = drop_out)
    # the same wrapped cell object is repeated rnn_num_layers times
    stacked_cell = tf.contrib.rnn.MultiRNNCell([cell_with_dropout] * rnn_num_layers)
    # the stacked cell is used for both the forward and the backward direction
    _outputs, final_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw = stacked_cell,
        cell_bw = stacked_cell,
        sequence_length = seq_len,
        inputs = rnn_inputs,
        dtype = tf.float32)
    return final_state

In [136]:
def decoder_training(encoder_state, decoder, decoder_input, seq_len, decode_scope, output_fun, drop_out, batch_size):
    """
    function: decoder_training
    params: encoder_state returned from encoder_layer (its first element is used),
    decoder cell in RNN, decoder_input embedding, seq_len int, decode_scope variable scope,
    output_fun output function applied to the decoder output, drop_out value passed to
    tf.nn.dropout, batch_size int
    returns: output_fun applied to the decoder output after dropout
    """
    # attention is prepared over an all-zero states tensor
    attention_states = tf.zeros([batch_size, 1, decoder.output_size])
    keys, vals, scores, constructs = tf.contrib.seq2seq.prepare_attention(
        attention_states,
        attention_option = 'bahdanau',
        num_units = decoder.output_size)

    train_decoder_fn = tf.contrib.seq2seq.attention_decoder_fn_train(
        encoder_state[0], keys, vals, scores, constructs, name = "attn_dec_train")

    decoder_output, _state, _context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(
        decoder, train_decoder_fn, decoder_input, seq_len, scope = decode_scope)

    return output_fun(tf.nn.dropout(decoder_output, drop_out))

In [137]:
def decode_validation_set(encoder_state, decoder, decoder_matrix, sos_id, eos_id, max_len_batch, total_words_ans,
                          decode_scope, output_fun, drop_out, batch_size):
    """
    function: decode_validation_set
    params: encoder_state returned from encoder_layer (its first element is used),
    decoder cell in RNN, decoder_matrix embeddings matrix, sos_id and eos_id ids of the
    <SOS> and <EOS> tokens, max_len_batch maximum length, total_words_ans number of words,
    decode_scope variable scope, output_fun output function, drop_out (not used here),
    batch_size int
    returns: test_predictions
    """
    # attention is prepared over an all-zero states tensor
    attention_states = tf.zeros([batch_size, 1, decoder.output_size])
    keys, vals, scores, constructs = tf.contrib.seq2seq.prepare_attention(
        attention_states,
        attention_option = 'bahdanau',
        num_units = decoder.output_size)

    inference_decoder_fn = tf.contrib.seq2seq.attention_decoder_fn_inference(
        output_fun,
        encoder_state[0],
        keys,
        vals,
        scores,
        constructs,
        decoder_matrix,
        sos_id,
        eos_id,
        max_len_batch,
        total_words_ans,
        name = "attn_dec_inf")

    predictions, _state, _context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(
        decoder, inference_decoder_fn, scope = decode_scope)
    return predictions

In [138]:
def smartbot_rnn(decoder_input, decoder_matrix, encoder_state, total_words, seq_len, rnn_size, num_layers_rnn,
                 hash_words, drop_out, batch_size):
    """
    function: smartbot_rnn
    params: decoder_input, decoder_matrix, encoder_state, total_words int (size of the output layer),
    seq_len int, rnn_size int, num_layers_rnn int, hash_words hashtable (must contain '<SOS>' and '<EOS>'),
    drop_out float, batch_size int
    does: builds the decoder cell and output layer inside the "decoding" variable scope and
    wires up both the training decoder and the inference decoder
    returns: train_preds, test_preds
    """
    with tf.variable_scope("decoding") as decoding_scope:
        base_cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        cell_with_dropout = tf.contrib.rnn.DropoutWrapper(base_cell, input_keep_prob = drop_out)
        # the same wrapped cell object is repeated num_layers_rnn times
        smartbot_decoder_cell = tf.contrib.rnn.MultiRNNCell([cell_with_dropout] * num_layers_rnn)
        weights = tf.truncated_normal_initializer(stddev = 0.1)
        bias = tf.zeros_initializer()

        def output_fun(x):
            # fully connected layer with total_words outputs and no activation
            return tf.contrib.layers.fully_connected(x,
                                                     total_words,
                                                     None,
                                                     scope = decoding_scope,
                                                     weights_initializer = weights,
                                                     biases_initializer = bias)

        train_preds = decoder_training(
            encoder_state, smartbot_decoder_cell, decoder_input, seq_len,
            decoding_scope, output_fun, drop_out, batch_size)

        # the inference decoder reuses the variables created for training
        decoding_scope.reuse_variables()
        test_preds = decode_validation_set(
            encoder_state, smartbot_decoder_cell, decoder_matrix,
            hash_words['<SOS>'], hash_words['<EOS>'],
            seq_len - 1, total_words,
            decoding_scope, output_fun, drop_out, batch_size)

        return train_preds, test_preds

In [139]:
def smartbot_model(inputs, targets, dropout, batch_size, seq_len, num_words_answers, num_words_questions, encoder_size,
                   decoder_size, rnn_size, rnn_num_layers, questions_hash):
    """
    function: smartbot_model
    params: inputs questions vector, targets answers vector, dropout rate float, batch_size int, seq_len int,
            num_words_answers int, num_words_questions int, encoder_size int, decoder_size int, rnn_size int,
            rnn_num_layers int, questions_hash hashtable (word -> id, must contain '<SOS>' and '<EOS>')
    does: embeds the inputs, runs the encoder, prepares the decoder inputs and builds the decoder
    returns: train_preds, test_preds (as returned by smartbot_rnn)
    """
    # NOTE(review): the encoder embedding is sized with num_words_answers and the
    # decoder side with num_words_questions - the names look swapped; confirm.
    encoder_input = tf.contrib.layers.embed_sequence(inputs,
                                                     num_words_answers + 1,
                                                     encoder_size,
                                                     initializer = tf.random_uniform_initializer(0, 1))

    encoder_state = encoder_layer(encoder_input,
                                  rnn_size,
                                  rnn_num_layers,
                                  dropout,
                                  seq_len)

    # Use the hash table handed in by the caller rather than the global
    # questions_mapping, so the function only depends on its arguments.
    targets = lstm_targets(targets, questions_hash, batch_size)
    decoder_matrix = tf.Variable(tf.random_uniform([num_words_questions + 1, decoder_size], 0, 1))
    decoder_input = tf.nn.embedding_lookup(decoder_matrix, targets)

    train_preds, test_preds = smartbot_rnn(decoder_input,
                                           decoder_matrix,
                                           encoder_state,
                                           num_words_questions,
                                           seq_len,
                                           rnn_size,
                                           rnn_num_layers,
                                           questions_hash,
                                           dropout,
                                           batch_size)
    return train_preds, test_preds

## Train the model - Set up the hyperparameters

In [140]:
# Number of passes over the training data made by the training loop.
epochs = 100
# Number of question/answer pairs per batch.
batch_size = 64
# Size passed to every BasicLSTMCell.
rnn_size = 512
# Number of stacked LSTM layers in the encoder and in the decoder.
number_layers = 3
# Embedding sizes for the encoder input and for the decoder matrix.
encoding_embed_size = 512
decoding_embed_size = 512
# Learning rate handed to the Adam optimizer.
learning_rate = 0.01
# Factor by which the learning rate is meant to be reduced as training progresses,
# so the model learns in finer steps; a common value is 0.9.
# NOTE(review): not referenced by any code visible in this notebook yet.
learning_rate_decay = 0.9
# Presumably the lower bound for the decayed learning rate - not referenced by visible code; confirm.
min_learning_rate = 0.0001
# Value intended to be fed to the keep_prob placeholder during training.
keep_probability = 0.50

## Create the tensorflow object

In [141]:
# Start from a clean default graph so re-running the notebook does not
# pile new operations on top of previously defined ones.
tf.reset_default_graph()
# Session used later to initialize the variables and run training steps.
session = tf.InteractiveSession()

In [142]:
# Create the graph placeholders: question ids, answer ids, learning rate and keep probability.
inputs, targets, learning, keep_prob = model_inputs()

In [143]:
# Placeholder for the sequence length. When no value is fed it defaults to 25,
# the maximum question length kept when sorting the data. The shape argument is None.
seq_len = tf.placeholder_with_default(25, None, name = "sequence_length")

In [144]:
# Dynamic shape of the input batch; its first entry is used to size the loss weights.
input_shape = tf.shape(inputs)

## Start outputting the training and test predictions 

In [145]:
# The question batch is reversed along its last axis before it is handed to the model.
reversed_inputs = tf.reverse(inputs, [-1])
train_preds, test_preds = smartbot_model(
    reversed_inputs,
    targets,
    keep_prob,
    batch_size,
    seq_len,
    len(answers_mapping),
    len(questions_mapping),
    encoding_embed_size,
    decoding_embed_size,
    rnn_size,
    number_layers,
    questions_mapping,
)

## Set up the loss error and the optimizer. Apply gradient clipping to the optimizer.

In [147]:
with tf.name_scope("optimization"):
    # Sequence loss between the training predictions and the targets; every
    # position gets weight 1 (weights tensor of shape [batch, seq_len]).
    loss_error = tf.contrib.seq2seq.sequence_loss(train_preds,
                                                  targets,
                                                  tf.ones([input_shape[0], seq_len]))
    # NOTE(review): the optimizer is built from the Python float `learning_rate`,
    # not from the `learning` placeholder - confirm this is intended.
    optim = tf.train.AdamOptimizer(learning_rate)
    gradients = optim.compute_gradients(loss_error)
    # Clip every gradient into [-0.5, 0.5]. The previous bounds (-.5, -.5) had
    # min == max, which forced every gradient to exactly -0.5.
    clipped_grads = [(tf.clip_by_value(tensor, -.5, .5), var) for tensor, var in gradients if tensor is not None]
    optim_gradient_clip = optim.apply_gradients(clipped_grads)

In [148]:
def padding(batch_seqs, hash_words_ints):
    """
    function: padding
    params: batch_seqs list of sequences (lists of ints), hash_words_ints hash table
    words to integers (must contain "<PAD>")
    returns: new list of sequences, each extended with the <PAD> id
    does: right-pads every sequence with the <PAD> id up to the length of the
    longest sequence in the batch
    """
    longest = max(map(len, batch_seqs))
    pad_id = hash_words_ints["<PAD>"]
    padded = []
    for seq in batch_seqs:
        missing = longest - len(seq)
        padded.append(seq + [pad_id] * missing)
    return padded

In [151]:
## Split the data to batches of questions & answers
def split_into_batches(ques, ans, batch_size):
    """
    function: split_into_batches
    params: list ques, list ans, int batch size
    does: walks through ques and ans in steps of batch_size and pads each
    slice to the length of its own longest sequence. Only full batches are
    produced; leftover pairs at the end (fewer than batch_size) are skipped.
    returns: generator yielding (padded questions, padded answers) numpy arrays
    """
    for batch_index in range(0, len(ques) // batch_size):
        begin_indx = batch_index * batch_size
        end_indx = begin_indx + batch_size
        ques_batch = ques[begin_indx:end_indx]
        # slice the `ans` parameter (the previous code read an unrelated name, `answ`)
        answ_batch = ans[begin_indx:end_indx]
        # pad the batch slices, not the complete data set
        padded_ques = np.array(padding(ques_batch, questions_mapping))
        padded_answ = np.array(padding(answ_batch, answers_mapping))
        yield padded_ques, padded_answ

## Split the data into training and validation sets

In [None]:
# Hold-out split: the first 15% of the sorted pairs become the validation
# set, the remaining pairs the training set.
training_split = int(len(sorted_questions) * 0.15)
validation_ques, training_ques = sorted_questions[:training_split], sorted_questions[training_split:]
validation_answ, training_answ = sorted_answers[:training_split], sorted_answers[training_split:]

In [None]:
## left off here 
# Bookkeeping values for the training loop.
# NOTE(review): apart from `checkpoint` being a file name, the intended use of
# these values is not visible yet - the loop body below is still unfinished.
training_loss_indx = 100
validation_loss_indx = ((len(training_ques)) // batch_size // 2) - 1
# Running loss accumulator. It must not be called `loss_error`: that name
# holds the loss tensor that is fetched inside the loop.
total_training_loss_error = 0
loss_error_list = []
stop_check = 0
stop = 1000
checkpoint = "Chatbot_weights.ckpt"
session.run(tf.global_variables_initializer())

for epoch in range(1, epochs + 1):
    for batch_indx, (padded_quest, padded_answ) in enumerate(split_into_batches(training_ques,
                                                                                training_answ,
                                                                                batch_size)):
        start_time = time.time()
        # One optimization step. Feed keys must be graph placeholders:
        #   - targets is required because the loss is computed against it,
        #   - seq_len is the padded answer length, matching the loss weights,
        #   - learning is the learning-rate placeholder from model_inputs().
        _, error = session.run([optim_gradient_clip, loss_error],
                               {inputs: padded_quest,
                                targets: padded_answ,
                                learning: learning_rate,
                                seq_len: padded_answ.shape[1],
                                keep_prob: keep_probability})
    