<a href="https://colab.research.google.com/github/abcvivek/Binance-Dummy-Trading-Bot/blob/master/SpellChecker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%tensorflow_version 1.1

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.1`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


In [0]:

""" Importing Libraries
 numpy is used for maths calculations
 tensorflow is used for building Neural Networks
 os will help in managing all the os related stuff 
 re is used as regular expression
train_test_split will help in dividing dataset into training set and testing set """

import numpy as np
import tensorflow as tf
import os
from os.path import isfile, join
import re
import time
from math import ceil
from sklearn.model_selection import train_test_split
from tensorflow.python.layers.core import Dense
from collections import namedtuple

################ LOADING OF DATA ##################

# Location of the text corpus that load_file() reads below
path = '/content/drive/My Drive/clean.txt'

# Function to load the contents of the dataset
     
def load_file(path):
  """Read the whole text file at `path` and return it as a single string.

  The file is decoded explicitly as UTF-8 so the result does not depend on
  the platform's default locale encoding.

  Args:
    path: location of the text file to read.

  Returns:
    The complete contents of the file.

  Raises:
    OSError: if the file cannot be opened.
    UnicodeDecodeError: if the file is not valid UTF-8.
  """
  with open(path, encoding="utf-8") as file:
    return file.read()
       
# The complete corpus, held in memory as one string
file_content = load_file(path)


#################  Preprocessing of Data  #################

# Create a dictionary to convert the vocabulary (characters) to integers

# Ids are handed out in order of first appearance: the next free id is always
# the current size of the dictionary.
vocab_to_int = {}
for character in file_content:
  vocab_to_int.setdefault(character, len(vocab_to_int))

# The special tokens take the ids that follow the real characters
codes = ['<PAD>','<EOS>','<GO>']
for code in codes:
  vocab_to_int[code] = len(vocab_to_int)

# Total number of ids assigned (characters plus special tokens)
count = len(vocab_to_int)


# Report the vocabulary size and list every entry

vocabulary_size = len(vocab_to_int)
print("The vocabulary contains {} characters.".format(vocabulary_size))
print(sorted(vocab_to_int))


# Reverse mapping: integer id -> character / special token

int_to_vocab = {value: character for character, value in vocab_to_int.items()}

print(int_to_vocab.items())

# Split the text from the file into sentences.

sentences = file_content.splitlines()
print(" Dataset contains {} sentences.".format(len(sentences)))


# Encode every sentence as the list of its characters' integer ids

int_sentences = [
    [vocab_to_int[character] for character in sentence]
    for sentence in sentences
]


# Only sentences whose length lies in [min_length, max_length] are used

max_length = 250
min_length = 30


good_sentences = [
    sentence
    for sentence in int_sentences
    if min_length <= len(sentence) <= max_length
]

print("We will use {} to train and test our model.".format(len(good_sentences)))


# Split the data into training, testing and validation sentences

# Hold out 10% of the data, then divide that hold-out into
# testing (30%) and validation (70%).
training, held_out = train_test_split(good_sentences, test_size = 0.10, random_state = 2)
testing, validation = train_test_split(held_out, test_size = 0.70, random_state = 2)
print("Number of Training sentences:", len(training))
print("Number of Validiation sentences:", len(validation))
print("Number of Testing sentences:", len(testing))


# Sort the sentences by length to reduce padding, which will allow the model to train faster

# Order each dataset by sentence length with a single stable sort instead of
# re-scanning all three datasets once per possible length. sorted() is stable,
# so sentences of equal length keep their original relative order, and the
# length filter keeps exactly the sentences the per-length scan would have
# collected (lengths min_length..max_length inclusive).
training_sorted = sorted(
    (sentence for sentence in training if min_length <= len(sentence) <= max_length),
    key=len)
validation_sorted = sorted(
    (sentence for sentence in validation if min_length <= len(sentence) <= max_length),
    key=len)
testing_sorted = sorted(
    (sentence for sentence in testing if min_length <= len(sentence) <= max_length),
    key=len)


# Generate Artificial noise into Correct sentence

letters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

def noise_maker(sentence, threshold):

    '''Return a noisy copy of `sentence` containing random spelling mistakes.

    For every position a uniform random number is drawn. If it is below
    `threshold` the character is copied unchanged. Otherwise a second draw
    picks one of three mistakes:
      * > 0.67 : swap the character with the one that follows it (the last
                 character has nothing to swap with and is dropped);
      * < 0.33 : insert a random lower-case letter before the character;
      * else   : drop the character.

    Args:
        sentence: sequence of integer character ids. It is not modified.
        threshold: value compared against a uniform [0, 1) draw; a character
            is kept unchanged when the draw is below it.

    Returns:
        A new list of integer ids.

    Raises:
        KeyError: if a randomly inserted letter is missing from the
            module-level `vocab_to_int`.
    '''

    noisy_sentence = []
    last_index = len(sentence) - 1
    i = 0
    while i < len(sentence):
        keep_roll = np.random.uniform(0,1,1)
        # Most characters will be correct since the threshold value is high
        if keep_roll < threshold:
            noisy_sentence.append(sentence[i])
        else:
            error_roll = np.random.uniform(0,1,1)
            # ~33% chance characters will swap locations
            if error_roll > 0.67:
                if i < last_index:
                    # Swap order with the following character and step past
                    # it so that it is not processed a second time
                    noisy_sentence.append(sentence[i+1])
                    noisy_sentence.append(sentence[i])
                    i += 1
                # Otherwise this is the last character: it is simply not
                # typed. Falling through to the shared `i += 1` below (rather
                # than `continue`) is what actually drops it; skipping the
                # increment would re-roll the same character instead.

            # ~33% chance an extra lower case letter will be added to the sentence
            elif error_roll < 0.33:
                random_letter = np.random.choice(letters, 1)[0]
                noisy_sentence.append(vocab_to_int[random_letter])
                noisy_sentence.append(sentence[i])

            # ~33% chance a character will not be typed: nothing is appended
        i += 1
    return noisy_sentence


# Check to ensure noise_maker is making mistakes correctly.

threshold = 0.9
# Show the first five training sentences next to a noisy version of each
for sentence in training_sorted[:5]:
    print(sentence)
    noisy_version = noise_maker(sentence, threshold)
    print(noisy_version)
    print()


###########################  Building the Model  ###############################

# Model input pipes for data feed to the model

'''
Tensorflow placeholders acts as pipes into the model.
name_scope makes sure so given values are from the same graph.
'''

def model_inputs():
    """Create the placeholders through which data is fed into the graph.

    Returns:
        A tuple ``(inputs, targets, keep_prob, inputs_length, targets_length,
        max_target_length)``. ``inputs`` and ``targets`` are int32
        placeholders declared with two unspecified dimensions, ``keep_prob``
        is a float32 placeholder with no declared shape, the two ``*_length``
        values are int32 placeholders with one unspecified dimension, and
        ``max_target_length`` is the maximum of ``targets_length``.
    """

    with tf.name_scope('inputs'):
        # ARGS: dtype, shape of the tensor to be fed, name for operation
        inputs = tf.placeholder(tf.int32, [None, None], name='inputs')

    with tf.name_scope('targets'):
        targets = tf.placeholder(tf.int32, [None, None], name='targets')

    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    inputs_length = tf.placeholder(tf.int32, (None,), name='inputs_length')
    targets_length = tf.placeholder(tf.int32, (None,), name='targets_length')

    # ARGS: input tensor, name for operation
    max_target_length = tf.reduce_max(targets_length, name='max_target_len')

    return inputs, targets, keep_prob, inputs_length, targets_length, max_target_length


# Remove last word from each batch, add <GO> token to the start of each batch

def process_encoding_input(targets, vocab_to_int, batch_size):
    """Build the decoder input from `targets`.

    Slices away the last column of the first `batch_size` rows of `targets`
    and prepends a column filled with the id of `<GO>`.

    Args:
        targets: 2-D integer tensor of target ids.
        vocab_to_int: mapping that contains the key '<GO>'.
        batch_size: number of rows to slice and to fill with `<GO>`.

    Returns:
        The tensor produced by concatenating the `<GO>` column and the
        sliced targets along axis 1.
    """

    with tf.name_scope("processing_encoding"):
        # Rows 0..batch_size, every column except the last one
        ending = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
        dec_input = tf.concat([tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1)
    return dec_input


#Encoding layer takes the input sequence and creates a encoded representation

def encoding_layer(rnn_size, sequence_length, num_layers, rnn_inputs, keep_prob, direction):
    """Run the encoder RNN over `rnn_inputs`.

    Args:
        rnn_size: number of units of each LSTMCell.
        sequence_length: passed to the dynamic RNN as the sequence lengths.
        num_layers: number of iterations of the per-layer loop (one variable
            scope 'encoder_<n>' per iteration).
        rnn_inputs: input tensor handed to the RNN.
        keep_prob: input keep probability for the DropoutWrapper.
        direction: 1 uses tf.nn.dynamic_rnn, 2 uses
            tf.nn.bidirectional_dynamic_rnn.

    Returns:
        ``(enc_output, enc_state)``. For direction 2 the forward and backward
        outputs are concatenated along axis 2 and only the forward state is
        returned. For any other `direction` value neither branch runs and the
        function implicitly returns None.

    NOTE(review): every loop iteration feeds the original `rnn_inputs` and
    overwrites `enc_output` / `enc_state`, so only the last iteration's
    result is returned; the layers do not appear to be stacked on top of
    each other. Confirm whether that is intended before changing it, since
    altering the wiring changes the variables stored in checkpoints.
    """

    if direction == 1:
        with tf.name_scope("RNN_Encoder_Cell_1D"):
            for layer in range(num_layers):
                with tf.variable_scope('encoder_{0}'.format(layer)):
                    lstm = tf.contrib.rnn.LSTMCell(rnn_size)
                    drop = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
                    enc_output, enc_state = tf.nn.dynamic_rnn(drop, rnn_inputs, sequence_length, dtype=tf.float32)
            return enc_output, enc_state

    if direction == 2:
        with tf.name_scope("RNN_Encoder_Cell_2D"):
            for layer in range(num_layers):
                with tf.variable_scope('encoder_{0}'.format(layer)):
                    cell_fw = tf.contrib.rnn.LSTMCell(rnn_size)
                    cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob = keep_prob)

                    cell_bw = tf.contrib.rnn.LSTMCell(rnn_size)
                    cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob = keep_prob)

                    enc_output, enc_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, rnn_inputs, sequence_length, dtype=tf.float32)

            # Concat outputs
            enc_output = tf.concat(enc_output, 2)

            # Use only forwarded state
            return enc_output, enc_state[0]


# Create training logits

def training_decoding_layer(dec_embed_input, targets_length, dec_cell, initial_state, output_layer, vocab_size, max_target_length):
    """Build the decoder used during training.

    A TrainingHelper feeds `dec_embed_input` (batch-major, since
    time_major=False) to a BasicDecoder, which is unrolled with
    dynamic_decode for at most `max_target_length` steps.

    Args:
        dec_embed_input: embedded decoder input.
        targets_length: sequence lengths passed to the TrainingHelper.
        dec_cell: RNN cell used by the decoder.
        initial_state: initial state of the decoder.
        output_layer: layer applied to the decoder outputs.
        vocab_size: not used inside this function.
        max_target_length: upper bound on the number of decoding steps.

    Returns:
        The first element of the tuple returned by dynamic_decode.
    """

    with tf.name_scope("Training_Decoder"):
        training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_embed_input, sequence_length=targets_length, time_major=False)
        training_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, training_helper, initial_state, output_layer)
        training_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(training_decoder, output_time_major=False, impute_finished=True, maximum_iterations=max_target_length)

        return training_logits

# Create inference logits

def inference_decoding_layer(embeddings, start_token, end_token, dec_cell, initial_state, output_layer, max_target_length, batch_size):
     """Build the decoder used for inference.

     A GreedyEmbeddingHelper starts every sequence with `start_token`, looks
     ids up in `embeddings`, and uses `end_token` as the end-of-sequence id.
     The BasicDecoder is unrolled with dynamic_decode for at most
     `max_target_length` steps.

     Args:
         embeddings: embedding table handed to the GreedyEmbeddingHelper.
         start_token: id tiled `batch_size` times to form the start tokens.
         end_token: id handed to the helper as the end token.
         dec_cell: RNN cell used by the decoder.
         initial_state: initial state of the decoder.
         output_layer: layer applied to the decoder outputs.
         max_target_length: upper bound on the number of decoding steps.
         batch_size: number of start tokens to create.

     Returns:
         The first element of the tuple returned by dynamic_decode.
     """

     # Debug output: prints the max_target_length value received while the graph is being built
     print(max_target_length)

     with tf.name_scope("Inference_Decoder"):
        start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32), [batch_size], name='start_tokens')

        inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embeddings, start_tokens, end_token)
        inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, inference_helper, initial_state, output_layer)
        inference_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder, output_time_major=False, impute_finished=True, maximum_iterations=max_target_length)

        return inference_logits

#Create the decoding cell and attention.

def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, inputs_length, targets_length, max_target_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers, direction):
    """Create the attention decoder and its training and inference outputs.

    The decoder cell is wrapped with Bahdanau attention over `enc_output`,
    its zero state is cloned with `enc_state` as the cell state, and the
    training and inference decoders are built inside the same "decode"
    variable scope (the second one with reuse=True) so they share variables.

    Args:
        dec_embed_input: embedded decoder input for training.
        embeddings: embedding table used by the inference decoder.
        enc_output: encoder outputs attended over.
        enc_state: encoder state used as the decoder's initial cell state.
        vocab_size: number of units of the Dense output layer.
        inputs_length: lengths passed to the attention mechanism.
        targets_length: lengths passed to the training decoder.
        max_target_length: upper bound on decoding steps.
        rnn_size: units of the LSTMCell and of the attention layer.
        vocab_to_int: mapping that contains '<GO>' and '<EOS>'.
        keep_prob: input keep probability for the DropoutWrapper.
        batch_size: batch size used for the zero state and start tokens.
        num_layers: number of iterations of the per-layer loop.
        direction: not used inside this function.

    Returns:
        ``(training_logits, inference_logits)``.

    NOTE(review): the per-layer loop rebinds `dec_cell` on every iteration,
    so only the cell created in the last iteration is wrapped with attention
    and used; the cells do not appear to be stacked. Confirm whether that is
    intended before changing it, since it affects checkpointed variables.
    """

    with tf.name_scope("RNN_Decoder_Cell"):
        for layer in range(num_layers):
            with tf.variable_scope('decoder_{}'.format(layer)):
                lstm = tf.contrib.rnn.LSTMCell(rnn_size)
                dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)

    output_layer = Dense(vocab_size, kernel_initializer = tf.truncated_normal_initializer(mean = 0.0, stddev=0.1))

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(rnn_size, enc_output, inputs_length, normalize=False, name='BahdanauAttention')

    with tf.name_scope("Attention_Wrapper"):
        dec_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attn_mech, rnn_size)
    # Start from the attention wrapper's zero state, but carry over the encoder state
    initial_state = dec_cell.zero_state(dtype=tf.float32, batch_size=batch_size)
    initial_state = initial_state.clone(cell_state=enc_state)

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, targets_length, dec_cell, initial_state, output_layer, vocab_size, max_target_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(embeddings, vocab_to_int['<GO>'], vocab_to_int['<EOS>'], dec_cell, initial_state, output_layer, max_target_length, batch_size)

    return training_logits, inference_logits

def seq2seq_model(inputs, targets, keep_prob, inputs_length, targets_length, max_target_length, vocab_size, rnn_size, num_layers, vocab_to_int, batch_size, embedding_size, direction):
    """Wire the embeddings, encoder and decoder together.

    Separate embedding tables of shape [vocab_size, embedding_size],
    initialised uniformly in [-1, 1), are created for the encoder and the
    decoder. The decoder input is derived from `targets` by
    process_encoding_input().

    Returns:
        ``(training_logits, inference_logits)`` as returned by
        decoding_layer().
    """

    enc_embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1, 1))
    enc_embed_input = tf.nn.embedding_lookup(enc_embeddings, inputs)
    enc_output, enc_state = encoding_layer(rnn_size, inputs_length, num_layers, enc_embed_input, keep_prob, direction)

    dec_embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1, 1))
    dec_input = process_encoding_input(targets, vocab_to_int, batch_size)
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    training_logits, inference_logits  = decoding_layer(dec_embed_input, dec_embeddings, enc_output, enc_state, vocab_size, inputs_length, targets_length, max_target_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers, direction)

    return training_logits, inference_logits


###############   Hyper-Parameters   ##################

# The default parameters

epochs = 100  # maximum number of epochs passed to train()
batch_size = 64  # number of sentences per batch
num_layers = 4  # iterations of the per-layer loops in encoding_layer / decoding_layer
rnn_size = 512  # units of each LSTMCell
embedding_size = 128  # width of the encoder and decoder embedding tables
learning_rate = 0.0005  # learning rate given to the AdamOptimizer
direction = 2  # encoder type: 1 = tf.nn.dynamic_rnn, 2 = tf.nn.bidirectional_dynamic_rnn
threshold = 0.90  # noise_maker keeps a character unchanged when its random draw is below this
# Value fed to the keep_prob placeholder while training
keep_probability = 0.65 #0.75


def build_accuracy(predictions, targets):
    """Return the mean of the element-wise matches between predictions and targets.

    `predictions` is rounded and cast to int32 before being compared with
    `targets`; the boolean result is cast to float32 and averaged.

    NOTE(review): the element-wise comparison presumably needs `predictions`
    and `targets` to have compatible shapes when the op is evaluated -
    confirm against the caller.
    """
    correct_prediction = tf.equal(tf.cast(tf.round(predictions), tf.int32), targets)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy

# Build graph

def build_graph(keep_prob, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction):
    """Reset the default graph and build the full training/inference graph.

    Args:
        keep_prob: rebound to the keep_prob placeholder returned by
            model_inputs() before it is read, so the argument value itself
            is not used.
        rnn_size: units of each LSTMCell.
        num_layers: forwarded to seq2seq_model().
        batch_size: forwarded to seq2seq_model().
        learning_rate: learning rate of the AdamOptimizer.
        embedding_size: forwarded to seq2seq_model().
        direction: forwarded to seq2seq_model().

    Returns:
        A `Graph` namedtuple with the fields listed in `export_nodes`.
    """

    tf.reset_default_graph()

    # Load model inputs
    inputs, targets, keep_prob, inputs_length, targets_length, max_target_length = model_inputs()

    # Create the training and inference logits
    # The inputs are reversed along their last axis before being encoded, and
    # the vocabulary size handed to the model is len(vocab_to_int)+1.
    training_logits, inference_logits = seq2seq_model(tf.reverse(inputs, [-1]), targets, keep_prob, inputs_length, targets_length, max_target_length, len(vocab_to_int)+1, rnn_size, num_layers, vocab_to_int, batch_size, embedding_size, direction)

    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_logits.rnn_output, 'logits')

    with tf.name_scope('predictions'):
        predictions = tf.identity(inference_logits.sample_id, name='predictions')
        tf.summary.histogram('predictions', predictions)

    # Create the weights for sequence_loss
    masks = tf.sequence_mask(targets_length, max_target_length, dtype=tf.float32, name='masks')

    with tf.name_scope("cost"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)
        tf.summary.scalar('cost', cost)

    with tf.name_scope("optimze"):
        optimizer = tf.train.AdamOptimizer(learning_rate)

        # Gradient Clipping: each gradient is clipped element-wise to [-5, 5];
        # variables without a gradient are skipped
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    accuracy = build_accuracy(predictions, targets)

    # Merge summaries
    merged = tf.summary.merge_all()

    # Export the nodes
    # Each name below is looked up in locals(), so it must match a local
    # variable of this function exactly.
    export_nodes = ['inputs', 'targets', 'keep_prob', 'cost', 'inputs_length', 'targets_length', 'predictions', 'merged', 'train_op','accuracy', 'optimizer']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])

    return graph


def pad_sentence_batch(sentence_batch):

    """Right-pad every sentence of the batch with <PAD> up to the longest one.

    Returns a new list of new lists; the sentences passed in are not changed.
    """

    longest = max(len(sentence) for sentence in sentence_batch)
    pad_id = vocab_to_int['<PAD>']

    padded_batch = []
    for sentence in sentence_batch:
        missing = longest - len(sentence)
        padded_batch.append(sentence + [pad_id] * missing)
    return padded_batch



def get_batches(sentences, batch_size, threshold):

    """Yield padded batches of noisy inputs and clean targets.

    Only full batches are produced; a trailing remainder of fewer than
    `batch_size` sentences is ignored. Noise is generated anew on every call,
    so each epoch sees different mistakes.

    The sentences in `sentences` are never modified: <EOS> is appended to
    copies. Appending in place would add one more <EOS> to every stored
    sentence each time the dataset is iterated.

    Args:
        sentences: list of sentences, each a list of integer ids.
        batch_size: number of sentences per batch.
        threshold: forwarded to noise_maker().

    Yields:
        ``(noisy_batch, target_batch, noisy_lengths, target_lengths)`` where
        the batches are numpy arrays of padded sentences (targets end with
        <EOS> before padding) and the lengths are lists with the length of
        each padded row.
    """

    eos = vocab_to_int['<EOS>']

    for batch_i in range(0, len(sentences)//batch_size):
        start_i = batch_i * batch_size
        sentences_batch = sentences[start_i:start_i + batch_size]

        sentences_batch_noisy = [noise_maker(sentence, threshold) for sentence in sentences_batch]

        # Build new lists so the caller's sentences stay free of <EOS>
        sentences_batch_eos = [list(sentence) + [eos] for sentence in sentences_batch]

        pad_sentences_batch = np.array(pad_sentence_batch(sentences_batch_eos))
        pad_sentences_noisy_batch = np.array(pad_sentence_batch(sentences_batch_noisy))

        # Need the lengths for the _lengths parameters. They are measured on
        # the padded rows, so within a batch every entry equals the padded width.
        pad_sentences_lengths = [len(sentence) for sentence in pad_sentences_batch]
        pad_sentences_noisy_lengths = [len(sentence) for sentence in pad_sentences_noisy_batch]

        yield pad_sentences_noisy_batch, pad_sentences_batch, pad_sentences_noisy_lengths, pad_sentences_lengths


################## Training the Model   ######################

def MakeSentenceReadable(correct):
  """Turn a sequence of ids back into text.

  Ids of 28 and above are skipped; the remaining ids are mapped through
  int_to_vocab, joined, and stripped of surrounding whitespace.
  """
  characters = [int_to_vocab[token] for token in correct if token < 28]
  return "".join(characters).strip()
      


def train(model, epochs):

    '''Train the RNN, validating periodically and checkpointing on improvement.

    After every `testing_check` training batches the model is evaluated on
    `validation_sorted`, up to 20 sample corrections are printed, and a
    checkpoint is written when either more than 8 of those samples were
    corrected exactly or the validation loss reached a new minimum.
    Training stops early after `stop` consecutive validation checks without
    either kind of improvement.

    Args:
        model: the `Graph` namedtuple returned by build_graph().
        epochs: maximum number of passes over `training_sorted`.

    Reads the module-level names `training_sorted`, `validation_sorted`,
    `batch_size`, `threshold` and `keep_probability`.
    '''

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # One Saver is reused for every checkpoint; constructing a new Saver
        # per save would add another set of save/restore ops to the graph.
        saver = tf.train.Saver()

        # Validation losses seen so far, used to detect a new minimum
        testing_loss_summary = []

        iteration = 0      # number of training batches processed so far
        display_step = 30  # print training progress every 30 batches
        stop_early = 0     # consecutive validation checks without improvement
        stop = 5           # stop training after this many such checks
        per_epoch = 1      # number of validation checks per epoch
        # Validate every `testing_check` batches. Clamped to 1 so that a
        # training set of fewer than two batches cannot cause a modulo by zero.
        testing_check = max(1, (len(training_sorted)//batch_size//per_epoch)-1)

        for epoch_i in range(1, epochs+1):
            batch_loss = 0
            batch_time = 0

            print()
            print("Training Model: {}".format(epoch_i))

            train_writer = tf.summary.FileWriter('./logs/1/train/{}'.format(epoch_i), sess.graph)
            test_writer = tf.summary.FileWriter('./logs/1/test/{}'.format(epoch_i))

            # Per batch
            for batch_i, (input_batch, target_batch, input_length, target_length) in enumerate(get_batches(training_sorted,batch_size,threshold)):
                start_time = time.time()
                summary, loss, _ = sess.run([model.merged, model.cost, model.train_op],
                                             {model.inputs: input_batch,
                                              model.targets: target_batch,
                                              model.inputs_length: input_length,
                                              model.targets_length: target_length,
                                              model.keep_prob: keep_probability})

                batch_loss += loss
                end_time = time.time()
                batch_time += end_time - start_time

                # Record the progress of training
                train_writer.add_summary(summary, iteration)

                iteration += 1

                # Print info
                if batch_i % display_step == 0 and batch_i > 0:
                    print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}, Seconds: {:>4.2f}'
                          .format(epoch_i,
                                  epochs,
                                  batch_i,
                                  len(training_sorted) // batch_size,
                                  batch_loss / display_step,
                                  batch_time))

                    # Reset the running totals for the next display window
                    batch_loss = 0
                    batch_time = 0

                #### Run Validation Testing ####

                if batch_i % testing_check == 0 and batch_i > 0:
                    batch_loss_testing = 0
                    batch_time_testing = 0
                    n_batches_testing = 0

                    # Separate loop variables so the training loop's batch
                    # index and batch data are not overwritten here.
                    for val_inputs, val_targets, val_input_length, val_target_length in get_batches(validation_sorted, batch_size, threshold):
                        start_time_testing = time.time()
                        summary, loss = sess.run([model.merged, model.cost],
                                                     {model.inputs: val_inputs,
                                                      model.targets: val_targets,
                                                      model.inputs_length: val_input_length,
                                                      model.targets_length: val_target_length,
                                                      model.keep_prob: 1})

                        batch_loss_testing += loss
                        end_time_testing = time.time()
                        batch_time_testing += end_time_testing - start_time_testing
                        n_batches_testing += 1

                        # Record the progress of testing
                        test_writer.add_summary(summary, iteration)

                    # Print Result

                    # Count exact corrections for THIS validation pass only;
                    # a counter carried across passes would eventually exceed
                    # the threshold below on every pass and disable early
                    # stopping. Slicing tolerates validation sets with fewer
                    # than 120 sentences.
                    is_correct = 0
                    for correct in validation_sorted[100:120]:

                        text = noise_maker(correct, threshold)
                        answer_logits = sess.run(model.predictions, {model.inputs: [text]* batch_size,
                                                                 model.inputs_length: [len(text)]* batch_size,
                                                                 model.targets_length: [len(text)+1],
                                                                 model.keep_prob: [1.0]})[0]

                        correct_sentence = MakeSentenceReadable(correct)
                        text_sentence = MakeSentenceReadable(text)
                        answer_logits_sentence = MakeSentenceReadable(answer_logits)

                        if (answer_logits_sentence == correct_sentence):
                            is_correct += 1

                        print('  Validation Input: {}'.format(text_sentence))
                        print('  Validation Output: {}'.format(answer_logits_sentence))
                        print('  Correct: {}'.format(correct_sentence))
                        print('  Is Correct: {}'.format(answer_logits_sentence == correct_sentence))
                        print()

                    # Report the measured validation time (not reset before
                    # printing); guard the average against an empty
                    # validation set.
                    print('Testing Loss: {:>6.3f}, Seconds: {:>4.2f}'.format(batch_loss_testing / max(n_batches_testing, 1), batch_time_testing))

                    # Save the model when enough samples were corrected or
                    # the validation loss is at a new minimum

                    testing_loss_summary.append(batch_loss_testing)

                    if is_correct > 8:
                        print('New Accuracy Record!')
                        stop_early = 0
                        checkpoint = "/content/drive/My Drive/Colab Notebooks/Model-{}.ckpt".format(is_correct)
                        saver.save(sess, checkpoint)
                    else:
                        if batch_loss_testing <= min(testing_loss_summary):
                            print('New Loss Record!')
                            stop_early = 0
                            checkpoint = "/content/drive/My Drive/Colab Notebooks/Model-{}.ckpt".format(epoch_i)
                            saver.save(sess, checkpoint)
                        else:
                            print("No Improvement.")
                            stop_early += 1
                            if stop_early == stop:
                                break

            # Flush and release this epoch's summary files
            train_writer.close()
            test_writer.close()

            if stop_early == stop:
                print("Stopping Training.")
                break


# Train the model with the desired tuning parameters

# Each loop variable rebinds the module-level name of the same spelling;
# train() reads keep_probability and threshold from module scope, so the
# values chosen here are the ones used during training.
for keep_probability in [0.65]:
    for num_layers in [4]:
        for threshold in [0.90]:
            
            model = build_graph(keep_probability, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction)
            train(model, epochs)


#################   Testing the Model   ################

def test(model,testing_set):
    """Restore the saved checkpoint and measure exact-match accuracy.

    Every sentence of `testing_set` is corrupted with noise_maker(), run
    through the model, and counted as correct when the readable form of the
    prediction equals the readable form of the original sentence.

    The sentences are evaluated directly instead of from inside a
    get_batches() loop that served only as a trigger: that arrangement
    divided by zero when the set held exactly one batch and returned None
    when it held fewer than two.

    Args:
        model: the `Graph` namedtuple returned by build_graph().
        testing_set: list of sentences, each a list of integer ids.

    Returns:
        The fraction of sentences corrected exactly, or 0.0 when
        `testing_set` is empty.

    Reads the module-level names `batch_size` and `threshold`.
    """
    # Start session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        print()
        print("Testing LSTM Model")

        checkpoint = "/content/drive/My Drive/Colab Notebooks/Model-11.ckpt"
        saver.restore(sess, checkpoint)

        total_sentences = len(testing_set)
        if total_sentences == 0:
            print("Test set is empty; nothing to evaluate.")
            return 0.0

        tested = 0
        is_correct = 0
        print_tested_each = 100  # next `tested` count at which progress is reported

        for i, correct in enumerate(testing_set):

            if (tested > print_tested_each):
                print_tested_each  += 100
                print("Tested {}% of test set".format((ceil(i / total_sentences * 100) * 100) / 100.0))

            text = noise_maker(correct, threshold)
            # The single sentence is repeated batch_size times to match the
            # batch size the graph was built with; only the first row of the
            # result is kept.
            answer_logits = sess.run(model.predictions, {model.inputs: [text]*batch_size,
                                                     model.inputs_length: [len(text)]*batch_size,
                                                     model.targets_length: [len(text)+1],
                                                     model.keep_prob: [1.0]})[0]

            correct_sentence = MakeSentenceReadable(correct)
            answer_logits_sentence = MakeSentenceReadable(answer_logits)

            tested += 1
            if (answer_logits_sentence == correct_sentence):
                is_correct += 1

        print("Accuracy %: {}%".format((ceil((is_correct / tested) * 100) * 100) / 100.0))
        print("Exact Accuracy: {}".format(is_correct / tested))

        return is_correct / tested

# Rebuild the graph with the same settings and evaluate on the test set.
for keep_probability in [0.65]:
    for num_layers in [4]:
        for threshold in [0.90]:
            model = build_graph(keep_probability, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction)
            total = 0
            total = test(model, testing_sorted)

            # NOTE(review): `total` is the result of a single test() call, yet
            # it is divided by 3 below - presumably a leftover from averaging
            # three runs; confirm before relying on these numbers.
            print("Total Accuracy %: {}".format((ceil((total / 3) * 100) * 100) / 100.0))
            print("Total Accuracy Exact: {}".format(total / 3))




# ## Fixing Custom Sentences

def text_to_ints(text):

    '''Encode `text` as the list of vocabulary ids of its characters.'''

    encoded = []
    for character in text:
        encoded.append(vocab_to_int[character])
    return encoded



# Create your own sentence or use one from the dataset

text = "spellin is difficult whch is wyh you need to study everyday"

text = text_to_ints(text)

# The checkpoint name used to be built from `log_string`, which is not defined
# anywhere in this file and raised a NameError. Restore the same checkpoint
# that test() loads instead.
checkpoint = "/content/drive/My Drive/Colab Notebooks/Model-11.ckpt"

model = build_graph(keep_probability, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction)


with tf.Session() as sess:

    # Load saved model

    saver = tf.train.Saver()
    saver.restore(sess, checkpoint)

    # Multiply by batch_size to match the model's input parameters;
    # only the first row of the prediction is kept

    answer_logits = sess.run(model.predictions, {model.inputs: [text]*batch_size,
                                                 model.inputs_length: [len(text)]*batch_size,
                                                 model.targets_length: [len(text)+1],
                                                 model.keep_prob: [1.0]})[0]

# Remove the padding and end-of-sentence markers from the generated sentence.
# Ids without an entry in int_to_vocab are skipped as well, because the graph
# is built with one more output id than the vocabulary contains.

pad = vocab_to_int["<PAD>"]
eos = vocab_to_int["<EOS>"]
response_ids = [i for i in answer_logits if i in int_to_vocab and i != pad and i != eos]

print('\nText')
print('  Word Ids:    {}'.format([i for i in text]))
print('  Input Words: {}'.format("".join([int_to_vocab[i] for i in text])))

print('\nSummary')
print('  Word Ids:       {}'.format(response_ids))
print('  Response Words: {}'.format("".join([int_to_vocab[i] for i in response_ids])))



The vocabulary contains 31 characters.
['\n', ' ', '<EOS>', '<GO>', '<PAD>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
dict_items([(0, 't'), (1, 'h'), (2, 'i'), (3, 's'), (4, ' '), (5, 'w'), (6, 'a'), (7, 'y'), (8, 'o'), (9, 'u'), (10, 'c'), (11, 'v'), (12, 'e'), (13, 'r'), (14, 'b'), (15, 'p'), (16, 'f'), (17, 'l'), (18, 'd'), (19, 'n'), (20, '\n'), (21, 'j'), (22, 'm'), (23, 'k'), (24, 'g'), (25, 'x'), (26, 'z'), (27, 'q'), (28, '<PAD>'), (29, '<EOS>'), (30, '<GO>')])
 Dataset contains 1232681 sentences.
We will use 1017165 to train and test our model.
Number of Training sentences: 915448
Number of Validiation sentences: 71202
Number of Testing sentences: 30515
[6, 4, 5, 6, 13, 19, 2, 19, 24, 4, 5, 6, 3, 4, 24, 2, 11, 12, 19, 4, 14, 7, 4, 2, 3, 6, 6, 10, 4, 22]
[6, 4, 5, 6, 13, 2, 19, 24, 4, 5, 6, 3, 4, 24, 2, 12, 11, 19, 4, 14, 7, 4, 2, 3, 6, 6, 22, 4]

[6, 4, 16, 12, 5, 4, 15, 12, 0, 13, 8, 17, 