# Deep Learning

## Create a Chatbox

### Step 1: Prepare Project

   1. Load libraries
   2. Load dataset

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
import time
import operator
import pickle
from itertools import islice
tf.__version__

'1.12.0'

In [2]:
# Load the data
lines = open('/cornell_movie_dialogs_corpus/movie_lines.txt').read().split('\n')
convs = open('/cornell_movie_dialogs_corpus/45000_conversations.txt').read().split('\n')

### Step 2: Define Problem

##### What is your task? What are your goals? What do you want to achieve?

Our task is to build a “chatbox”, a conversational model, which predicts the next sentence given the previous sentence or sentences in a conversation. The goal is to be able to have a "normal" conversation with the model.

### Step 3: Exploratory Analysis

##### Understand your data: Take a “peek” of your data, answer basic questions about the dataset. Summarise your data. Explore descriptive statistics and visualisations.

In [3]:
# The sentences that we will be using to train our model.
lines[:5]

['L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!',
 'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!',
 'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.',
 'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?',
 "L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go."]

In [4]:
# The sentences' ids, which will be processed to become our input and target data.
convs[:5]

["u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L204', 'L205', 'L206']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L207', 'L208']"]

In [6]:
# We are using 45000 conversations as otherwise it produces a Resource Exhausted Error
len(convs)

45000

### Step 4: Prepare Data

##### Data Cleaning/Data Wrangling/Collect more data (if necessary).

In [7]:
# Create a dictionary to map each line's id with its text
id_line = {}
for line in lines:
    temp_line = line.split(' +++$+++ ')
    if len(temp_line) == 5:
        id_line[temp_line[0]] = temp_line[4]
list(islice(id_line.iteritems(), 5))

[('L479748', 'And what did you say?'),
 ('L638646', 'Oh, yes. You noticed the way she does her hair.'),
 ('L525290', 'Cool!'),
 ('L638647',
  'Something else. My wife, Madeleine, has several pieces of jewelry that belonged to Carlotta. She inherited them. Never wore them, they were too old-fashioned... until now. Now, when she is alone, she gets them out and looks at them handles them gently, curiously... puts them on and stares at herself in the mirror... and goes into that other world... is someone else again.'),
 ('L345355',
  'For what? Breaking my heart or ruining sex for me with any other man?')]

In [8]:
# Create a list of all of the conversations' lines' ids.
convos = [ ]
for line in convs[:-1]:
    temp_line = line.split(' +++$+++ ')[-1][1:-1].replace("'","").replace(" ","")
    convos.append(temp_line.split(','))
convos[:5]

[['L194', 'L195', 'L196', 'L197'],
 ['L198', 'L199'],
 ['L200', 'L201', 'L202', 'L203'],
 ['L204', 'L205', 'L206'],
 ['L207', 'L208']]

In [9]:
# Sort the sentences into questions (inputs) and answers (targets)
inputs = []
targets = []

for conv in convos:
    for i in range(len(conv)-1):
        inputs.append(id_line[conv[i]])
        targets.append(id_line[conv[i+1]])

In [10]:
# Check if we have loaded the data correctly
for i in range(0, 3):
    print(inputs[i])
    print(targets[i])

Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.
Well, I thought we'd start with pronunciation, if that's okay with you.
Well, I thought we'd start with pronunciation, if that's okay with you.
Not the hacking and gagging and spitting part.  Please.
Not the hacking and gagging and spitting part.  Please.
Okay... then how 'bout we try out some French cuisine.  Saturday?  Night?


In [11]:
# Compare lengths of questions and answers
print(len(inputs))
print(len(targets))

121315
121315


In [12]:
def clean_text(text):
    '''Clean text by removing unnecessary characters and altering the format of words.'''

    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text

In [13]:
# Clean the data
clean_inputs = []
for question in inputs:
    clean_inputs.append(clean_text(question))
    
clean_targets = []    
for answer in targets:
    clean_targets.append(clean_text(answer))

In [14]:
# Take a look at some of the data to ensure that it has been cleaned well.
for i in range(0, 3):
    print(clean_inputs[i])
    print(clean_targets[i])

can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again
well i thought we would start with pronunciation if that is okay with you
well i thought we would start with pronunciation if that is okay with you
not the hacking and gagging and spitting part  please
not the hacking and gagging and spitting part  please
okay then how about we try out some french cuisine  saturday  night


In [15]:
# Find the length of sentences
length = []
for inp in clean_inputs:
    length.append(len(inp.split()))
for targ in clean_targets:
    length.append(len(targ.split()))

# Create a dataframe so that the values can be inspected
length = pd.DataFrame(length, columns=['Length'])
length[:5]

Unnamed: 0,Length
0,22
1,14
2,9
3,14
4,13


In [16]:
# Compute the 5th and the 95th percentile. The length there is 1 and 32 respectively.
print(np.percentile(length, 5))
print(np.percentile(length, 95))

1.0
32.0


In [17]:
# Remove questions and answers that are shorter than 1 words and longer than 30 words.
min_length = 1
max_length = 30

# Filter out the questions that are too short/long
short_inputs_tmp = []
short_targets_tmp = []

i = 0
for inp in clean_inputs:
    if len(inp.split()) >= min_length and len(inp.split()) <= max_length:
        short_inputs_tmp.append(inp)
        short_targets_tmp.append(clean_targets[i])
    i += 1

# Filter out the answers that are too short/long
short_inputs = []
short_targets = []

i = 0
for inp in short_inputs_tmp:
    if len(inp.split()) >= min_length and len(inp.split()) <= max_length:
        short_inputs.append(inp)
        short_targets.append(short_targets_tmp[i])
    i += 1

In [18]:
# Compare the number of lines we will use with the total number of lines.
print("# of inputs:", len(short_inputs))
print("# of targets:", len(short_targets))

('# of inputs:', 114408)
('# of targets:', 114408)


In [19]:
short_targets[0]

'well i thought we would start with pronunciation if that is okay with you'

### Step 5: Feature Engineering

##### Feature selection/feture engineering (as in new features)/data transformations. 


In [20]:
# Create a dictionary for the frequency of the vocabulary
vocabulary = {}
for inp in short_inputs:
    for token in inp.split():
        if token not in vocabulary:
            vocabulary[token] = 1
        else:
            vocabulary[token] += 1
            
for targ in short_targets:
    for token in targ.split():
        if token not in vocabulary:
            vocabulary[token] = 1
        else:
            vocabulary[token] += 1

vocabulary["life"]

1729

In [21]:
# Remove rare words from the vocabulary.
threshold = 5
count = 0
for k,v in vocabulary.items():
    if v >= threshold:
        count += 1

In [22]:
print("Size of total vocab:", len(vocabulary))
print("Size of vocab we will use:", count)

('Size of total vocab:', 44087)
('Size of vocab we will use:', 13880)


In [23]:
# Create dictionaries with a unique integer for each word.
inputs_int = {}
targets_int = {}

number = 0
for inp, value in vocabulary.items():
    if value >= threshold:
        inputs_int[inp] = number
        number += 1
        
number = 0
for targ, value in vocabulary.items():
    if value >= threshold:
        targets_int[targ] = number
        number += 1

In [24]:
# Add the unique tokens to the vocabulary dictionaries.
codes = ['<PAD>','<EOS>','<UNK>','<GO>']

for code in codes:
    inputs_int[code] = len(inputs_int)
    
for code in codes:
    targets_int[code] = len(targets_int)

In [25]:
# Create inverse dictionary for vocabulary to integer.
inputs_vocab = {value: inp for inp, value in inputs_int.items()}
targets_vocab = {value: targ for targ, value in targets_int.items()}

In [26]:
# Check the length of the dictionaries.
print(len(inputs_int))
print(len(inputs_vocab))
print(len(targets_int))
print(len(targets_vocab))

13884
13884
13884
13884


In [27]:
# Add the end of sentence token to the end of every answer.
for i in range(len(short_targets)):
    short_targets[i] += ' <EOS>'

In [28]:
# Convert the text to integers. 
# Replace any words that are not in the respective vocabulary with <UNK> 
input_sent_int = []
for inp in short_inputs:
    lst_int = []
    for token in inp.split():
        if token not in inputs_int:
            lst_int.append(inputs_int['<UNK>'])
        else:
            lst_int.append(inputs_int[token])
    input_sent_int.append(lst_int)
    
target_sent_int = []
for targ in short_targets:
    lst_int = []
    for token in targ.split():
        if token not in targets_int:
            lst_int.append(targets_int['<UNK>'])
        else:
            lst_int.append(targets_int[token])
    target_sent_int.append(lst_int)

In [29]:
# Check the lengths
print(len(input_sent_int))
print(len(target_sent_int))

114408
114408


In [82]:
# Sort the input and target sentences to avoid extra padding.
sorted_inputs = []
sorted_targets = []

for length in range(1, max_length+1):
    for i in enumerate(inputs_int):
        if len(i[1]) == length:
            sorted_inputs.append(input_sent_int[i[0]])
            sorted_targets.append(target_sent_int[i[0]])

print(len(sorted_inputs))
print(len(sorted_targets))

for i in range(3):
    print(sorted_inputs[i])
    print(sorted_targets[i])

13884
13884
[5116, 12162, 5665, 7884, 11069, 5274, 7098, 5081, 12278, 5274]
[11591, 12278, 9919, 13881]
[12787, 8650, 5283, 5081, 3686, 7230]
[2972, 5990, 1127, 13219, 9678, 13881]
[1801]
[8507, 5081, 6083, 4405, 2702, 6426, 6055, 4225, 13882, 7257, 13882, 13170, 8614, 1694, 7822, 12207, 13881]


### Step 6: Algorithm Selection

##### Select a set of algorithms to apply, select evaluation metrics, and evaluate/compare algorithms.

We are going to use an encoder and a decoder, as well as the seq2seq model as this problem can be seen as a translation problem. 

In [33]:
# Function for creating the placeholders for the inputs to the model
def enc_dec_model_inputs():
    '''Create placeholders for inputs to the model'''
    input_data = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    
    target_sequence_length = tf.placeholder(tf.int32, [None], name='target_sequence_length')
    max_target_len = tf.reduce_max(target_sequence_length)  
    
    lr = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return input_data, targets, target_sequence_length, max_target_len, lr, keep_prob

In [35]:
# Function for processing the target data and add a '<GO>' id in front of each sentence
def process_decoder_input(target_data, target_vocab_to_int, batch_size):
    # get '<GO>' id
    go_id = target_vocab_to_int['<GO>']
    
    after_slice = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    after_concat = tf.concat( [tf.fill([batch_size, 1], go_id), after_slice], 1)
    
    return after_concat

In [36]:
# Functio for creating the encoding layer
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, 
                   source_vocab_size, 
                   encoding_embedding_size):
    """
    :return: tuple (RNN output, RNN state)
    """
    embed = tf.contrib.layers.embed_sequence(rnn_inputs, 
                                             vocab_size=source_vocab_size, 
                                             embed_dim=encoding_embedding_size)
    
    stacked_cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob) for _ in range(num_layers)])
    
    outputs, state = tf.nn.dynamic_rnn(stacked_cells, 
                                       embed, 
                                       dtype=tf.float32)
    return outputs, state

In [37]:
# Function for creating the decoding training layer
def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                         target_sequence_length, max_summary_length, 
                         output_layer, keep_prob):
    """
    Create a training process in decoding layer 
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, 
                                             output_keep_prob=keep_prob)
    
    # for only input layer
    helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, 
                                               target_sequence_length)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, 
                                              helper, 
                                              encoder_state, 
                                              output_layer)

    # unrolling the decoder layer
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, 
                                                      impute_finished=True, 
                                                      maximum_iterations=max_summary_length)
    return outputs

In [38]:
# Function for creating the decoding inference layer
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a inference process in decoding layer 
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, 
                                             output_keep_prob=keep_prob)
    
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings, 
                                                      tf.fill([batch_size], start_of_sequence_id), 
                                                      end_of_sequence_id)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, 
                                              helper, 
                                              encoder_state, 
                                              output_layer)
    
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, 
                                                      impute_finished=True, 
                                                      maximum_iterations=max_target_sequence_length)
    return outputs

In [39]:
# Function for creating the decoding layer
def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size):
    """
    Create decoding layer
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    target_vocab_size = len(target_vocab_to_int)
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
    
    cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(num_layers)])
    
    with tf.variable_scope("decode"):
        output_layer = tf.layers.Dense(target_vocab_size)
        train_output = decoding_layer_train(encoder_state, 
                                            cells, 
                                            dec_embed_input, 
                                            target_sequence_length, 
                                            max_target_sequence_length, 
                                            output_layer, 
                                            keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_output = decoding_layer_infer(encoder_state, 
                                            cells, 
                                            dec_embeddings, 
                                            target_vocab_to_int['<GO>'], 
                                            target_vocab_to_int['<EOS>'], 
                                            max_target_sequence_length, 
                                            target_vocab_size, 
                                            output_layer,
                                            batch_size,
                                            keep_prob)

    return (train_output, infer_output)

In [40]:
# Function for creating the sq2seq model
def seq2seq_model(input_data, target_data, keep_prob, batch_size,
                  target_sequence_length,
                  max_target_sentence_length,
                  source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, target_vocab_to_int):
    """
    Build the Sequence-to-Sequence model
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    enc_outputs, enc_states = encoding_layer(input_data, 
                                             rnn_size, 
                                             num_layers, 
                                             keep_prob, 
                                             source_vocab_size, 
                                             enc_embedding_size)
    
    dec_input = process_decoder_input(target_data, 
                                      target_vocab_to_int, 
                                      batch_size)
    
    train_output, infer_output = decoding_layer(dec_input,
                                               enc_states, 
                                               target_sequence_length, 
                                               max_target_sentence_length,
                                               rnn_size,
                                              num_layers,
                                              target_vocab_to_int,
                                              target_vocab_size,
                                              batch_size,
                                              keep_prob,
                                              dec_embedding_size)
    
    return train_output, infer_output

In [41]:
# Set the Hyperparameters
display_step = 100

epochs = 2
batch_size = 64

rnn_size = 64
num_layers = 2

encoding_embedding_size = 100
decoding_embedding_size = 100

learning_rate = 0.001
keep_probability = 0.5

### Step 7: Model Training

##### Apply ensembles and improve performance by hyperparameter optimisation.

In [42]:
save_path = 'checkpoints/dev'
max_target_sentence_length = max([len(sentence) for sentence in sorted_inputs])

train_graph = tf.Graph()
with train_graph.as_default():
    input_data, targets, target_sequence_length, max_target_sequence_length, lr, keep_prob = enc_dec_model_inputs()
    
    train_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                   targets,
                                                   keep_prob,
                                                   batch_size,
                                                   target_sequence_length,
                                                   max_target_sequence_length,
                                                   len(sorted_inputs),
                                                   len(sorted_targets),
                                                   encoding_embedding_size,
                                                   decoding_embedding_size,
                                                   rnn_size,
                                                   num_layers,
                                                   targets_int)
    
    training_logits = tf.identity(train_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')

    # https://www.tensorflow.org/api_docs/python/tf/sequence_mask
    # - Returns a mask tensor representing the first N positions of each cell.
    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function - weighted softmax cross entropy
        cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            targets,
            masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

In [43]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]


def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    """Batch targets, sources, and the lengths of their sentences together"""
    for batch_i in range(0, len(sources)//batch_size):
        start_i = batch_i * batch_size

        # Slice the right amount for the batch
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]

        # Pad
        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))

        # Need the lengths for the _lengths parameters
        pad_targets_lengths = []
        for target in pad_targets_batch:
            pad_targets_lengths.append(len(target))

        pad_source_lengths = []
        for source in pad_sources_batch:
            pad_source_lengths.append(len(source))

        yield pad_sources_batch, pad_targets_batch, pad_source_lengths, pad_targets_lengths

In [44]:
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))

# Split data to training and validation sets
train_source = sorted_inputs[batch_size:]
train_target = sorted_targets[batch_size:]
valid_source = sorted_inputs[:batch_size]
valid_target = sorted_targets[:batch_size]
(valid_sources_batch, valid_targets_batch, valid_sources_lengths, valid_targets_lengths)= next(get_batches(valid_source,
                                                                                                             valid_target,
                                                                                                             batch_size,
                                                                                                             inputs_int['<PAD>'],
                                                                                                             targets_int['<PAD>']))                                                                                                  
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epochs):
        for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                get_batches(train_source, train_target, batch_size,
                            inputs_int['<PAD>'],
                            targets_int['<PAD>'])):

            _, loss = sess.run(
                [train_op, cost],
                {input_data: source_batch,
                 targets: target_batch,
                 lr: learning_rate,
                 target_sequence_length: targets_lengths,
                 keep_prob: keep_probability})


            if batch_i % display_step == 0 and batch_i > 0:
                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch,
                     target_sequence_length: targets_lengths,
                     keep_prob: 1.0})

                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_sources_batch,
                     target_sequence_length: valid_targets_lengths,
                     keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i, batch_i, len(sorted_inputs) // batch_size, train_acc, valid_acc, loss))

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')


Epoch   0 Batch  100/1787 - Train Accuracy: 0.8470, Validation Accuracy: 0.8363, Loss: 1.4672
Epoch   0 Batch  200/1787 - Train Accuracy: 0.7840, Validation Accuracy: 0.8363, Loss: 1.8051
Epoch   0 Batch  300/1787 - Train Accuracy: 0.8342, Validation Accuracy: 0.8363, Loss: 1.1951
Epoch   0 Batch  400/1787 - Train Accuracy: 0.0297, Validation Accuracy: 0.8363, Loss: 0.4936
Epoch   0 Batch  500/1787 - Train Accuracy: 0.0022, Validation Accuracy: 0.8363, Loss: 0.8870
Epoch   0 Batch  600/1787 - Train Accuracy: 0.0069, Validation Accuracy: 0.5350, Loss: 1.0083
Epoch   0 Batch  700/1787 - Train Accuracy: 0.0036, Validation Accuracy: 0.0342, Loss: 1.5231
Epoch   0 Batch  800/1787 - Train Accuracy: 0.0017, Validation Accuracy: 0.0000, Loss: 1.1466
Epoch   0 Batch  900/1787 - Train Accuracy: 0.0037, Validation Accuracy: 0.0000, Loss: 1.4830
Epoch   0 Batch 1000/1787 - Train Accuracy: 0.0095, Validation Accuracy: 0.0000, Loss: 1.4191
Epoch   0 Batch 1100/1787 - Train Accuracy: 0.0026, Validati

### Step 8: Test the Model

##### Test the model.

In [45]:
def save_params(params):
    with open('params.p', 'wb') as out_file:
        pickle.dump(params, out_file)


def load_params():
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)

In [46]:
save_params(save_path)
load_path = load_params()

In [78]:
def sentence_to_seq(sentence, vocab_to_int):
    results = []
    for word in sentence.split(" "):
        if word in vocab_to_int:
            results.append(vocab_to_int[word])
        else:
            results.append(vocab_to_int['<UNK>'])
            
    return results

translate_sentence = 'hello'

translate_sentence = sentence_to_seq(translate_sentence, inputs_int)

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')

    translate_logits = sess.run(logits, {input_data: [translate_sentence]*batch_size,
                                         target_sequence_length: [len(translate_sentence)*2]*batch_size,
                                         keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in translate_sentence]))
print('  Question: {}'.format([inputs_vocab[i] for i in translate_sentence]))

print('\nPrediction')
print('  Word Ids:      {}'.format([i for i in translate_logits]))
print('  Answer: {}'.format(" ".join([targets_vocab[i] for i in translate_logits])))


INFO:tensorflow:Restoring parameters from checkpoints/dev
Input
  Word Ids:      [4026]
  Question: ['hello']

Prediction
  Word Ids:      [11591, 4347]
  Answer: you do


In [76]:
def sentence_to_seq(sentence, vocab_to_int):
    results = []
    for word in sentence.split(" "):
        if word in vocab_to_int:
            results.append(vocab_to_int[word])
        else:
            results.append(vocab_to_int['<UNK>'])
            
    return results

translate_sentence = 'what are you'

translate_sentence = sentence_to_seq(translate_sentence, inputs_int)

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')

    translate_logits = sess.run(logits, {input_data: [translate_sentence]*batch_size,
                                         target_sequence_length: [len(translate_sentence)*2]*batch_size,
                                         keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in translate_sentence]))
print('  Question: {}'.format([inputs_vocab[i] for i in translate_sentence]))

print('\nPrediction')
print('  Word Ids:      {}'.format([i for i in translate_logits]))
print('  Answer: {}'.format(" ".join([targets_vocab[i] for i in translate_logits])))

INFO:tensorflow:Restoring parameters from checkpoints/dev
Input
  Word Ids:      [9919, 13555, 11591]
  Question: ['what', 'are', 'you']

Prediction
  Word Ids:      [5081, 3787, 11637, 13882, 13881]
  Answer: i am a <UNK> <EOS>
