In [1]:
%matplotlib inline

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math


USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

In [3]:
# corpus_name = "cornell movie-dialogs corpus"
# corpus = os.path.join("data", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

# printLines(os.path.join(corpus, "movie_lines.txt"))

In [4]:
# Splits each line of the file into a dictionary of fields
def loadLines(fileName, fields):
    lines = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            lineObj = {}
            for i, field in enumerate(fields):
                lineObj[field] = values[i]
            lines[lineObj['lineID']] = lineObj
    return lines


# Groups fields of lines from `loadLines` into conversations based on *movie_conversations.txt*
def loadConversations(fileName, lines, fields):
    conversations = []
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            convObj = {}
            for i, field in enumerate(fields):
                convObj[field] = values[i]
            # Convert string to list (convObj["utteranceIDs"] == "['L598485', 'L598486', ...]")
            lineIds = eval(convObj["utteranceIDs"])
            # Reassemble lines
            convObj["lines"] = []
            for lineId in lineIds:
                convObj["lines"].append(lines[lineId])
            conversations.append(convObj)
    return conversations


# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    qa_pairs = []
    for conversation in conversations:
        # Iterate over all the lines of the conversation
        for i in range(len(conversation["lines"]) - 1):  # We ignore the last line (no answer for it)
            inputLine = conversation["lines"][i]["text"].strip()
            targetLine = conversation["lines"][i+1]["text"].strip()
            # Filter wrong samples (if one of the lists is empty)
            if inputLine and targetLine:
                qa_pairs.append([inputLine, targetLine])
    return qa_pairs

Now we’ll call these functions and create the file. We’ll call it
*formatted_movie_lines.txt*.




In [5]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3 # Count default tokens

        for word in keep_words:
            self.addWord(word)

In [6]:
MAX_LENGTH = 10  # Maximum sentence length to consider

# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    print("Reading lines...")
    # Read the file and split into lines
    lines = open(datafile, encoding='utf-8').\
        read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split(',')] for l in lines[1:]]
    voc = Voc(corpus_name)
    return voc, pairs

import ipdb
# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p):
    # Input sequences need to preserve the last word for EOS token
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

# Filter pairs using filterPair condition
def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    voc, pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(pairs)))
#     pairs = filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:", voc.num_words)
    return voc, pairs

corpus_name = 'sarch_query'
datafile = 'data/Batch_generation_2/train_step_query.csv'

# Load/Assemble voc and pairs
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus_name, datafile, save_dir)
# Print some pairs to validate
print("\npairs:")
for pair in pairs[:10]:
    print(pair)

Start preparing training data ...
Reading lines...
Read 1789 sentence pairs
Trimmed to 1789 sentence pairs
Counting words...
Counted words: 4002

pairs:
['remove chicken thighs brine pat dry paper towel dry chicken need cooking', 'dry chicken needs']
['place paper towels drain excess oil benefits draining oil potatoes', 'include reason draining excess oil']
['add oregano garlic powder cumin chili powder cayenne salt pepper stir well covered cook another minutes level heat use cooking soul chili', 'would useful know heat needs adjusted step']
['traditionally turkish kisir eaten lettuce leaves serve lettuce leaf leave side wrap kisir inside lettuce leaves eat also serve tomatoes alongside peppers affect turkish kisir', 'paprika subbed red pepper flakes']
['deglaze skillet wine add cream chile puree cook reduced desired consistency stir chives make medallions pork tenderloin steak', 'making recipe gathering event ensure inform guests guest recipe calls wine']
['large skillet heat oil butt

In [7]:
MIN_COUNT = 3    # Minimum word count threshold for trimming

def trimRareWords(voc, pairs, MIN_COUNT):
    # Trim words used under the MIN_COUNT from the voc
    voc.trim(MIN_COUNT)
    # Filter out pairs with trimmed words
    keep_pairs = []
    for pair in pairs:
        input_sentence = pair[0]
        output_sentence = pair[1]
        keep_input = True
        keep_output = True
        # Check input sentence
        for word in input_sentence.split(' '):
            if word not in voc.word2index:
                keep_input = False
                break
        # Check output sentence
        for word in output_sentence.split(' '):
            if word not in voc.word2index:
                keep_output = False
                break

        # Only keep pairs that do not contain trimmed word(s) in their input or output sentence
        if keep_input and keep_output:
            keep_pairs.append(pair)

    print("Trimmed from {} pairs to {}, {:.4f} of total".format(len(pairs), len(keep_pairs), len(keep_pairs) / len(pairs)))
    return keep_pairs


# Trim voc and pairs
pairs = trimRareWords(voc, pairs, MIN_COUNT)

keep_words 1885 / 3999 = 0.4714
Trimmed from 1789 pairs to 631, 0.3527 of total


In [8]:
def indexesFromSentence(voc, sentence):
    return [voc.word2index[word] for word in sentence.split(' ')] + [EOS_token]


def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.ByteTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len


# Example for validation
small_batch_size = 5
batches = batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

print("input_variable:", input_variable)
print("lengths:", lengths)
print("target_variable:", target_variable)
print("mask:", mask)
print("max_target_len:", max_target_len)

input_variable: tensor([[  77,   43,  925,  108,  265],
        [ 916,  604,  293,   14,   92],
        [ 127,  431,  288,  319,   36],
        [ 798,  200,  824,  200,  514],
        [ 160,   23,   31,  288,  541],
        [ 123,   29,  317,  314,  173],
        [ 159,   34,  162,  526,  829],
        [ 148,  828,   99,  257, 1606],
        [ 320,  431,   34,  526,    2],
        [ 160,  402,  688,  288,    0],
        [  26,  184,  120,  426,    0],
        [ 160,   38,  321,  936,    0],
        [ 161,   16,  468,    2,    0],
        [  23,   32,   36,    0,    0],
        [ 424,  265,  232,    0,    0],
        [ 145,   92, 1165,    0,    0],
        [ 160,   34,  456,    0,    0],
        [  26,  470,    2,    0,    0],
        [ 306, 1080,    0,    0,    0],
        [ 145,  431,    0,    0,    0],
        [ 160,    2,    0,    0,    0],
        [ 161,    0,    0,    0,    0],
        [ 798,    0,    0,    0,    0],
        [ 160,    0,    0,    0,    0],
        [  91,    0,    

In [9]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden

In [10]:
# Luong attention layer
class Attn(torch.nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [11]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))
        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output, hidden

In [12]:
def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [13]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):

    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals

In [14]:
def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, print_every, save_every, clip, corpus_name, loadFilename):

    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

In [15]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

In [16]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher, voc):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Bot:', ' '.join(output_words))

        except KeyError:
            print("Error: Encountered unknown word.")

In [17]:
# Configure models
model_name = 'cb_model'
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

corpus_name = 'search_query'
alldatafile = 'data/Batch_generation_2/all.csv'
traindatafile = 'data/Batch_generation_2/train_step_query.csv'

voc, _ = loadPrepareData(corpus_name, alldatafile, save_dir)
_, pairs = loadPrepareData(corpus_name, traindatafile, save_dir)

voc.addWord('')



# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Start preparing training data ...
Reading lines...
Read 2558 sentence pairs
Trimmed to 2558 sentence pairs
Counting words...
Counted words: 4955
Start preparing training data ...
Reading lines...
Read 1789 sentence pairs
Trimmed to 1789 sentence pairs
Counting words...
Counted words: 4002
Building encoder and decoder ...
Models built and ready to go!


In [18]:
def evaluateFile(encoder, decoder, searcher, voc, filename, targetname):
    text = list(csv.reader(open(filename, 'rt')))
    target = list(csv.reader(open(targetname, 'rt')))
    responses = []
#     input_sentence = ''
    for input_sentence in text:
        try:
            # Get input sentence
#             input_sentence = input('> ')
            # Check if it is quit case
#             if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence[0])
            # Evaluate sentence
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            responses.append(' '.join(output_words))
#             print('Bot:', ' '.join(output_words))
        except KeyError:
            responses.append(' ')
#             print("Error: Encountered unknown word.")
    return text, target, responses

In [19]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)


# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, corpus_name, loadFilename)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 8.5154
Iteration: 2; Percent complete: 0.1%; Average loss: 8.4999
Iteration: 3; Percent complete: 0.1%; Average loss: 8.4761
Iteration: 4; Percent complete: 0.1%; Average loss: 8.4504
Iteration: 5; Percent complete: 0.1%; Average loss: 8.4108
Iteration: 6; Percent complete: 0.1%; Average loss: 8.3858
Iteration: 7; Percent complete: 0.2%; Average loss: 8.2885
Iteration: 8; Percent complete: 0.2%; Average loss: 8.1905
Iteration: 9; Percent complete: 0.2%; Average loss: 8.0567
Iteration: 10; Percent complete: 0.2%; Average loss: 7.8195
Iteration: 11; Percent complete: 0.3%; Average loss: 7.5193
Iteration: 12; Percent complete: 0.3%; Average loss: 7.3011
Iteration: 13; Percent complete: 0.3%; Average loss: 7.1612
Iteration: 14; Percent complete: 0.4%; Average loss: 7.4854
Iteration: 15; Percent complete: 0.4%; Average loss: 7.3595
Iteration: 16; Percent complete: 0.4%

Iteration: 137; Percent complete: 3.4%; Average loss: 5.8786
Iteration: 138; Percent complete: 3.5%; Average loss: 5.9894
Iteration: 139; Percent complete: 3.5%; Average loss: 5.6496
Iteration: 140; Percent complete: 3.5%; Average loss: 5.6506
Iteration: 141; Percent complete: 3.5%; Average loss: 5.8550
Iteration: 142; Percent complete: 3.5%; Average loss: 5.6227
Iteration: 143; Percent complete: 3.6%; Average loss: 5.9569
Iteration: 144; Percent complete: 3.6%; Average loss: 6.0284
Iteration: 145; Percent complete: 3.6%; Average loss: 6.0295
Iteration: 146; Percent complete: 3.6%; Average loss: 5.8647
Iteration: 147; Percent complete: 3.7%; Average loss: 5.6077
Iteration: 148; Percent complete: 3.7%; Average loss: 5.8315
Iteration: 149; Percent complete: 3.7%; Average loss: 5.9396
Iteration: 150; Percent complete: 3.8%; Average loss: 5.8931
Iteration: 151; Percent complete: 3.8%; Average loss: 6.0117
Iteration: 152; Percent complete: 3.8%; Average loss: 5.7946
Iteration: 153; Percent 

Iteration: 272; Percent complete: 6.8%; Average loss: 5.2530
Iteration: 273; Percent complete: 6.8%; Average loss: 5.0257
Iteration: 274; Percent complete: 6.9%; Average loss: 4.4917
Iteration: 275; Percent complete: 6.9%; Average loss: 5.2230
Iteration: 276; Percent complete: 6.9%; Average loss: 4.5806
Iteration: 277; Percent complete: 6.9%; Average loss: 5.0455
Iteration: 278; Percent complete: 7.0%; Average loss: 5.0350
Iteration: 279; Percent complete: 7.0%; Average loss: 4.5617
Iteration: 280; Percent complete: 7.0%; Average loss: 4.8165
Iteration: 281; Percent complete: 7.0%; Average loss: 4.8072
Iteration: 282; Percent complete: 7.0%; Average loss: 4.8668
Iteration: 283; Percent complete: 7.1%; Average loss: 4.9597
Iteration: 284; Percent complete: 7.1%; Average loss: 4.6871
Iteration: 285; Percent complete: 7.1%; Average loss: 5.1062
Iteration: 286; Percent complete: 7.1%; Average loss: 5.0133
Iteration: 287; Percent complete: 7.2%; Average loss: 5.0329
Iteration: 288; Percent 

Iteration: 408; Percent complete: 10.2%; Average loss: 3.7876
Iteration: 409; Percent complete: 10.2%; Average loss: 4.0188
Iteration: 410; Percent complete: 10.2%; Average loss: 3.6516
Iteration: 411; Percent complete: 10.3%; Average loss: 3.4329
Iteration: 412; Percent complete: 10.3%; Average loss: 4.1355
Iteration: 413; Percent complete: 10.3%; Average loss: 4.1864
Iteration: 414; Percent complete: 10.3%; Average loss: 3.8305
Iteration: 415; Percent complete: 10.4%; Average loss: 4.1991
Iteration: 416; Percent complete: 10.4%; Average loss: 4.0429
Iteration: 417; Percent complete: 10.4%; Average loss: 3.5766
Iteration: 418; Percent complete: 10.4%; Average loss: 4.0629
Iteration: 419; Percent complete: 10.5%; Average loss: 3.9677
Iteration: 420; Percent complete: 10.5%; Average loss: 4.1732
Iteration: 421; Percent complete: 10.5%; Average loss: 4.1574
Iteration: 422; Percent complete: 10.5%; Average loss: 4.0122
Iteration: 423; Percent complete: 10.6%; Average loss: 3.9986
Iteratio

Iteration: 542; Percent complete: 13.6%; Average loss: 3.2424
Iteration: 543; Percent complete: 13.6%; Average loss: 3.0336
Iteration: 544; Percent complete: 13.6%; Average loss: 3.1552
Iteration: 545; Percent complete: 13.6%; Average loss: 2.9625
Iteration: 546; Percent complete: 13.7%; Average loss: 3.0795
Iteration: 547; Percent complete: 13.7%; Average loss: 2.9034
Iteration: 548; Percent complete: 13.7%; Average loss: 3.0622
Iteration: 549; Percent complete: 13.7%; Average loss: 2.7955
Iteration: 550; Percent complete: 13.8%; Average loss: 2.7602
Iteration: 551; Percent complete: 13.8%; Average loss: 2.9692
Iteration: 552; Percent complete: 13.8%; Average loss: 3.0023
Iteration: 553; Percent complete: 13.8%; Average loss: 2.9715
Iteration: 554; Percent complete: 13.9%; Average loss: 2.6997
Iteration: 555; Percent complete: 13.9%; Average loss: 3.1478
Iteration: 556; Percent complete: 13.9%; Average loss: 3.1498
Iteration: 557; Percent complete: 13.9%; Average loss: 2.9207
Iteratio

Iteration: 677; Percent complete: 16.9%; Average loss: 2.1935
Iteration: 678; Percent complete: 17.0%; Average loss: 2.3636
Iteration: 679; Percent complete: 17.0%; Average loss: 1.9799
Iteration: 680; Percent complete: 17.0%; Average loss: 2.0842
Iteration: 681; Percent complete: 17.0%; Average loss: 2.3948
Iteration: 682; Percent complete: 17.1%; Average loss: 2.2524
Iteration: 683; Percent complete: 17.1%; Average loss: 2.2378
Iteration: 684; Percent complete: 17.1%; Average loss: 2.1593
Iteration: 685; Percent complete: 17.1%; Average loss: 1.9938
Iteration: 686; Percent complete: 17.2%; Average loss: 1.9602
Iteration: 687; Percent complete: 17.2%; Average loss: 2.2107
Iteration: 688; Percent complete: 17.2%; Average loss: 2.2607
Iteration: 689; Percent complete: 17.2%; Average loss: 1.9514
Iteration: 690; Percent complete: 17.2%; Average loss: 1.9069
Iteration: 691; Percent complete: 17.3%; Average loss: 2.1143
Iteration: 692; Percent complete: 17.3%; Average loss: 2.2762
Iteratio

Iteration: 812; Percent complete: 20.3%; Average loss: 1.6932
Iteration: 813; Percent complete: 20.3%; Average loss: 1.4765
Iteration: 814; Percent complete: 20.3%; Average loss: 1.4572
Iteration: 815; Percent complete: 20.4%; Average loss: 1.6332
Iteration: 816; Percent complete: 20.4%; Average loss: 1.6539
Iteration: 817; Percent complete: 20.4%; Average loss: 1.3676
Iteration: 818; Percent complete: 20.4%; Average loss: 1.4884
Iteration: 819; Percent complete: 20.5%; Average loss: 1.1897
Iteration: 820; Percent complete: 20.5%; Average loss: 1.6442
Iteration: 821; Percent complete: 20.5%; Average loss: 1.5089
Iteration: 822; Percent complete: 20.5%; Average loss: 1.3824
Iteration: 823; Percent complete: 20.6%; Average loss: 1.2229
Iteration: 824; Percent complete: 20.6%; Average loss: 1.4273
Iteration: 825; Percent complete: 20.6%; Average loss: 1.2877
Iteration: 826; Percent complete: 20.6%; Average loss: 1.4429
Iteration: 827; Percent complete: 20.7%; Average loss: 1.6856
Iteratio

Iteration: 947; Percent complete: 23.7%; Average loss: 0.9989
Iteration: 948; Percent complete: 23.7%; Average loss: 1.1023
Iteration: 949; Percent complete: 23.7%; Average loss: 1.0166
Iteration: 950; Percent complete: 23.8%; Average loss: 1.2197
Iteration: 951; Percent complete: 23.8%; Average loss: 1.0220
Iteration: 952; Percent complete: 23.8%; Average loss: 1.0144
Iteration: 953; Percent complete: 23.8%; Average loss: 0.9005
Iteration: 954; Percent complete: 23.8%; Average loss: 1.0602
Iteration: 955; Percent complete: 23.9%; Average loss: 1.0656
Iteration: 956; Percent complete: 23.9%; Average loss: 1.0946
Iteration: 957; Percent complete: 23.9%; Average loss: 0.8494
Iteration: 958; Percent complete: 23.9%; Average loss: 1.0066
Iteration: 959; Percent complete: 24.0%; Average loss: 0.9936
Iteration: 960; Percent complete: 24.0%; Average loss: 1.1064
Iteration: 961; Percent complete: 24.0%; Average loss: 1.1038
Iteration: 962; Percent complete: 24.1%; Average loss: 0.8420
Iteratio

Iteration: 1078; Percent complete: 27.0%; Average loss: 0.7190
Iteration: 1079; Percent complete: 27.0%; Average loss: 0.5586
Iteration: 1080; Percent complete: 27.0%; Average loss: 0.6789
Iteration: 1081; Percent complete: 27.0%; Average loss: 0.7109
Iteration: 1082; Percent complete: 27.1%; Average loss: 0.6664
Iteration: 1083; Percent complete: 27.1%; Average loss: 0.7005
Iteration: 1084; Percent complete: 27.1%; Average loss: 0.7808
Iteration: 1085; Percent complete: 27.1%; Average loss: 0.6738
Iteration: 1086; Percent complete: 27.2%; Average loss: 0.5536
Iteration: 1087; Percent complete: 27.2%; Average loss: 0.7889
Iteration: 1088; Percent complete: 27.2%; Average loss: 0.6748
Iteration: 1089; Percent complete: 27.2%; Average loss: 0.5876
Iteration: 1090; Percent complete: 27.3%; Average loss: 0.6360
Iteration: 1091; Percent complete: 27.3%; Average loss: 0.5896
Iteration: 1092; Percent complete: 27.3%; Average loss: 0.6465
Iteration: 1093; Percent complete: 27.3%; Average loss:

Iteration: 1211; Percent complete: 30.3%; Average loss: 0.3689
Iteration: 1212; Percent complete: 30.3%; Average loss: 0.4773
Iteration: 1213; Percent complete: 30.3%; Average loss: 0.4001
Iteration: 1214; Percent complete: 30.3%; Average loss: 0.3706
Iteration: 1215; Percent complete: 30.4%; Average loss: 0.4364
Iteration: 1216; Percent complete: 30.4%; Average loss: 0.4787
Iteration: 1217; Percent complete: 30.4%; Average loss: 0.4400
Iteration: 1218; Percent complete: 30.4%; Average loss: 0.4281
Iteration: 1219; Percent complete: 30.5%; Average loss: 0.4220
Iteration: 1220; Percent complete: 30.5%; Average loss: 0.4674
Iteration: 1221; Percent complete: 30.5%; Average loss: 0.3508
Iteration: 1222; Percent complete: 30.6%; Average loss: 0.4227
Iteration: 1223; Percent complete: 30.6%; Average loss: 0.3516
Iteration: 1224; Percent complete: 30.6%; Average loss: 0.3721
Iteration: 1225; Percent complete: 30.6%; Average loss: 0.3900
Iteration: 1226; Percent complete: 30.6%; Average loss:

Iteration: 1342; Percent complete: 33.6%; Average loss: 0.2631
Iteration: 1343; Percent complete: 33.6%; Average loss: 0.3010
Iteration: 1344; Percent complete: 33.6%; Average loss: 0.2673
Iteration: 1345; Percent complete: 33.6%; Average loss: 0.2244
Iteration: 1346; Percent complete: 33.7%; Average loss: 0.2805
Iteration: 1347; Percent complete: 33.7%; Average loss: 0.2310
Iteration: 1348; Percent complete: 33.7%; Average loss: 0.2751
Iteration: 1349; Percent complete: 33.7%; Average loss: 0.2320
Iteration: 1350; Percent complete: 33.8%; Average loss: 0.3387
Iteration: 1351; Percent complete: 33.8%; Average loss: 0.2874
Iteration: 1352; Percent complete: 33.8%; Average loss: 0.2485
Iteration: 1353; Percent complete: 33.8%; Average loss: 0.2793
Iteration: 1354; Percent complete: 33.9%; Average loss: 0.2425
Iteration: 1355; Percent complete: 33.9%; Average loss: 0.2414
Iteration: 1356; Percent complete: 33.9%; Average loss: 0.2665
Iteration: 1357; Percent complete: 33.9%; Average loss:

Iteration: 1475; Percent complete: 36.9%; Average loss: 0.1883
Iteration: 1476; Percent complete: 36.9%; Average loss: 0.1448
Iteration: 1477; Percent complete: 36.9%; Average loss: 0.2006
Iteration: 1478; Percent complete: 37.0%; Average loss: 0.1747
Iteration: 1479; Percent complete: 37.0%; Average loss: 0.1752
Iteration: 1480; Percent complete: 37.0%; Average loss: 0.1788
Iteration: 1481; Percent complete: 37.0%; Average loss: 0.1365
Iteration: 1482; Percent complete: 37.0%; Average loss: 0.1714
Iteration: 1483; Percent complete: 37.1%; Average loss: 0.1795
Iteration: 1484; Percent complete: 37.1%; Average loss: 0.1666
Iteration: 1485; Percent complete: 37.1%; Average loss: 0.1696
Iteration: 1486; Percent complete: 37.1%; Average loss: 0.1920
Iteration: 1487; Percent complete: 37.2%; Average loss: 0.1571
Iteration: 1488; Percent complete: 37.2%; Average loss: 0.1927
Iteration: 1489; Percent complete: 37.2%; Average loss: 0.1719
Iteration: 1490; Percent complete: 37.2%; Average loss:

Iteration: 1607; Percent complete: 40.2%; Average loss: 0.0968
Iteration: 1608; Percent complete: 40.2%; Average loss: 0.1108
Iteration: 1609; Percent complete: 40.2%; Average loss: 0.1245
Iteration: 1610; Percent complete: 40.2%; Average loss: 0.1149
Iteration: 1611; Percent complete: 40.3%; Average loss: 0.1178
Iteration: 1612; Percent complete: 40.3%; Average loss: 0.1335
Iteration: 1613; Percent complete: 40.3%; Average loss: 0.1129
Iteration: 1614; Percent complete: 40.4%; Average loss: 0.1044
Iteration: 1615; Percent complete: 40.4%; Average loss: 0.1236
Iteration: 1616; Percent complete: 40.4%; Average loss: 0.0926
Iteration: 1617; Percent complete: 40.4%; Average loss: 0.1165
Iteration: 1618; Percent complete: 40.5%; Average loss: 0.1300
Iteration: 1619; Percent complete: 40.5%; Average loss: 0.0927
Iteration: 1620; Percent complete: 40.5%; Average loss: 0.1099
Iteration: 1621; Percent complete: 40.5%; Average loss: 0.1070
Iteration: 1622; Percent complete: 40.6%; Average loss:

Iteration: 1738; Percent complete: 43.5%; Average loss: 0.0894
Iteration: 1739; Percent complete: 43.5%; Average loss: 0.0725
Iteration: 1740; Percent complete: 43.5%; Average loss: 0.0796
Iteration: 1741; Percent complete: 43.5%; Average loss: 0.0846
Iteration: 1742; Percent complete: 43.5%; Average loss: 0.0948
Iteration: 1743; Percent complete: 43.6%; Average loss: 0.0753
Iteration: 1744; Percent complete: 43.6%; Average loss: 0.0743
Iteration: 1745; Percent complete: 43.6%; Average loss: 0.0759
Iteration: 1746; Percent complete: 43.6%; Average loss: 0.0817
Iteration: 1747; Percent complete: 43.7%; Average loss: 0.0835
Iteration: 1748; Percent complete: 43.7%; Average loss: 0.0750
Iteration: 1749; Percent complete: 43.7%; Average loss: 0.0763
Iteration: 1750; Percent complete: 43.8%; Average loss: 0.0691
Iteration: 1751; Percent complete: 43.8%; Average loss: 0.0746
Iteration: 1752; Percent complete: 43.8%; Average loss: 0.0774
Iteration: 1753; Percent complete: 43.8%; Average loss:

Iteration: 1870; Percent complete: 46.8%; Average loss: 0.0644
Iteration: 1871; Percent complete: 46.8%; Average loss: 0.0742
Iteration: 1872; Percent complete: 46.8%; Average loss: 0.0659
Iteration: 1873; Percent complete: 46.8%; Average loss: 0.0588
Iteration: 1874; Percent complete: 46.9%; Average loss: 0.0611
Iteration: 1875; Percent complete: 46.9%; Average loss: 0.0675
Iteration: 1876; Percent complete: 46.9%; Average loss: 0.0554
Iteration: 1877; Percent complete: 46.9%; Average loss: 0.0595
Iteration: 1878; Percent complete: 46.9%; Average loss: 0.0675
Iteration: 1879; Percent complete: 47.0%; Average loss: 0.0537
Iteration: 1880; Percent complete: 47.0%; Average loss: 0.0608
Iteration: 1881; Percent complete: 47.0%; Average loss: 0.0529
Iteration: 1882; Percent complete: 47.0%; Average loss: 0.0649
Iteration: 1883; Percent complete: 47.1%; Average loss: 0.0655
Iteration: 1884; Percent complete: 47.1%; Average loss: 0.0755
Iteration: 1885; Percent complete: 47.1%; Average loss:

Iteration: 2001; Percent complete: 50.0%; Average loss: 0.0528
Iteration: 2002; Percent complete: 50.0%; Average loss: 0.0550
Iteration: 2003; Percent complete: 50.1%; Average loss: 0.0453
Iteration: 2004; Percent complete: 50.1%; Average loss: 0.0514
Iteration: 2005; Percent complete: 50.1%; Average loss: 0.0510
Iteration: 2006; Percent complete: 50.1%; Average loss: 0.0444
Iteration: 2007; Percent complete: 50.2%; Average loss: 0.0491
Iteration: 2008; Percent complete: 50.2%; Average loss: 0.0519
Iteration: 2009; Percent complete: 50.2%; Average loss: 0.0434
Iteration: 2010; Percent complete: 50.2%; Average loss: 0.0425
Iteration: 2011; Percent complete: 50.3%; Average loss: 0.0507
Iteration: 2012; Percent complete: 50.3%; Average loss: 0.0423
Iteration: 2013; Percent complete: 50.3%; Average loss: 0.0508
Iteration: 2014; Percent complete: 50.3%; Average loss: 0.0499
Iteration: 2015; Percent complete: 50.4%; Average loss: 0.0520
Iteration: 2016; Percent complete: 50.4%; Average loss:

Iteration: 2134; Percent complete: 53.3%; Average loss: 0.0358
Iteration: 2135; Percent complete: 53.4%; Average loss: 0.0369
Iteration: 2136; Percent complete: 53.4%; Average loss: 0.0397
Iteration: 2137; Percent complete: 53.4%; Average loss: 0.0319
Iteration: 2138; Percent complete: 53.4%; Average loss: 0.0362
Iteration: 2139; Percent complete: 53.5%; Average loss: 0.0336
Iteration: 2140; Percent complete: 53.5%; Average loss: 0.0325
Iteration: 2141; Percent complete: 53.5%; Average loss: 0.0343
Iteration: 2142; Percent complete: 53.5%; Average loss: 0.0392
Iteration: 2143; Percent complete: 53.6%; Average loss: 0.0364
Iteration: 2144; Percent complete: 53.6%; Average loss: 0.0471
Iteration: 2145; Percent complete: 53.6%; Average loss: 0.0334
Iteration: 2146; Percent complete: 53.6%; Average loss: 0.0429
Iteration: 2147; Percent complete: 53.7%; Average loss: 0.0309
Iteration: 2148; Percent complete: 53.7%; Average loss: 0.0414
Iteration: 2149; Percent complete: 53.7%; Average loss:

Iteration: 2266; Percent complete: 56.6%; Average loss: 0.0280
Iteration: 2267; Percent complete: 56.7%; Average loss: 0.0292
Iteration: 2268; Percent complete: 56.7%; Average loss: 0.0329
Iteration: 2269; Percent complete: 56.7%; Average loss: 0.0278
Iteration: 2270; Percent complete: 56.8%; Average loss: 0.0289
Iteration: 2271; Percent complete: 56.8%; Average loss: 0.0337
Iteration: 2272; Percent complete: 56.8%; Average loss: 0.0388
Iteration: 2273; Percent complete: 56.8%; Average loss: 0.0423
Iteration: 2274; Percent complete: 56.9%; Average loss: 0.0345
Iteration: 2275; Percent complete: 56.9%; Average loss: 0.0305
Iteration: 2276; Percent complete: 56.9%; Average loss: 0.0255
Iteration: 2277; Percent complete: 56.9%; Average loss: 0.0351
Iteration: 2278; Percent complete: 57.0%; Average loss: 0.0254
Iteration: 2279; Percent complete: 57.0%; Average loss: 0.0342
Iteration: 2280; Percent complete: 57.0%; Average loss: 0.0349
Iteration: 2281; Percent complete: 57.0%; Average loss:

Iteration: 2398; Percent complete: 60.0%; Average loss: 0.0273
Iteration: 2399; Percent complete: 60.0%; Average loss: 0.0300
Iteration: 2400; Percent complete: 60.0%; Average loss: 0.0281
Iteration: 2401; Percent complete: 60.0%; Average loss: 0.0272
Iteration: 2402; Percent complete: 60.1%; Average loss: 0.0254
Iteration: 2403; Percent complete: 60.1%; Average loss: 0.0302
Iteration: 2404; Percent complete: 60.1%; Average loss: 0.0241
Iteration: 2405; Percent complete: 60.1%; Average loss: 0.0246
Iteration: 2406; Percent complete: 60.2%; Average loss: 0.0290
Iteration: 2407; Percent complete: 60.2%; Average loss: 0.0234
Iteration: 2408; Percent complete: 60.2%; Average loss: 0.0204
Iteration: 2409; Percent complete: 60.2%; Average loss: 0.0278
Iteration: 2410; Percent complete: 60.2%; Average loss: 0.0193
Iteration: 2411; Percent complete: 60.3%; Average loss: 0.0258
Iteration: 2412; Percent complete: 60.3%; Average loss: 0.0255
Iteration: 2413; Percent complete: 60.3%; Average loss:

Iteration: 2530; Percent complete: 63.2%; Average loss: 0.0254
Iteration: 2531; Percent complete: 63.3%; Average loss: 0.0201
Iteration: 2532; Percent complete: 63.3%; Average loss: 0.0208
Iteration: 2533; Percent complete: 63.3%; Average loss: 0.0234
Iteration: 2534; Percent complete: 63.3%; Average loss: 0.0207
Iteration: 2535; Percent complete: 63.4%; Average loss: 0.0199
Iteration: 2536; Percent complete: 63.4%; Average loss: 0.0626
Iteration: 2537; Percent complete: 63.4%; Average loss: 0.0184
Iteration: 2538; Percent complete: 63.4%; Average loss: 0.0208
Iteration: 2539; Percent complete: 63.5%; Average loss: 0.0225
Iteration: 2540; Percent complete: 63.5%; Average loss: 0.0212
Iteration: 2541; Percent complete: 63.5%; Average loss: 0.0212
Iteration: 2542; Percent complete: 63.5%; Average loss: 0.0239
Iteration: 2543; Percent complete: 63.6%; Average loss: 0.0214
Iteration: 2544; Percent complete: 63.6%; Average loss: 0.0220
Iteration: 2545; Percent complete: 63.6%; Average loss:

Iteration: 2661; Percent complete: 66.5%; Average loss: 0.0200
Iteration: 2662; Percent complete: 66.5%; Average loss: 0.0216
Iteration: 2663; Percent complete: 66.6%; Average loss: 0.0168
Iteration: 2664; Percent complete: 66.6%; Average loss: 0.0179
Iteration: 2665; Percent complete: 66.6%; Average loss: 0.0189
Iteration: 2666; Percent complete: 66.6%; Average loss: 0.0208
Iteration: 2667; Percent complete: 66.7%; Average loss: 0.0191
Iteration: 2668; Percent complete: 66.7%; Average loss: 0.0197
Iteration: 2669; Percent complete: 66.7%; Average loss: 0.0340
Iteration: 2670; Percent complete: 66.8%; Average loss: 0.0188
Iteration: 2671; Percent complete: 66.8%; Average loss: 0.0258
Iteration: 2672; Percent complete: 66.8%; Average loss: 0.0192
Iteration: 2673; Percent complete: 66.8%; Average loss: 0.0272
Iteration: 2674; Percent complete: 66.8%; Average loss: 0.0195
Iteration: 2675; Percent complete: 66.9%; Average loss: 0.0184
Iteration: 2676; Percent complete: 66.9%; Average loss:

Iteration: 2792; Percent complete: 69.8%; Average loss: 0.0234
Iteration: 2793; Percent complete: 69.8%; Average loss: 0.0599
Iteration: 2794; Percent complete: 69.8%; Average loss: 0.0308
Iteration: 2795; Percent complete: 69.9%; Average loss: 0.0368
Iteration: 2796; Percent complete: 69.9%; Average loss: 0.0300
Iteration: 2797; Percent complete: 69.9%; Average loss: 0.0297
Iteration: 2798; Percent complete: 70.0%; Average loss: 0.0259
Iteration: 2799; Percent complete: 70.0%; Average loss: 0.0193
Iteration: 2800; Percent complete: 70.0%; Average loss: 0.0230
Iteration: 2801; Percent complete: 70.0%; Average loss: 0.0303
Iteration: 2802; Percent complete: 70.0%; Average loss: 0.0722
Iteration: 2803; Percent complete: 70.1%; Average loss: 0.0233
Iteration: 2804; Percent complete: 70.1%; Average loss: 0.0474
Iteration: 2805; Percent complete: 70.1%; Average loss: 0.0312
Iteration: 2806; Percent complete: 70.2%; Average loss: 0.0587
Iteration: 2807; Percent complete: 70.2%; Average loss:

Iteration: 2923; Percent complete: 73.1%; Average loss: 0.0522
Iteration: 2924; Percent complete: 73.1%; Average loss: 0.0544
Iteration: 2925; Percent complete: 73.1%; Average loss: 0.0891
Iteration: 2926; Percent complete: 73.2%; Average loss: 0.0712
Iteration: 2927; Percent complete: 73.2%; Average loss: 0.0701
Iteration: 2928; Percent complete: 73.2%; Average loss: 0.1102
Iteration: 2929; Percent complete: 73.2%; Average loss: 0.0704
Iteration: 2930; Percent complete: 73.2%; Average loss: 0.0631
Iteration: 2931; Percent complete: 73.3%; Average loss: 0.0657
Iteration: 2932; Percent complete: 73.3%; Average loss: 0.0473
Iteration: 2933; Percent complete: 73.3%; Average loss: 0.0607
Iteration: 2934; Percent complete: 73.4%; Average loss: 0.0692
Iteration: 2935; Percent complete: 73.4%; Average loss: 0.0591
Iteration: 2936; Percent complete: 73.4%; Average loss: 0.0585
Iteration: 2937; Percent complete: 73.4%; Average loss: 0.0792
Iteration: 2938; Percent complete: 73.5%; Average loss:

Iteration: 3056; Percent complete: 76.4%; Average loss: 0.0694
Iteration: 3057; Percent complete: 76.4%; Average loss: 0.1145
Iteration: 3058; Percent complete: 76.4%; Average loss: 0.1243
Iteration: 3059; Percent complete: 76.5%; Average loss: 0.0660
Iteration: 3060; Percent complete: 76.5%; Average loss: 0.0773
Iteration: 3061; Percent complete: 76.5%; Average loss: 0.0779
Iteration: 3062; Percent complete: 76.5%; Average loss: 0.0742
Iteration: 3063; Percent complete: 76.6%; Average loss: 0.0543
Iteration: 3064; Percent complete: 76.6%; Average loss: 0.0519
Iteration: 3065; Percent complete: 76.6%; Average loss: 0.0804
Iteration: 3066; Percent complete: 76.6%; Average loss: 0.0772
Iteration: 3067; Percent complete: 76.7%; Average loss: 0.1099
Iteration: 3068; Percent complete: 76.7%; Average loss: 0.0742
Iteration: 3069; Percent complete: 76.7%; Average loss: 0.0708
Iteration: 3070; Percent complete: 76.8%; Average loss: 0.1125
Iteration: 3071; Percent complete: 76.8%; Average loss:

Iteration: 3187; Percent complete: 79.7%; Average loss: 0.0637
Iteration: 3188; Percent complete: 79.7%; Average loss: 0.0466
Iteration: 3189; Percent complete: 79.7%; Average loss: 0.0495
Iteration: 3190; Percent complete: 79.8%; Average loss: 0.0818
Iteration: 3191; Percent complete: 79.8%; Average loss: 0.0600
Iteration: 3192; Percent complete: 79.8%; Average loss: 0.0560
Iteration: 3193; Percent complete: 79.8%; Average loss: 0.0645
Iteration: 3194; Percent complete: 79.8%; Average loss: 0.0497
Iteration: 3195; Percent complete: 79.9%; Average loss: 0.0424
Iteration: 3196; Percent complete: 79.9%; Average loss: 0.0459
Iteration: 3197; Percent complete: 79.9%; Average loss: 0.0491
Iteration: 3198; Percent complete: 80.0%; Average loss: 0.0852
Iteration: 3199; Percent complete: 80.0%; Average loss: 0.0482
Iteration: 3200; Percent complete: 80.0%; Average loss: 0.0552
Iteration: 3201; Percent complete: 80.0%; Average loss: 0.0594
Iteration: 3202; Percent complete: 80.0%; Average loss:

Iteration: 3319; Percent complete: 83.0%; Average loss: 0.0293
Iteration: 3320; Percent complete: 83.0%; Average loss: 0.0438
Iteration: 3321; Percent complete: 83.0%; Average loss: 0.0377
Iteration: 3322; Percent complete: 83.0%; Average loss: 0.0364
Iteration: 3323; Percent complete: 83.1%; Average loss: 0.0400
Iteration: 3324; Percent complete: 83.1%; Average loss: 0.0577
Iteration: 3325; Percent complete: 83.1%; Average loss: 0.0316
Iteration: 3326; Percent complete: 83.2%; Average loss: 0.0436
Iteration: 3327; Percent complete: 83.2%; Average loss: 0.0315
Iteration: 3328; Percent complete: 83.2%; Average loss: 0.0343
Iteration: 3329; Percent complete: 83.2%; Average loss: 0.0339
Iteration: 3330; Percent complete: 83.2%; Average loss: 0.0336
Iteration: 3331; Percent complete: 83.3%; Average loss: 0.0392
Iteration: 3332; Percent complete: 83.3%; Average loss: 0.0328
Iteration: 3333; Percent complete: 83.3%; Average loss: 0.0263
Iteration: 3334; Percent complete: 83.4%; Average loss:

Iteration: 3451; Percent complete: 86.3%; Average loss: 0.0377
Iteration: 3452; Percent complete: 86.3%; Average loss: 0.0487
Iteration: 3453; Percent complete: 86.3%; Average loss: 0.0506
Iteration: 3454; Percent complete: 86.4%; Average loss: 0.0434
Iteration: 3455; Percent complete: 86.4%; Average loss: 0.1145
Iteration: 3456; Percent complete: 86.4%; Average loss: 0.0374
Iteration: 3457; Percent complete: 86.4%; Average loss: 0.0393
Iteration: 3458; Percent complete: 86.5%; Average loss: 0.0310
Iteration: 3459; Percent complete: 86.5%; Average loss: 0.0392
Iteration: 3460; Percent complete: 86.5%; Average loss: 0.0347
Iteration: 3461; Percent complete: 86.5%; Average loss: 0.0364
Iteration: 3462; Percent complete: 86.6%; Average loss: 0.0316
Iteration: 3463; Percent complete: 86.6%; Average loss: 0.0361
Iteration: 3464; Percent complete: 86.6%; Average loss: 0.0313
Iteration: 3465; Percent complete: 86.6%; Average loss: 0.0941
Iteration: 3466; Percent complete: 86.7%; Average loss:

Iteration: 3582; Percent complete: 89.5%; Average loss: 0.0265
Iteration: 3583; Percent complete: 89.6%; Average loss: 0.0188
Iteration: 3584; Percent complete: 89.6%; Average loss: 0.0244
Iteration: 3585; Percent complete: 89.6%; Average loss: 0.0247
Iteration: 3586; Percent complete: 89.6%; Average loss: 0.0167
Iteration: 3587; Percent complete: 89.7%; Average loss: 0.0179
Iteration: 3588; Percent complete: 89.7%; Average loss: 0.0207
Iteration: 3589; Percent complete: 89.7%; Average loss: 0.0237
Iteration: 3590; Percent complete: 89.8%; Average loss: 0.0396
Iteration: 3591; Percent complete: 89.8%; Average loss: 0.0173
Iteration: 3592; Percent complete: 89.8%; Average loss: 0.0387
Iteration: 3593; Percent complete: 89.8%; Average loss: 0.0178
Iteration: 3594; Percent complete: 89.8%; Average loss: 0.0177
Iteration: 3595; Percent complete: 89.9%; Average loss: 0.0186
Iteration: 3596; Percent complete: 89.9%; Average loss: 0.0159
Iteration: 3597; Percent complete: 89.9%; Average loss:

Iteration: 3713; Percent complete: 92.8%; Average loss: 0.0162
Iteration: 3714; Percent complete: 92.8%; Average loss: 0.0168
Iteration: 3715; Percent complete: 92.9%; Average loss: 0.0146
Iteration: 3716; Percent complete: 92.9%; Average loss: 0.0201
Iteration: 3717; Percent complete: 92.9%; Average loss: 0.0134
Iteration: 3718; Percent complete: 93.0%; Average loss: 0.0131
Iteration: 3719; Percent complete: 93.0%; Average loss: 0.0154
Iteration: 3720; Percent complete: 93.0%; Average loss: 0.0147
Iteration: 3721; Percent complete: 93.0%; Average loss: 0.0140
Iteration: 3722; Percent complete: 93.0%; Average loss: 0.0229
Iteration: 3723; Percent complete: 93.1%; Average loss: 0.0144
Iteration: 3724; Percent complete: 93.1%; Average loss: 0.0137
Iteration: 3725; Percent complete: 93.1%; Average loss: 0.0155
Iteration: 3726; Percent complete: 93.2%; Average loss: 0.0145
Iteration: 3727; Percent complete: 93.2%; Average loss: 0.0177
Iteration: 3728; Percent complete: 93.2%; Average loss:

Iteration: 3844; Percent complete: 96.1%; Average loss: 0.0138
Iteration: 3845; Percent complete: 96.1%; Average loss: 0.0139
Iteration: 3846; Percent complete: 96.2%; Average loss: 0.0136
Iteration: 3847; Percent complete: 96.2%; Average loss: 0.0114
Iteration: 3848; Percent complete: 96.2%; Average loss: 0.0140
Iteration: 3849; Percent complete: 96.2%; Average loss: 0.0153
Iteration: 3850; Percent complete: 96.2%; Average loss: 0.0125
Iteration: 3851; Percent complete: 96.3%; Average loss: 0.0128
Iteration: 3852; Percent complete: 96.3%; Average loss: 0.0116
Iteration: 3853; Percent complete: 96.3%; Average loss: 0.0095
Iteration: 3854; Percent complete: 96.4%; Average loss: 0.0108
Iteration: 3855; Percent complete: 96.4%; Average loss: 0.0104
Iteration: 3856; Percent complete: 96.4%; Average loss: 0.0173
Iteration: 3857; Percent complete: 96.4%; Average loss: 0.0126
Iteration: 3858; Percent complete: 96.5%; Average loss: 0.0134
Iteration: 3859; Percent complete: 96.5%; Average loss:

Iteration: 3975; Percent complete: 99.4%; Average loss: 0.0095
Iteration: 3976; Percent complete: 99.4%; Average loss: 0.0079
Iteration: 3977; Percent complete: 99.4%; Average loss: 0.0090
Iteration: 3978; Percent complete: 99.5%; Average loss: 0.0082
Iteration: 3979; Percent complete: 99.5%; Average loss: 0.0083
Iteration: 3980; Percent complete: 99.5%; Average loss: 0.0088
Iteration: 3981; Percent complete: 99.5%; Average loss: 0.0080
Iteration: 3982; Percent complete: 99.6%; Average loss: 0.0075
Iteration: 3983; Percent complete: 99.6%; Average loss: 0.0086
Iteration: 3984; Percent complete: 99.6%; Average loss: 0.0080
Iteration: 3985; Percent complete: 99.6%; Average loss: 0.0097
Iteration: 3986; Percent complete: 99.7%; Average loss: 0.0094
Iteration: 3987; Percent complete: 99.7%; Average loss: 0.0093
Iteration: 3988; Percent complete: 99.7%; Average loss: 0.0076
Iteration: 3989; Percent complete: 99.7%; Average loss: 0.0095
Iteration: 3990; Percent complete: 99.8%; Average loss:

Run Evaluation
~~~~~~~~~~~~~~

To chat with your model, run the following block.




In [21]:
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)

# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher, voc)

> cook chicken
Bot: type macaroni recommend thi recip get minut recip


KeyboardInterrupt: 

Conclusion
----------

That’s all for this one, folks. Congratulations, you now know the
fundamentals to building a generative chatbot model! If you’re
interested, you can try tailoring the chatbot’s behavior by tweaking the
model and training parameters and customizing the data that you train
the model on.

Check out the other tutorials for more cool deep learning applications
in PyTorch!




In [20]:
filename = 'data/Batch_generation_2/test_step_query_text.csv'
targetname = 'data/Batch_generation_2/test_step_query_target.csv'
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)

# Begin chatting (uncomment and run the following line to begin)
text, target, responses = evaluateFile(encoder, decoder, searcher, voc, filename=filename, targetname=targetname)

with open('data/Batch_generation_2/test_step_query_predict.csv', 'wt') as f:
    for l in responses:
        f.write('{}\n'.format(l))

In [21]:
import os
res = os.popen('perl multi_bleu.perl data/Batch_generation_2/test_step_query_target.csv < data/Batch_generation_2/test_step_query_predict.csv')
res.read()

'BLEU = 8.97, 12.8/8.6/7.7/7.6 (BP=1.000, ratio=1.030, hyp_len=3152, ref_len=3059)\n'

In [26]:
res.close()

512

In [23]:
with open('data/Batch_generation_2/seq2seq_step_query_pred.txt', 'wt') as f:
    for i, (t, g, p) in enumerate(zip(text, target, responses)):
        print('\n\n\n')
        f.write('\n\n\n')
        if len(g) == 0:
            g = [None]
        print('TEXT:\n{}\nTARGET:\n{}\nPREDICT:\n{}'.format(t[0], g[0], p.strip()))
        f.write('TEXT:\n{}\nTARGET:\n{}\nPREDICT:\n{}'.format(t[0], g[0], p.strip()))
#     if i == 100:
#         break





TEXT:
additional recipes barbecue ideas available www walmart ca recipes buy raw materials walmart recipes
TARGET:
use raw material bought walmart make walmart recipes process easier
PREDICT:
let cool anything particular eg rack oven




TEXT:
combine ingredients pour ice sugared rim glass fizzy pink drink recipe
TARGET:
sugared rim glass
PREDICT:
need know portions add ingredient tablespoons




TEXT:
using pastry brush coat sides pork tenderloin place roasting pan rack honey mustard pork loin
TARGET:
happens pastry brush
PREDICT:
long heat oil wok almost smoking




TEXT:
preheat oven 375 degrees f guarantee scones donγçöt stick line baking sheet parchment paper oven temperature scones different altitudes
TARGET:
temperature needs change altitude
PREDICT:
need know degree celsius fahrenheit




TEXT:
instruction given package long mix ingredients making tricolor rotini pasta salad
TARGET:
need know long mix ingredients
PREDICT:
amount cool whip




TEXT:
roll dough ball place top

In [33]:
target

[[],
 [],
 [],
 [],
 ['cups packed light brown sugar tablespoons margarine tablespoons vegetable shortening cups dark molasses tablespoon baking soda cup boiling water cups all purpose flour sifted tablespoon ground cloves tablespoons ground ginger tablespoon ground cinnamon'],
 [],
 [],
 [],
 ['image of finished product'],
 [],
 [],
 ['peeled and cut into inch thick rounds bechamel sauce cup butter cups hot milk tablespoons flour eggs cup grated kefalograviera cheese or parm teaspoon salt'],
 [],
 ['visualized instruction'],
 [],
 ['how the chicken will look like after frying .'],
 [],
 ['tell exact amounts of ingredients'],
 ['place the cookie crusts in the freezer and make the banana ice cream'],
 ['show a video of this step and show the correct consistency .'],
 [],
 [],
 [],
 ['visualized instructions for clarifying purposes'],
 ['make the pink coconut cream .'],
 [],
 [],
 ['baking dish'],
 ['show images or video of this step . suggest turning the oven on or have the assistant tu