In [45]:
import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import numpy as np

# All tensors and modules in this notebook are moved to this device via `.to(device)`; it is fixed to the CPU here.
device = torch.device("cpu")

In [46]:
# Corpus name and the directory (relative to the working directory) that is expected to hold its files
corpus_name = "cornell movie-dialogs corpus"
corpus = os.path.join("data", corpus_name)

def printLines(file, n=10):
    """Print the first `n` lines of `file`, each as a raw `bytes` object.

    The file is opened in binary mode, so every printed line is shown as a
    bytes literal including its trailing newline.

    Args:
        file: Path of the file to preview.
        n: Number of leading lines to print. `None` or a negative value keeps
            the list-slice meaning of ``lines[:n]``.
    """
    with open(file, 'rb') as datafile:
        if n is None or n < 0:
            # These slices depend on the total number of lines, so the whole file must be read.
            head = datafile.readlines()[:n]
        else:
            # zip() stops as soon as range(n) is exhausted, so at most n lines are read
            # instead of loading the entire file into memory.
            head = [line for _, line in zip(range(n), datafile)]
    for line in head:
        print(line)

# Preview the first raw lines of the corpus file to see its " +++$+++ "-separated layout
printLines(os.path.join(corpus, "movie_lines.txt"))

b'L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!\n'
b'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!\n'
b'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.\n'
b'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?\n'
b"L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.\n"
b'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow\n'
b"L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.\n"
b'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No\n'
b'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?\n'
b'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?\n'


In [47]:
def loadLines(fileName, fields):
    """Parse a " +++$+++ "-separated file into a dict of per-line records.

    Args:
        fileName: Path of the file to read (decoded as iso-8859-1).
        fields: Field names, in column order; must include 'lineID'.

    Returns:
        Dict mapping each record's 'lineID' value to a dict of
        field name -> column text. The last column keeps its trailing newline.

    Raises:
        IndexError: If a line has fewer columns than `fields`.
    """
    records = {}
    with open(fileName, 'r', encoding='iso-8859-1') as handle:
        for raw in handle:
            parts = raw.split(" +++$+++ ")
            # Pair every field name with the column at the same position
            record = {name: parts[pos] for pos, name in enumerate(fields)}
            records[record['lineID']] = record
    return records


def loadConversations(fileName, lines, fields):
    """Group line records into conversations described by a conversations file.

    Args:
        fileName: Path of the " +++$+++ "-separated conversations file
            (decoded as iso-8859-1).
        lines: Dict mapping line IDs to line records, as returned by `loadLines`.
        fields: Field names, in column order; must include 'utteranceIDs'.

    Returns:
        List of dicts, one per file line, holding the parsed fields plus a
        'lines' entry with the line records of the conversation in order.

    Raises:
        IndexError: If a file line has fewer columns than `fields`.
        KeyError: If an utterance ID is not present in `lines`.
    """
    # Compile once instead of once per file line
    utterance_id_pattern = re.compile('L[0-9]+')
    conversations = []
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            convObj = {field: values[i] for i, field in enumerate(fields)}
            # Convert string to list (convObj["utteranceIDs"] == "['L598485', 'L598486', ...]")
            lineIds = utterance_id_pattern.findall(convObj["utteranceIDs"])
            # Reassemble lines
            convObj["lines"] = [lines[lineId] for lineId in lineIds]
            conversations.append(convObj)
    return conversations


def extractSentencePairs(conversations):
    """Build [query, reply] pairs from consecutive lines of each conversation.

    Args:
        conversations: Iterable of dicts whose 'lines' entry is a list of
            line records, each with a 'text' entry.

    Returns:
        List of two-element lists of stripped texts. A pair is dropped when
        either side is empty after stripping. The last line of a conversation
        never appears as a query because nothing follows it.
    """
    qa_pairs = []
    for conversation in conversations:
        utterances = conversation["lines"]
        # Walk each line together with the line that follows it
        for query, reply in zip(utterances, utterances[1:]):
            inputLine = query["text"].strip()
            targetLine = reply["text"].strip()
            if not inputLine or not targetLine:
                continue
            qa_pairs.append([inputLine, targetLine])
    return qa_pairs

In [48]:
# Path of the tab-separated file of sentence pairs that this cell writes
datafile = os.path.join(corpus, "formatted_movie_lines.txt")

delimiter = '\t'
# Unescape the delimiter
# NOTE(review): '\t' is already a real tab character here, so this decode looks like a no-op — confirm.
delimiter = str(codecs.decode(delimiter, "unicode_escape"))

# Initialize lines dict, conversations list, and field ids
# (both containers are overwritten by the loader calls below)
lines = {}
conversations = []
MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

# Load lines and process conversations
print("\nProcessing corpus...")
lines = loadLines(os.path.join(corpus, "movie_lines.txt"), MOVIE_LINES_FIELDS)
print("\nLoading conversations...")
conversations = loadConversations(os.path.join(corpus, "movie_conversations.txt"),
                                  lines, MOVIE_CONVERSATIONS_FIELDS)

# Write new csv file: one row per [query, reply] pair, separated by `delimiter`
print("\nWriting newly formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    for pair in extractSentencePairs(conversations):
        writer.writerow(pair)

# Print a sample of lines
print("\nSample lines from file:")
printLines(datafile)


Processing corpus...

Loading conversations...

Writing newly formatted file...

Sample lines from file:
b"Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.\tWell, I thought we'd start with pronunciation, if that's okay with you.\n"
b"Well, I thought we'd start with pronunciation, if that's okay with you.\tNot the hacking and gagging and spitting part.  Please.\n"
b"Not the hacking and gagging and spitting part.  Please.\tOkay... then how 'bout we try out some French cuisine.  Saturday?  Night?\n"
b"You're asking me out.  That's so cute. What's your name again?\tForget it.\n"
b"No, no, it's my fault -- we didn't have a proper introduction ---\tCameron.\n"
b"Cameron.\tThe thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does.\n"
b"The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't dat

In [49]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    """Vocabulary mapping words to integer indexes and back, with word counts.

    Indexes 0-2 are reserved for the PAD, SOS and EOS tokens; real words are
    numbered from 3 upwards in the order they are first added.
    """

    def __init__(self, name):
        """Create an empty vocabulary labelled `name`."""
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        """Add every single-space-separated word of `sentence` to the vocabulary."""
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        """Register `word`, or increment its count if it is already known."""
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    def trim(self, min_count):
        """Remove words seen fewer than `min_count` times and renumber the rest.

        Only the first call has an effect; later calls return immediately.
        Kept words retain their original counts and their relative order.
        """
        if self.trimmed:
            return
        self.trimmed = True

        # Dicts preserve insertion order, so kept words keep their relative order
        kept_counts = {word: count for word, count in self.word2count.items() if count >= min_count}

        total_words = len(self.word2index)
        # Guard the ratio so trimming an empty vocabulary does not divide by zero
        kept_ratio = len(kept_counts) / total_words if total_words else 0.0
        print('keep_words {} / {} = {:.4f}'.format(len(kept_counts), total_words, kept_ratio))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count default tokens

        for word, count in kept_counts.items():
            self.addWord(word)
            # addWord() starts every new word at 1; restore the real frequency
            self.word2count[word] = count

In [50]:
MAX_LENGTH = 20  # Sentence length limit: filterPair keeps only sentences with fewer space-separated tokens than this

# Strip combining marks from a Unicode string, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return `s` NFD-decomposed with all 'Mn' (nonspacing mark) characters removed."""
    decomposed = unicodedata.normalize('NFD', s)
    kept_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept_chars)

def normalizeString(s):
    """Lowercase and clean a sentence for tokenisation.

    Steps: lowercase and strip, drop combining marks via `unicodeToAscii`,
    put a space before each '.', '!' or '?', replace every run of characters
    other than letters and those three marks with one space, then collapse
    whitespace and strip.
    """
    text = unicodeToAscii(s.lower().strip())
    substitutions = (
        (r"([.!?])", r" \1"),        # detach sentence punctuation from the preceding word
        (r"[^a-zA-Z.!?]+", r" "),    # everything else that is not a letter becomes a space
        (r"\s+", r" "),              # collapse repeated whitespace
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

def readVocs(datafile, corpus_name):
    """Read tab-separated sentence pairs and create an empty vocabulary.

    Args:
        datafile: Path of a UTF-8 file with one tab-separated pair per line.
        corpus_name: Name given to the new `Voc`.

    Returns:
        (voc, pairs): a fresh, unpopulated `Voc` and a list with one entry per
        line, each a list of the line's normalized tab-separated fields.
    """
    print("Reading lines...")
    # Read the file and split into lines; the context manager closes the
    # handle, which the previous bare open(...).read() never did
    with open(datafile, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

def filterPair(p):
    """Return True iff both sentences of pair `p` have fewer than MAX_LENGTH tokens.

    Tokens are counted by splitting on single spaces. The strict comparison
    leaves room for the EOS token that is appended later.
    """
    return all(len(sentence.split(' ')) < MAX_LENGTH for sentence in (p[0], p[1]))

def filterPairs(pairs):
    """Return the pairs accepted by `filterPair`, in their original order."""
    return list(filter(filterPair, pairs))

def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    """Read, length-filter and index the sentence pairs stored in `datafile`.

    Args:
        corpus: Not used by this function.
        corpus_name: Name given to the vocabulary.
        datafile: Path of the tab-separated pairs file.
        save_dir: Not used by this function.

    Returns:
        (voc, pairs): the populated vocabulary and the filtered pair list.
    """
    print("Start preparing training data ...")
    voc, all_pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(all_pairs)))
    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to {!s} sentence pairs".format(len(kept_pairs)))
    print("Counting words...")
    for pair in kept_pairs:
        # Both the query and the reply contribute words to the vocabulary
        for sentence in (pair[0], pair[1]):
            voc.addSentence(sentence)
    print("Counted words:", voc.num_words)
    return voc, kept_pairs


# Load/Assemble voc and pairs from the formatted file written by the earlier cell
# (save_dir is passed through but loadPrepareData does not use it)
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
# Print some pairs to validate
print("\npairs:")
for pair in pairs[:10]:
    print(pair)

Start preparing training data ...
Reading lines...
Read 221282 sentence pairs
Trimmed to 145439 sentence pairs
Counting words...
Counted words: 33027

pairs:
['well i thought we d start with pronunciation if that s okay with you .', 'not the hacking and gagging and spitting part . please .']
['not the hacking and gagging and spitting part . please .', 'okay . . . then how bout we try out some french cuisine . saturday ? night ?']
['you re asking me out . that s so cute . what s your name again ?', 'forget it .']
['no no it s my fault we didn t have a proper introduction', 'cameron .']
['gosh if only we could find kat a boyfriend . . .', 'let me see what i can do .']
['c esc ma tete . this is my head', 'right . see ? you re ready for the quiz .']
['that s because it s such a nice one .', 'forget french .']
['how is our little find the wench a date plan progressing ?', 'well there s someone i think might be']
['there .', 'where ?']
['you have my word . as a gentleman', 'you re sweet .']


In [51]:
MIN_COUNT = 3    # Minimum word count threshold for trimming: words counted fewer times are removed from the voc

def trimRareWords(voc, pairs, MIN_COUNT):
    """Trim rare words from `voc` and drop every pair that uses one of them.

    Args:
        voc: Vocabulary exposing `trim(min_count)` and `word2index`.
        pairs: Sequence of pairs whose first two items are space-separated sentences.
        MIN_COUNT: Minimum count a word needs to stay in the vocabulary.

    Returns:
        List of the pairs whose input and output sentences contain only words
        still present in `voc.word2index`, in their original order.
    """
    # Trim words used under the MIN_COUNT from the voc
    voc.trim(MIN_COUNT)
    known_words = voc.word2index

    def only_known_words(sentence):
        return all(word in known_words for word in sentence.split(' '))

    # Only keep pairs that do not contain trimmed word(s) in their input or output sentence
    keep_pairs = [pair for pair in pairs if only_known_words(pair[0]) and only_known_words(pair[1])]

    total = len(pairs)
    # An empty pair list used to raise ZeroDivisionError here
    kept_ratio = len(keep_pairs) / total if total else 0.0
    print("Trimmed from {} pairs to {}, {:.4f} of total".format(total, len(keep_pairs), kept_ratio))
    return keep_pairs


# Trim voc and pairs
# NOTE: this rebinds `pairs`, and Voc.trim only acts on its first call, so re-running this cell filters an already-filtered list.
pairs = trimRareWords(voc, pairs, MIN_COUNT)

keep_words 17145 / 33024 = 0.5192
Trimmed from 145439 pairs to 126993, 0.8732 of total


In [52]:
def indexesFromSentence(voc, sentence):
    """Map each space-separated word of `sentence` to its index and append EOS_token.

    Raises:
        KeyError: If a word is not in `voc.word2index`.
    """
    indexes = [voc.word2index[word] for word in sentence.split(' ')]
    indexes.append(EOS_token)
    return indexes


def zeroPadding(l, fillvalue=PAD_token):
    """Transpose a list of sequences, padding shorter ones with `fillvalue`.

    Returns a list of tuples: one tuple per position, one item per sequence.
    """
    padded_columns = itertools.zip_longest(*l, fillvalue=fillvalue)
    return list(padded_columns)

def binaryMatrix(l, value=PAD_token):
    """Build a 0/1 matrix marking which entries of `l` differ from `value`.

    Args:
        l: Iterable of sequences of tokens.
        value: Token treated as padding. This argument was previously ignored
            in favour of the PAD_token global; it is now honoured, and the
            default keeps the old behaviour.

    Returns:
        List of lists with 0 where the token equals `value` and 1 elsewhere.
    """
    return [[0 if token == value else 1 for token in seq] for seq in l]

def inputVar(l, voc):
    """Convert a list of sentences into a padded index tensor plus their lengths.

    Returns:
        (padVar, lengths): a LongTensor built from the zero-padded, transposed
        index lists, and a tensor with each sentence's length including EOS.
    """
    indexes_batch = []
    seq_lengths = []
    for sentence in l:
        indexes = indexesFromSentence(voc, sentence)
        indexes_batch.append(indexes)
        seq_lengths.append(len(indexes))
    lengths = torch.tensor(seq_lengths)
    padVar = torch.LongTensor(zeroPadding(indexes_batch))
    return padVar, lengths

def outputVar(l, voc):
    """Convert a list of target sentences into a padded tensor, mask and max length.

    Returns:
        (padVar, mask, max_target_len): a LongTensor built from the
        zero-padded, transposed index lists; a boolean tensor of the same
        layout that is False at padding positions; and the length of the
        longest target including EOS.
    """
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    # masked_select expects a boolean mask; uint8 (ByteTensor) masks are
    # deprecated and rejected by current PyTorch releases
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

def batch2TrainData(voc, pair_batch):
    """Turn a batch of [input, output] sentence pairs into training tensors.

    The pairs are ordered by input length, longest first, before conversion.
    The caller's sequence is left untouched; earlier versions sorted it in place.

    Returns:
        (inp, lengths, output, mask, max_target_len) as produced by
        `inputVar` and `outputVar`.
    """
    ordered_batch = sorted(pair_batch, key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch = [pair[0] for pair in ordered_batch]
    output_batch = [pair[1] for pair in ordered_batch]
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len


# Example for validation: build one small random batch and inspect the resulting tensors.
# No random seed is set, so the sampled pairs differ between runs.
small_batch_size = 5
batches = batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

# In the printed tensors each column is one sentence and each row one time step
print("input_variable:", input_variable)
print("lengths:", lengths)
print("target_variable:", target_variable)
print("mask:", mask)
print("max_target_len:", max_target_len)

input_variable: tensor([[  45,   46,   52,   71,   64],
        [ 321,  180,   80,  157,    4],
        [  88,   12, 1941,  466,  872],
        [ 226,  390,   98, 1893,   33],
        [ 579,  116,   77,   15,    2],
        [8266,   15,   15,    2,    0],
        [  37,   89,    2,    0,    0],
        [  10,  134,    0,    0,    0],
        [  45,  180,    0,    0,    0],
        [9960,  298,    0,    0,    0],
        [ 900,   34,    0,    0,    0],
        [7016,   33,    0,    0,    0],
        [ 170,    2,    0,    0,    0],
        [3488,    0,    0,    0,    0],
        [  15,    0,    0,    0,    0],
        [   2,    0,    0,    0,    0]])
lengths: tensor([16, 13,  7,  6,  5])
target_variable: tensor([[  40,   25,    4,  373,   14],
        [ 191,  186,  210,   15,   64],
        [  17,    4,   51,    2, 1421],
        [1790,  118,   52,    0,   37],
        [ 259,   33, 1735,    0,  651],
        [  14,    2, 2818,    0,   15],
        [ 479,    0,  191,    0,    2],
        

In [72]:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder over embedded word indexes."""

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        """
        Args:
            hidden_size: GRU hidden size; also the expected embedding feature size.
            embedding: Module that maps word indexes to embeddings.
            n_layers: Number of stacked GRU layers.
            dropout: Dropout between GRU layers (forced to 0 for a single layer).
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Input and hidden sizes are both `hidden_size` because the GRU consumes
        # word embeddings with that many features.
        inter_layer_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=inter_layer_dropout, bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch of index sequences.

        Args:
            input_seq: Padded word-index tensor, time-major as used by the GRU.
            input_lengths: Length of every sequence, ordered longest first.
            hidden: Optional initial GRU hidden state.

        Returns:
            (outputs, hidden): the GRU outputs with both directions summed,
            and the GRU's final hidden state.
        """
        # Embed, then pack so the GRU skips the padded positions
        packed_input = nn.utils.rnn.pack_padded_sequence(self.embedding(input_seq), input_lengths)
        packed_output, hidden = self.gru(packed_input, hidden)
        unpacked, _ = nn.utils.rnn.pad_packed_sequence(packed_output)

        # The last dimension holds [forward | backward] features; add the two halves
        width = self.hidden_size
        summed = unpacked[..., :width] + unpacked[..., width:]
        return summed, hidden

In [64]:
# Luong attention layer
class Attn(nn.Module):
    """Attention scores between a decoder state and all encoder outputs.

    Supports the 'dot', 'general' and 'concat' scoring methods.
    """

    def __init__(self, method, hidden_size):
        """
        Args:
            method: One of 'dot', 'general' or 'concat'.
            hidden_size: Feature size of the hidden states being compared.

        Raises:
            ValueError: If `method` is not a supported scoring method.
        """
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            # Format the message; passing two args made str(error) a tuple repr
            raise ValueError("{} is not an appropriate attention method.".format(self.method))
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            # torch.FloatTensor(n) returns uninitialised memory (possibly NaN/inf),
            # so allocate the parameter and initialise it explicitly instead
            self.v = nn.Parameter(torch.empty(hidden_size))
            bound = 1.0 / (hidden_size ** 0.5)
            nn.init.uniform_(self.v, -bound, bound)

    def dot_score(self, hidden, encoder_output):
        """Score by the dot product over the feature dimension."""
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        """Score by the dot product with linearly transformed encoder outputs."""
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        """Score by `v` dotted with tanh(W[hidden; encoder_output])."""
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised attention weights.

        Scores are computed over dim 2, transposed, normalised over dim 1 and
        given an extra dimension at position 1.
        """
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [73]:
class LuongAttnDecoderRNN(nn.Module):
    """Unidirectional GRU decoder with Luong attention, run one step at a time."""

    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        """
        Args:
            attn_model: Attention scoring method passed to `Attn`.
            embedding: Module that maps word indexes to embeddings.
            hidden_size: GRU hidden size (and embedding feature size).
            output_size: Number of output classes.
            n_layers: Number of stacked GRU layers.
            dropout: Embedding dropout; also used between GRU layers when n_layers > 1.
        """
        super().__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        inter_layer_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=inter_layer_dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Decode a single time step.

        Args:
            input_step: Word indexes for the current step (one step per call).
            last_hidden: Previous GRU hidden state.
            encoder_outputs: Encoder outputs to attend over.

        Returns:
            (output, hidden): softmax probabilities over `output_size` classes
            and the updated GRU hidden state.
        """
        # Embed the current input word and apply dropout
        embedded = self.embedding_dropout(self.embedding(input_step))

        # One step through the unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)

        # Attention weights from the current GRU output, then the weighted-sum context vector
        attn_weights = self.attn(rnn_output, encoder_outputs)
        context = torch.bmm(attn_weights, encoder_outputs.transpose(0, 1)).squeeze(1)

        # Luong eq. 5: combine GRU output and context
        concat_input = torch.cat((rnn_output.squeeze(0), context), 1)
        concat_output = self.concat(concat_input).tanh()

        # Luong eq. 6: distribution over the next word
        probabilities = F.softmax(self.out(concat_output), dim=1)
        return probabilities, hidden

In [67]:
def maskNLLLoss(inp, target, mask):
    """Mean negative log-likelihood of the target classes over unmasked positions.

    Args:
        inp: Probabilities, one row per batch element and one column per class.
        target: Target class index for every row of `inp`.
        mask: Per-row mask; nonzero/True rows contribute to the loss.

    Returns:
        (loss, n_total): the mean loss tensor, moved to `device`, and the
        number of unmasked rows as a Python number.
    """
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    # masked_select expects a boolean mask; cast so legacy uint8 masks keep
    # working on current PyTorch (a no-op when the mask is already bool)
    loss = crossEntropy.masked_select(mask.bool()).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [83]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    """Run one training iteration on a single batch and update both models.

    Reads the module-level names `device`, `SOS_token` and
    `teacher_forcing_ratio`. `embedding` and `max_length` are accepted but not
    used in the body.

    Returns:
        The batch loss averaged over all unmasked target tokens.
    """
    # Zero gradients
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Running totals
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Every sentence starts decoding from the SOS token
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # The decoder starts from the first `decoder.n_layers` entries of the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Decide once per batch whether to feed ground-truth tokens back in
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Forward batch of sequences through decoder one time step at a time
    for t in range(max_target_len):
        decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden, encoder_outputs
        )
        if use_teacher_forcing:
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
        else:
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
        # Calculate and accumulate loss
        mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
        loss += mask_loss
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal

    # Perform backpropagation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.step()

    return sum(print_losses) / n_totals

In [69]:
def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, print_every, save_every, clip, corpus_name, loadFilename):
    """Train for `n_iteration` iterations, logging progress and saving checkpoints.

    One random batch per iteration is sampled up front. Every `print_every`
    iterations the average loss is printed; every `save_every` iterations a
    checkpoint is written under
    save_dir/model_name/corpus_name/<enc layers>-<dec layers>_<hidden_size>/.

    NOTE: this function reads two names that are not parameters: `checkpoint`
    (only when `loadFilename` is truthy) and `hidden_size`. Both must already
    exist at module level when it is called.
    """
    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        # Resume right after the iteration stored in the loaded checkpoint
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            # exist_ok avoids the check-then-create race of exists() + makedirs()
            os.makedirs(directory, exist_ok=True)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

In [59]:
class GreedySearchDecoder(nn.Module):
    """Greedy decoder: at each step, pick the highest-scoring token and feed it back in."""

    def __init__(self, encoder, decoder):
        """Store the encoder and decoder modules used for decoding."""
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        """Decode `max_length` tokens for one input sequence.

        Args:
            input_seq: Input word indexes, passed straight to the encoder.
            input_length: Sequence lengths, passed straight to the encoder.
            max_length: Number of decoding steps to run.

        Returns:
            (all_tokens, all_scores): the chosen token index of every step and
            the decoder score of each chosen token.
        """
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder.
        # Use the decoder stored on this module; this previously read a
        # module-level `decoder` variable instead.
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

In [60]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    """Decode a reply for one already-normalized sentence.

    `encoder` and `decoder` are accepted but not used directly; decoding is
    delegated to `searcher`.

    Returns:
        List of decoded words looked up in `voc.index2word`.

    Raises:
        KeyError: If `sentence` contains a word missing from `voc`.
    """
    # words -> indexes, treated as a batch of one
    token_ids = indexesFromSentence(voc, sentence)
    lengths = torch.tensor([len(token_ids)]).to(device)
    # Transpose so the sequence runs along the first dimension, as the models expect
    input_batch = torch.LongTensor([token_ids]).transpose(0, 1).to(device)
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    return [voc.index2word[token.item()] for token in tokens]


def evaluateInput(encoder, decoder, searcher, voc):
    """Interactive chat loop: read a sentence, print the bot's reply, repeat.

    Typing 'q' or 'quit' ends the loop. Sentences containing a word unknown to
    `voc` print an error message instead of a reply.
    """
    while True:
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence in ('q', 'quit'):
                break
            # Normalize, then decode
            input_sentence = normalizeString(input_sentence)
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Drop the special tokens before printing
            reply_words = [word for word in output_words if word != 'EOS' and word != 'PAD']
            print('Bot:', ' '.join(reply_words))

        except KeyError:
            print("Error: Encountered unknown word.")

In [84]:
# Configure models
model_name = 'cb_model'
# Attention scoring method; Attn accepts 'dot', 'general' or 'concat'
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500  # Used as both the embedding feature size and the GRU hidden size
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000  # Only used to build the checkpoint file name in the commented-out path below
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    # The keys below match the dictionary written by torch.save in trainIters
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    # Replace the vocabulary's whole state with the one stored in the checkpoint
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize word embeddings (the same embedding module is passed to both encoder and decoder)
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [86]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
# The decoder uses a learning rate scaled by decoder_learning_ratio
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Keep any optimizer state tensors on the same device as the models.
# `.to(device)` is used instead of an unconditional `.cuda()` so this cell
# also works on CPU-only machines. The state is empty for freshly built
# optimizers, in which case these loops do nothing.
for optimizer in (encoder_optimizer, decoder_optimizer):
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, corpus_name, loadFilename)


# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)

# Begin chatting (type 'q' or 'quit' at the prompt to stop)
evaluateInput(encoder, decoder, searcher, voc)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 6.5480
Iteration: 2; Percent complete: 0.1%; Average loss: 6.2206
Iteration: 3; Percent complete: 0.1%; Average loss: 6.0113
Iteration: 4; Percent complete: 0.1%; Average loss: 5.8383
Iteration: 5; Percent complete: 0.1%; Average loss: 5.7872
Iteration: 6; Percent complete: 0.1%; Average loss: 5.6039
Iteration: 7; Percent complete: 0.2%; Average loss: 5.7358
Iteration: 8; Percent complete: 0.2%; Average loss: 5.3806
Iteration: 9; Percent complete: 0.2%; Average loss: 5.5519
Iteration: 10; Percent complete: 0.2%; Average loss: 5.3811
Iteration: 11; Percent complete: 0.3%; Average loss: 5.2315
Iteration: 12; Percent complete: 0.3%; Average loss: 5.2000
Iteration: 13; Percent complete: 0.3%; Average loss: 5.3370
Iteration: 14; Percent complete: 0.4%; Average loss: 5.3769
Iteration: 15; Percent complete: 0.4%; Average loss: 5.1225
Iteration: 16; Percent complete: 0.4%

Iteration: 136; Percent complete: 3.4%; Average loss: 4.9717
Iteration: 137; Percent complete: 3.4%; Average loss: 4.8167
Iteration: 138; Percent complete: 3.5%; Average loss: 4.8270
Iteration: 139; Percent complete: 3.5%; Average loss: 4.6025
Iteration: 140; Percent complete: 3.5%; Average loss: 4.8805
Iteration: 141; Percent complete: 3.5%; Average loss: 4.8223
Iteration: 142; Percent complete: 3.5%; Average loss: 4.7121
Iteration: 143; Percent complete: 3.6%; Average loss: 4.6015
Iteration: 144; Percent complete: 3.6%; Average loss: 4.6669
Iteration: 145; Percent complete: 3.6%; Average loss: 4.7137
Iteration: 146; Percent complete: 3.6%; Average loss: 4.6528
Iteration: 147; Percent complete: 3.7%; Average loss: 4.6563
Iteration: 148; Percent complete: 3.7%; Average loss: 4.3077
Iteration: 149; Percent complete: 3.7%; Average loss: 4.7649
Iteration: 150; Percent complete: 3.8%; Average loss: 4.6624
Iteration: 151; Percent complete: 3.8%; Average loss: 4.9235
Iteration: 152; Percent 

Iteration: 271; Percent complete: 6.8%; Average loss: 4.4892
Iteration: 272; Percent complete: 6.8%; Average loss: 4.4237
Iteration: 273; Percent complete: 6.8%; Average loss: 4.5569
Iteration: 274; Percent complete: 6.9%; Average loss: 4.8537
Iteration: 275; Percent complete: 6.9%; Average loss: 4.2515
Iteration: 276; Percent complete: 6.9%; Average loss: 4.4168
Iteration: 277; Percent complete: 6.9%; Average loss: 4.5151
Iteration: 278; Percent complete: 7.0%; Average loss: 4.3388
Iteration: 279; Percent complete: 7.0%; Average loss: 4.4850
Iteration: 280; Percent complete: 7.0%; Average loss: 4.4285
Iteration: 281; Percent complete: 7.0%; Average loss: 4.4187
Iteration: 282; Percent complete: 7.0%; Average loss: 4.2754
Iteration: 283; Percent complete: 7.1%; Average loss: 4.4318
Iteration: 284; Percent complete: 7.1%; Average loss: 4.4286
Iteration: 285; Percent complete: 7.1%; Average loss: 4.3074
Iteration: 286; Percent complete: 7.1%; Average loss: 4.6079
Iteration: 287; Percent 

Iteration: 406; Percent complete: 10.2%; Average loss: 4.4415
Iteration: 407; Percent complete: 10.2%; Average loss: 4.4292
Iteration: 408; Percent complete: 10.2%; Average loss: 4.2108
Iteration: 409; Percent complete: 10.2%; Average loss: 4.4456
Iteration: 410; Percent complete: 10.2%; Average loss: 4.1468
Iteration: 411; Percent complete: 10.3%; Average loss: 4.1547
Iteration: 412; Percent complete: 10.3%; Average loss: 4.3950
Iteration: 413; Percent complete: 10.3%; Average loss: 4.5533
Iteration: 414; Percent complete: 10.3%; Average loss: 4.1895
Iteration: 415; Percent complete: 10.4%; Average loss: 4.0339
Iteration: 416; Percent complete: 10.4%; Average loss: 4.1842
Iteration: 417; Percent complete: 10.4%; Average loss: 4.5580
Iteration: 418; Percent complete: 10.4%; Average loss: 4.1779
Iteration: 419; Percent complete: 10.5%; Average loss: 4.1908
Iteration: 420; Percent complete: 10.5%; Average loss: 4.1663
Iteration: 421; Percent complete: 10.5%; Average loss: 4.4470
Iteratio

Iteration: 539; Percent complete: 13.5%; Average loss: 4.1549
Iteration: 540; Percent complete: 13.5%; Average loss: 3.9642
Iteration: 541; Percent complete: 13.5%; Average loss: 4.0163
Iteration: 542; Percent complete: 13.6%; Average loss: 4.0424
Iteration: 543; Percent complete: 13.6%; Average loss: 4.2471
Iteration: 544; Percent complete: 13.6%; Average loss: 4.0843
Iteration: 545; Percent complete: 13.6%; Average loss: 4.1556
Iteration: 546; Percent complete: 13.7%; Average loss: 4.3103
Iteration: 547; Percent complete: 13.7%; Average loss: 4.0631
Iteration: 548; Percent complete: 13.7%; Average loss: 4.5413
Iteration: 549; Percent complete: 13.7%; Average loss: 4.4418
Iteration: 550; Percent complete: 13.8%; Average loss: 3.9501
Iteration: 551; Percent complete: 13.8%; Average loss: 4.1209
Iteration: 552; Percent complete: 13.8%; Average loss: 4.4711
Iteration: 553; Percent complete: 13.8%; Average loss: 4.5056
Iteration: 554; Percent complete: 13.9%; Average loss: 4.2145
Iteratio

Iteration: 672; Percent complete: 16.8%; Average loss: 4.2117
Iteration: 673; Percent complete: 16.8%; Average loss: 4.0670
Iteration: 674; Percent complete: 16.9%; Average loss: 4.3849
Iteration: 675; Percent complete: 16.9%; Average loss: 4.3281
Iteration: 676; Percent complete: 16.9%; Average loss: 4.4226
Iteration: 677; Percent complete: 16.9%; Average loss: 4.1421
Iteration: 678; Percent complete: 17.0%; Average loss: 4.2113
Iteration: 679; Percent complete: 17.0%; Average loss: 4.2390
Iteration: 680; Percent complete: 17.0%; Average loss: 4.0399
Iteration: 681; Percent complete: 17.0%; Average loss: 3.9911
Iteration: 682; Percent complete: 17.1%; Average loss: 4.1325
Iteration: 683; Percent complete: 17.1%; Average loss: 4.0645
Iteration: 684; Percent complete: 17.1%; Average loss: 4.0336
Iteration: 685; Percent complete: 17.1%; Average loss: 4.0807
Iteration: 686; Percent complete: 17.2%; Average loss: 4.1073
Iteration: 687; Percent complete: 17.2%; Average loss: 4.0823
Iteratio

Iteration: 805; Percent complete: 20.1%; Average loss: 4.1557
Iteration: 806; Percent complete: 20.2%; Average loss: 3.9893
Iteration: 807; Percent complete: 20.2%; Average loss: 4.3846
Iteration: 808; Percent complete: 20.2%; Average loss: 4.2133
Iteration: 809; Percent complete: 20.2%; Average loss: 4.0681
Iteration: 810; Percent complete: 20.2%; Average loss: 4.4389
Iteration: 811; Percent complete: 20.3%; Average loss: 4.1106
Iteration: 812; Percent complete: 20.3%; Average loss: 4.2437
Iteration: 813; Percent complete: 20.3%; Average loss: 4.2336
Iteration: 814; Percent complete: 20.3%; Average loss: 4.0999
Iteration: 815; Percent complete: 20.4%; Average loss: 3.8598
Iteration: 816; Percent complete: 20.4%; Average loss: 4.0524
Iteration: 817; Percent complete: 20.4%; Average loss: 4.1333
Iteration: 818; Percent complete: 20.4%; Average loss: 4.0037
Iteration: 819; Percent complete: 20.5%; Average loss: 4.4555
Iteration: 820; Percent complete: 20.5%; Average loss: 4.0811
Iteratio

Iteration: 938; Percent complete: 23.4%; Average loss: 4.3288
Iteration: 939; Percent complete: 23.5%; Average loss: 3.9843
Iteration: 940; Percent complete: 23.5%; Average loss: 3.8361
Iteration: 941; Percent complete: 23.5%; Average loss: 3.9511
Iteration: 942; Percent complete: 23.5%; Average loss: 4.0031
Iteration: 943; Percent complete: 23.6%; Average loss: 4.1515
Iteration: 944; Percent complete: 23.6%; Average loss: 4.2639
Iteration: 945; Percent complete: 23.6%; Average loss: 4.0992
Iteration: 946; Percent complete: 23.6%; Average loss: 4.0664
Iteration: 947; Percent complete: 23.7%; Average loss: 4.0778
Iteration: 948; Percent complete: 23.7%; Average loss: 4.0001
Iteration: 949; Percent complete: 23.7%; Average loss: 4.0634
Iteration: 950; Percent complete: 23.8%; Average loss: 4.2128
Iteration: 951; Percent complete: 23.8%; Average loss: 4.0699
Iteration: 952; Percent complete: 23.8%; Average loss: 4.1801
Iteration: 953; Percent complete: 23.8%; Average loss: 3.8112
Iteratio

Iteration: 1070; Percent complete: 26.8%; Average loss: 4.3017
Iteration: 1071; Percent complete: 26.8%; Average loss: 3.8497
Iteration: 1072; Percent complete: 26.8%; Average loss: 4.0499
Iteration: 1073; Percent complete: 26.8%; Average loss: 3.9120
Iteration: 1074; Percent complete: 26.9%; Average loss: 3.9512
Iteration: 1075; Percent complete: 26.9%; Average loss: 3.9817
Iteration: 1076; Percent complete: 26.9%; Average loss: 3.9615
Iteration: 1077; Percent complete: 26.9%; Average loss: 4.3103
Iteration: 1078; Percent complete: 27.0%; Average loss: 4.0090
Iteration: 1079; Percent complete: 27.0%; Average loss: 4.0432
Iteration: 1080; Percent complete: 27.0%; Average loss: 4.0557
Iteration: 1081; Percent complete: 27.0%; Average loss: 4.2225
Iteration: 1082; Percent complete: 27.1%; Average loss: 4.1477
Iteration: 1083; Percent complete: 27.1%; Average loss: 3.9678
Iteration: 1084; Percent complete: 27.1%; Average loss: 4.0610
Iteration: 1085; Percent complete: 27.1%; Average loss:

Iteration: 1201; Percent complete: 30.0%; Average loss: 4.2403
Iteration: 1202; Percent complete: 30.0%; Average loss: 3.9446
Iteration: 1203; Percent complete: 30.1%; Average loss: 4.0133
Iteration: 1204; Percent complete: 30.1%; Average loss: 3.9828
Iteration: 1205; Percent complete: 30.1%; Average loss: 3.8787
Iteration: 1206; Percent complete: 30.1%; Average loss: 4.0018
Iteration: 1207; Percent complete: 30.2%; Average loss: 4.0156
Iteration: 1208; Percent complete: 30.2%; Average loss: 4.0028
Iteration: 1209; Percent complete: 30.2%; Average loss: 3.8999
Iteration: 1210; Percent complete: 30.2%; Average loss: 4.1915
Iteration: 1211; Percent complete: 30.3%; Average loss: 3.9552
Iteration: 1212; Percent complete: 30.3%; Average loss: 4.4178
Iteration: 1213; Percent complete: 30.3%; Average loss: 4.2144
Iteration: 1214; Percent complete: 30.3%; Average loss: 3.9942
Iteration: 1215; Percent complete: 30.4%; Average loss: 4.0439
Iteration: 1216; Percent complete: 30.4%; Average loss:

Iteration: 1332; Percent complete: 33.3%; Average loss: 4.0358
Iteration: 1333; Percent complete: 33.3%; Average loss: 4.0611
Iteration: 1334; Percent complete: 33.4%; Average loss: 4.1244
Iteration: 1335; Percent complete: 33.4%; Average loss: 3.9553
Iteration: 1336; Percent complete: 33.4%; Average loss: 3.8557
Iteration: 1337; Percent complete: 33.4%; Average loss: 4.0341
Iteration: 1338; Percent complete: 33.5%; Average loss: 4.2143
Iteration: 1339; Percent complete: 33.5%; Average loss: 3.9843
Iteration: 1340; Percent complete: 33.5%; Average loss: 4.0321
Iteration: 1341; Percent complete: 33.5%; Average loss: 3.9768
Iteration: 1342; Percent complete: 33.6%; Average loss: 3.7675
Iteration: 1343; Percent complete: 33.6%; Average loss: 4.0492
Iteration: 1344; Percent complete: 33.6%; Average loss: 3.8942
Iteration: 1345; Percent complete: 33.6%; Average loss: 4.1655
Iteration: 1346; Percent complete: 33.7%; Average loss: 4.1067
Iteration: 1347; Percent complete: 33.7%; Average loss:

Iteration: 1463; Percent complete: 36.6%; Average loss: 3.7480
Iteration: 1464; Percent complete: 36.6%; Average loss: 4.0787
Iteration: 1465; Percent complete: 36.6%; Average loss: 3.8068
Iteration: 1466; Percent complete: 36.6%; Average loss: 3.9650
Iteration: 1467; Percent complete: 36.7%; Average loss: 3.8279
Iteration: 1468; Percent complete: 36.7%; Average loss: 3.9189
Iteration: 1469; Percent complete: 36.7%; Average loss: 3.9196
Iteration: 1470; Percent complete: 36.8%; Average loss: 3.9197
Iteration: 1471; Percent complete: 36.8%; Average loss: 4.1327
Iteration: 1472; Percent complete: 36.8%; Average loss: 4.0850
Iteration: 1473; Percent complete: 36.8%; Average loss: 3.8312
Iteration: 1474; Percent complete: 36.9%; Average loss: 3.6707
Iteration: 1475; Percent complete: 36.9%; Average loss: 4.0529
Iteration: 1476; Percent complete: 36.9%; Average loss: 3.9984
Iteration: 1477; Percent complete: 36.9%; Average loss: 4.0357
Iteration: 1478; Percent complete: 37.0%; Average loss:

Iteration: 1594; Percent complete: 39.9%; Average loss: 3.7610
Iteration: 1595; Percent complete: 39.9%; Average loss: 4.0060
Iteration: 1596; Percent complete: 39.9%; Average loss: 3.9334
Iteration: 1597; Percent complete: 39.9%; Average loss: 4.0010
Iteration: 1598; Percent complete: 40.0%; Average loss: 4.0142
Iteration: 1599; Percent complete: 40.0%; Average loss: 4.1361
Iteration: 1600; Percent complete: 40.0%; Average loss: 4.0945
Iteration: 1601; Percent complete: 40.0%; Average loss: 3.8788
Iteration: 1602; Percent complete: 40.1%; Average loss: 3.9450
Iteration: 1603; Percent complete: 40.1%; Average loss: 3.7229
Iteration: 1604; Percent complete: 40.1%; Average loss: 4.0291
Iteration: 1605; Percent complete: 40.1%; Average loss: 4.0669
Iteration: 1606; Percent complete: 40.2%; Average loss: 3.9460
Iteration: 1607; Percent complete: 40.2%; Average loss: 3.9825
Iteration: 1608; Percent complete: 40.2%; Average loss: 3.9895
Iteration: 1609; Percent complete: 40.2%; Average loss:

Iteration: 1725; Percent complete: 43.1%; Average loss: 3.7419
Iteration: 1726; Percent complete: 43.1%; Average loss: 3.6534
Iteration: 1727; Percent complete: 43.2%; Average loss: 3.7956
Iteration: 1728; Percent complete: 43.2%; Average loss: 3.9378
Iteration: 1729; Percent complete: 43.2%; Average loss: 3.9767
Iteration: 1730; Percent complete: 43.2%; Average loss: 4.1793
Iteration: 1731; Percent complete: 43.3%; Average loss: 3.8079
Iteration: 1732; Percent complete: 43.3%; Average loss: 3.7366
Iteration: 1733; Percent complete: 43.3%; Average loss: 4.1402
Iteration: 1734; Percent complete: 43.4%; Average loss: 3.8407
Iteration: 1735; Percent complete: 43.4%; Average loss: 3.9413
Iteration: 1736; Percent complete: 43.4%; Average loss: 3.8467
Iteration: 1737; Percent complete: 43.4%; Average loss: 4.0946
Iteration: 1738; Percent complete: 43.5%; Average loss: 3.8277
Iteration: 1739; Percent complete: 43.5%; Average loss: 3.7674
Iteration: 1740; Percent complete: 43.5%; Average loss:

Iteration: 1856; Percent complete: 46.4%; Average loss: 4.1516
Iteration: 1857; Percent complete: 46.4%; Average loss: 4.2479
Iteration: 1858; Percent complete: 46.5%; Average loss: 4.0718
Iteration: 1859; Percent complete: 46.5%; Average loss: 3.9605
Iteration: 1860; Percent complete: 46.5%; Average loss: 3.8931
Iteration: 1861; Percent complete: 46.5%; Average loss: 3.8714
Iteration: 1862; Percent complete: 46.6%; Average loss: 3.9373
Iteration: 1863; Percent complete: 46.6%; Average loss: 3.7366
Iteration: 1864; Percent complete: 46.6%; Average loss: 4.1304
Iteration: 1865; Percent complete: 46.6%; Average loss: 3.7228
Iteration: 1866; Percent complete: 46.7%; Average loss: 3.8167
Iteration: 1867; Percent complete: 46.7%; Average loss: 3.9371
Iteration: 1868; Percent complete: 46.7%; Average loss: 3.9100
Iteration: 1869; Percent complete: 46.7%; Average loss: 3.9682
Iteration: 1870; Percent complete: 46.8%; Average loss: 3.7969
Iteration: 1871; Percent complete: 46.8%; Average loss:

Iteration: 1987; Percent complete: 49.7%; Average loss: 3.9209
Iteration: 1988; Percent complete: 49.7%; Average loss: 3.8372
Iteration: 1989; Percent complete: 49.7%; Average loss: 4.0703
Iteration: 1990; Percent complete: 49.8%; Average loss: 4.0845
Iteration: 1991; Percent complete: 49.8%; Average loss: 3.9439
Iteration: 1992; Percent complete: 49.8%; Average loss: 3.8488
Iteration: 1993; Percent complete: 49.8%; Average loss: 3.8351
Iteration: 1994; Percent complete: 49.9%; Average loss: 3.8088
Iteration: 1995; Percent complete: 49.9%; Average loss: 4.0654
Iteration: 1996; Percent complete: 49.9%; Average loss: 3.8961
Iteration: 1997; Percent complete: 49.9%; Average loss: 3.6721
Iteration: 1998; Percent complete: 50.0%; Average loss: 3.7089
Iteration: 1999; Percent complete: 50.0%; Average loss: 3.6823
Iteration: 2000; Percent complete: 50.0%; Average loss: 3.8802
Iteration: 2001; Percent complete: 50.0%; Average loss: 3.8471
Iteration: 2002; Percent complete: 50.0%; Average loss:

Iteration: 2118; Percent complete: 52.9%; Average loss: 3.8972
Iteration: 2119; Percent complete: 53.0%; Average loss: 3.6222
Iteration: 2120; Percent complete: 53.0%; Average loss: 3.7496
Iteration: 2121; Percent complete: 53.0%; Average loss: 3.9323
Iteration: 2122; Percent complete: 53.0%; Average loss: 3.8149
Iteration: 2123; Percent complete: 53.1%; Average loss: 3.9939
Iteration: 2124; Percent complete: 53.1%; Average loss: 4.1600
Iteration: 2125; Percent complete: 53.1%; Average loss: 3.8744
Iteration: 2126; Percent complete: 53.1%; Average loss: 4.0963
Iteration: 2127; Percent complete: 53.2%; Average loss: 4.0173
Iteration: 2128; Percent complete: 53.2%; Average loss: 3.7766
Iteration: 2129; Percent complete: 53.2%; Average loss: 3.9757
Iteration: 2130; Percent complete: 53.2%; Average loss: 4.0744
Iteration: 2131; Percent complete: 53.3%; Average loss: 4.0199
Iteration: 2132; Percent complete: 53.3%; Average loss: 3.7865
Iteration: 2133; Percent complete: 53.3%; Average loss:

Iteration: 2249; Percent complete: 56.2%; Average loss: 3.7193
Iteration: 2250; Percent complete: 56.2%; Average loss: 3.7678
Iteration: 2251; Percent complete: 56.3%; Average loss: 4.0439
Iteration: 2252; Percent complete: 56.3%; Average loss: 4.0487
Iteration: 2253; Percent complete: 56.3%; Average loss: 3.9277
Iteration: 2254; Percent complete: 56.4%; Average loss: 3.8037
Iteration: 2255; Percent complete: 56.4%; Average loss: 3.6340
Iteration: 2256; Percent complete: 56.4%; Average loss: 3.9238
Iteration: 2257; Percent complete: 56.4%; Average loss: 4.0773
Iteration: 2258; Percent complete: 56.5%; Average loss: 3.8890
Iteration: 2259; Percent complete: 56.5%; Average loss: 3.6388
Iteration: 2260; Percent complete: 56.5%; Average loss: 3.9632
Iteration: 2261; Percent complete: 56.5%; Average loss: 3.9839
Iteration: 2262; Percent complete: 56.5%; Average loss: 4.0119
Iteration: 2263; Percent complete: 56.6%; Average loss: 3.9953
Iteration: 2264; Percent complete: 56.6%; Average loss:

KeyboardInterrupt: 