In [75]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math

In [76]:
corpus_name = "cornell movie-dialogs corpus"
corpus = os.path.join(r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

printLines(os.path.join(corpus, "movie_lines.txt"))

# Define path to new file
datafile = os.path.join(corpus, "formatted_movie_lines.txt")


b'L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!\n'
b'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!\n'
b'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.\n'
b'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?\n'
b"L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.\n"
b'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow\n'
b"L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.\n"
b'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No\n'
b'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?\n'
b'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?\n'


In [77]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

In [78]:
class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3 # Count default tokens

        for word in keep_words:
            self.addWord(word)

In [79]:
MAX_LENGTH = 10  # Maximum sentence length to consider

# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    print("Reading lines...")
    # Read the file and split into lines
    lines = open(datafile, encoding='utf-8').\
        read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p):
    # Input sequences need to preserve the last word for EOS token
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

# Filter pairs using filterPair condition
def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    voc, pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(pairs)))
    pairs = filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:", voc.num_words)
    return voc, pairs

In [80]:
datafile_validation = os.path.join(corpus, "formatted_movie_lines_valid.txt")

In [81]:
# Load/Assemble voc and pairs
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)

Start preparing training data ...
Reading lines...
Read 171282 sentence pairs
Trimmed to 49566 sentence pairs
Counting words...
Counted words: 15583


In [82]:
voc_validation, pairs_validation = loadPrepareData(corpus, corpus_name, datafile_validation, save_dir)

Start preparing training data ...
Reading lines...
Read 53424 sentence pairs
Trimmed to 15653 sentence pairs
Counting words...
Counted words: 7808


In [83]:
def indexesFromSentence(voc, sentence):
    return [voc.word2index[word] for word in sentence.split(' ')] + [EOS_token]


def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len


In [84]:
def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

In [85]:
# Splits each line of the file into a dictionary of fields
def loadLines(fileName, fields):
    lines = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            lineObj = {}
            for i, field in enumerate(fields):
                lineObj[field] = values[i]
            lines[lineObj['lineID']] = lineObj
    return lines

In [86]:
# Groups fields of lines from `loadLines` into conversations based on *movie_conversations.txt*
def loadConversations(fileName, lines, fields):
    conversations = []
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            convObj = {}
            for i, field in enumerate(fields):
                convObj[field] = values[i]
            # Convert string to list (convObj["utteranceIDs"] == "['L598485', 'L598486', ...]")
            utterance_id_pattern = re.compile('L[0-9]+')
            lineIds = utterance_id_pattern.findall(convObj["utteranceIDs"])
            # Reassemble lines
            convObj["lines"] = []
            for lineId in lineIds:
                convObj["lines"].append(lines[lineId])
            conversations.append(convObj)
    return conversations

In [87]:
# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    qa_pairs = []
    for conversation in conversations:
        # Iterate over all the lines of the conversation
        for i in range(len(conversation["lines"]) - 1):  # We ignore the last line (no answer for it)
            inputLine = conversation["lines"][i]["text"].strip()
            targetLine = conversation["lines"][i+1]["text"].strip()
            # Filter wrong samples (if one of the lists is empty)
            if inputLine and targetLine:
                qa_pairs.append([inputLine, targetLine])
    return qa_pairs

In [88]:
# Define path to new file
datafile = r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data\cornell movie-dialogs corpus\formatted_movie_lines.txt"
datafile_validation = r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data\cornell movie-dialogs corpus\formatted_movie_lines_validation.txt"


In [89]:
delimiter = '\t'
# Unescape the delimiter
delimiter = str(codecs.decode(delimiter, "unicode_escape"))

# Initialize lines dict, conversations list, and field ids
lines = {}
conversations = []
MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

In [90]:
 # Load lines and process conversations
print("\nProcessing corpus...")
lines = loadLines(r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data\cornell movie-dialogs corpus\movie_lines.txt", MOVIE_LINES_FIELDS)
print("\nLoading conversations...")
conversations = loadConversations(r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data\cornell movie-dialogs corpus\movie_conversations.txt",
                              lines, MOVIE_CONVERSATIONS_FIELDS)

# conversations=conversations[0:50000]
# Write new csv file
number = 1
print("\nWriting Validation formatted file...")
with open(datafile_validation, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')

    for pair in extractSentencePairs(conversations):
        if number <= 50000:
            writer.writerow(pair)
        number += 1

number = 1
print("\nWriting Training  formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    for pair in extractSentencePairs(conversations):
        if number > 50000:
            writer.writerow(pair)
        number += 1



Processing corpus...

Loading conversations...

Writing Validation formatted file...

Writing Training  formatted file...


In [91]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF ,LatentDirichletAllocation
from sklearn.pipeline import Pipeline
from sklearn.datasets import fetch_20newsgroups
import numpy as np

In [92]:
def save_model(dictionary, feature_names, filename):
    with open(filename, "wb") as f:
        np.savez(f, dictionary=dictionary, feature_names=feature_names)
        
def load_model(filename):
    """load model saved with save_model. Returns a tupe of dictionary, feature_names"""
    with open(filename, "rb") as f:
        arrs = np.load(f)
        return arrs["dictionary"], arrs["feature_names"]
    

def print_top_words(dictionary, feature_names, n_top_words):
    """print the top n_top_words from each topic in the dictionary matrix
    of dictionary. Feature names is a list of words with indices corresponding
    to the columns of the dictionary"""
    for topic_idx, topic in enumerate(dictionary):
        message = "Topic #%d: " % topic_idx
        message += " ".join([feature_names[i]
                             for i in topic.argsort()[:-n_top_words - 1:-1]])
        print(message)
    print()


In [93]:
def train_model(data, n_features, n_components):
    model = Pipeline(steps=[("tfidf", TfidfVectorizer(max_df=0.95, min_df=2,
                                                      max_features=n_features, stop_words='english')),
                            ("lda", LatentDirichletAllocation(n_components=n_components, random_state=1))])
    model.fit(data)
    features = model["tfidf"].get_feature_names()
    dictionary = model["lda"].components_
    return dictionary, features

def train_save_model(data, n_features, n_components, filename):
    dictionary, features = train_model(data, n_features, n_components)
    save_model(dictionary, features, filename)
    return dictionary, features        

def load_news():
    dataset = fetch_20newsgroups(shuffle=True, random_state=1,
                                 remove=('headers', 'footers', 'quotes'))
    data_samples = dataset.data
    return data_samples   


In [94]:
# number of top words to use in tfidf representation of data
n_features = 1000
# number of topics in LDA
n_components = 10

# train model and save it
print("Loading, training, and saving 20 news groups model")
news_data = load_news()
news_dict, news_feats = train_save_model(news_data, n_features, n_components, "data/topic/news-lda.npz")
print_top_words(news_dict, news_feats, 10)

Loading, training, and saving 20 news groups model
Topic #0: 00 10 11 1993 15 12 23 13 apr 17
Topic #1: god people think jesus don say believe just does know
Topic #2: windows window file program use files using edu server motif
Topic #3: government people key israel gun law don state encryption public
Topic #4: drive card mac software video monitor use windows pc scsi
Topic #5: space nasa new orbit earth air years launch research water
Topic #6: game team year games season players play hockey league win
Topic #7: mail phone address thanks sale edu email interested 00 price
Topic #8: just like car don bike good know think ve time
Topic #9: thanks know mail looking post hi info faq help appreciated



In [95]:
USE_CUDA = torch.cuda.is_available()
print("USE_CUDA", USE_CUDA)
if USE_CUDA:
    device = torch.device("cuda")

USE_CUDA True


In [96]:
def calculate_codes(topic_for_code, input_seq_for_code, voc, feature_path, batch_size):
    # batch_size = 64 for training, 1 for chatting
    ldadict = np.load(feature_path)["feature_names"]
    new_input_seq = torch.zeros(batch_size, len(ldadict))

    for i in range(batch_size):
        for j in range(len(input_seq_for_code[i])):
            input_seq_words = voc.index2word[input_seq_for_code[i][j].item()]
            for check_index in range(len(ldadict)):
                if ldadict[check_index] == input_seq_words:
                    new_input_seq[i][check_index] = 1

    three_d_topic = topic_for_code.repeat(batch_size, 1, 1).to(device)
    three_d_q = new_input_seq.repeat(1, 1, 1).permute(1, 2, 0).to(device)

    return torch.bmm(three_d_topic, three_d_q)


In [97]:
class EncoderRNN(nn.Module):
    def __init__(self,
                 hidden_size,
                 embedding,
                 topics,
                 n_layers=1,
                 dropout=0,
                 batch_size=64):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding
        self.topics = topics
        self.batch_size = batch_size
        # self.voc = voc
        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        self.gru = nn.GRU(hidden_size,
                          hidden_size,
                          n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        #print(input_seq)
        batch_size = 1
        #get the codes first
        input_seq_for_code = input_seq.transpose(0,1)
        topic_for_code = self.topics
        feature_path = r"data/topic/news-lda.npz"
        codes = calculate_codes(topic_for_code, input_seq_for_code, voc, feature_path, self.batch_size)
        #print(codes.size())
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden, codes


In [98]:
class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)


In [99]:
def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [100]:
def validation(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden, codes = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    teacher_forcing_ratio = 1.0
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    return sum(print_losses) / n_totals


In [101]:
def train(input_variable,
          lengths,
          target_variable,
          mask,
          max_target_len,
          encoder,
          decoder,
          embedding,
          encoder_optimizer,
          decoder_optimizer,
          batch_size,
          clip,
          max_length=MAX_LENGTH):

    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden, codes = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    #decoder_hidden = encoder_hidden[:decoder.n_layers]
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    teacher_forcing_ratio = 1.0
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):

            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals


In [102]:
def trainIters(model_name,
               voc,
               voc_validation,
               pairs,
               pairs_validation,
               encoder,
               decoder,
               encoder_optimizer,
               decoder_optimizer,
               embedding,
               encoder_n_layers,
               decoder_n_layers,
               save_dir,
               n_iteration,
               batch_size,
               print_every,
               save_every,
               clip,
               corpus_name,
               loadFilename,
               DICT_NAME,
               checkpoint):

    #history record file
    history_file = open(r'history_delta.txt', 'w')

    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    training_batches_validation = [batch2TrainData(voc_validation, [random.choice(pairs_validation) for _ in range(batch_size)])
                        for _ in range(n_iteration)]

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    print_loss_validation = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        training_batch_validation = training_batches_validation[iteration - 1]

        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
        input_variable_validation, lengths_validation, target_variable_validation, mask_validation, max_target_len_validation = training_batch_validation

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss
        loss_validation = validation(input_variable_validation, lengths_validation, target_variable_validation, mask_validation, max_target_len_validation, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss_validation += loss_validation

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print_loss_avg_validation = print_loss_validation / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Training loss: {:.4f}; Validation loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg, print_loss_avg_validation))
            history_file.write('{}'.format(print_loss_avg))
            history_file.write(' ')
            history_file.write('{}'.format(print_loss_avg_validation))
            history_file.write("\n")

            print_loss = 0
            print_loss_validation = 0



In [103]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher, voc):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            start_time = time.time()
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Bot:', ' '.join(output_words))
            end_time = time.time()
            print('Responding time:', end_time - start_time)
        except KeyError:
            print("Error: Encountered unknown word.")


In [104]:
def rand_pick(probabilities):
    x=random.uniform(0,1)
    #print(probabilities[0][20])
    cumulative_probability = 0.0
    #list = [n for n in range(len(probabilities))]
    for item in range(len(probabilities[0])):
        cumulative_probability += probabilities[0][item]
        if x < cumulative_probability:
            break
    item_list = torch.zeros([1], device=device, dtype=torch.long)
    item_list[0] = item
    score = torch.zeros([1],device=device)
    score[0]=probabilities[0][item]
    
    return item_list, score

class SearchDecoder(nn.Module):
    def __init__(self, encoder, decoder, batch_size=1):
        super(SearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.batch_size = batch_size

    def forward(self, input_seq, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden, codes = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input,
                                                          decoder_hidden,
                                                          encoder_outputs,
                                                          codes,
                                                          self.batch_size)
            # Obtain word based on probability distribution token and its softmax score
            decoder_input, decoder_scores = my_random_pick(decoder_output)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores


In [105]:
class TopicAttention(nn.Module):
    def __init__(self, topic_vocab_size, enc_hid_dim, dec_hid_dim):
        super(TopicAttention, self).__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        self.attn = nn.Linear(topic_vocab_size + dec_hid_dim + enc_hid_dim, dec_hid_dim)
        self.v = nn.Parameter(torch.rand(dec_hid_dim))

    def forward(self, hidden, topic_dict, enc_hidden):
        batch_size = enc_hidden.shape[0]
        num_topics = topic_dict.shape[0]

        hidden = hidden.repeat(num_topics, 1, 1).permute(1, 0, 2)
        enc_hidden = enc_hidden.repeat(num_topics, 1, 1).permute(1, 0, 2)
        topic_dict = topic_dict.repeat(batch_size, 1, 1)

        
        energy = torch.tanh(self.attn(torch.cat((hidden, topic_dict, enc_hidden), dim=2)))

        # energy = [batch_size, dec_hid_dim]
        energy = energy.permute(0, 2, 1)

        v = self.v.repeat(batch_size, 1).unsqueeze(1)

        # v = [batch_size, 1, dec_hid_dim]
        attention = torch.bmm(v, energy).squeeze(1)

        return F.softmax(attention, dim=1).unsqueeze(1)


In [106]:
class TopicDecoder(nn.Module):
    def __init__(self,
                 attn_model,
                 embedding,
                 hidden_size,
                 output_size,
                 enc_hid_dim,
                 dec_hid_dim,
                 topics,
                 topic_vocab_size,
                 n_layers=1,
                 dropout=0.1,
                 batch_size=1):
        super(TopicDecoder, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.topics = topics
        self.topic_vocab_size = topic_vocab_size
        self.batch_size = batch_size

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat = nn.Linear(hidden_size * 2 + topic_vocab_size + 1000, hidden_size) #1000 is len of topic dictionary
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)
        self.topic_attn = TopicAttention(topic_vocab_size, enc_hid_dim, dec_hid_dim)

    def forward(self,
                input_step,
                last_hidden,
                encoder_outputs,
                codes,
                batch_size=1):
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)

        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        topic_attn_weights = self.topic_attn(last_hidden, self.topics, encoder_outputs[-1])

        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        encoder_outputs = encoder_outputs.permute(1,0,2)
      
        context = torch.bmm(attn_weights, encoder_outputs)
     
        topic_context = torch.bmm(topic_attn_weights, self.topics.repeat(batch_size, 1, 1))
       
        topic_for_Pk = self.topics.repeat(batch_size,1,1).permute(0,2,1)
        # print('topic_for_Pk',topic_for_Pk.size())
        Pk_context = torch.bmm(topic_for_Pk, codes)
        # print('Pk_context',Pk_context.size())

        # Concatenate weighted context vector and GRU output using Luong eq. 5
       
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        topic_context = topic_context.squeeze(1)
      
        Pk_context = Pk_context.squeeze(2)
        concat_input = torch.cat((rnn_output, context, topic_context, Pk_context), 1)
        # print('concat_input', concat_input.size())
        concat_output = torch.tanh(self.concat(concat_input))

        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output, hidden

In [107]:
# Configure models
model_name = 'topic_model'
attn_model = 'dot'
hidden_size = 500
encoder_n_layers = 1
decoder_n_layers = 1
dropout = 0.1
batch_size = 64 # 64 for training, 1 for chatting

In [108]:
DICT_NAME = 'data/topic/news'
DICT_PATH = DICT_NAME + '-lda.npz'
print("DICT_PATH",DICT_PATH)
topic_dict = torch.tensor(np.load(DICT_PATH)["dictionary"], dtype=torch.float).to(device)

DICT_PATH data/topic/news-lda.npz


In [109]:
# Set checkpoint to load from
loadFilename = None
checkpoint_iter = 64000

In [110]:
print('Building encoder and decoder ...')
# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)

# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size=hidden_size,
                             embedding=embedding,
                             topics=topic_dict,
                             n_layers=encoder_n_layers,
                             dropout=dropout,
                             batch_size=batch_size)

enc_hid_dim, dec_hid_dim, emb_dim = hidden_size, hidden_size, hidden_size

decoder = TopicDecoder(attn_model,
                                  embedding,
                                  hidden_size,
                                  voc.num_words,
                                  enc_hid_dim,
                                  dec_hid_dim,
                                  topic_dict,
                                  topic_dict.shape[1],
                                  decoder_n_layers,
                                  dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [111]:
#start training
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 100

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# If you have cuda, configure cuda to call
for state in encoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.cuda()

for state in decoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.cuda()

# Run training iterations
print("Starting Training!")
trainIters(model_name=model_name,
                   voc=voc,
                   voc_validation=voc_validation,
                   pairs=pairs,
                   pairs_validation=pairs_validation,
                   encoder=encoder,
                   decoder=decoder,
                   encoder_optimizer=encoder_optimizer,
                   decoder_optimizer=decoder_optimizer,
                   embedding=embedding,
                   encoder_n_layers=encoder_n_layers,
                   decoder_n_layers=decoder_n_layers,
                   save_dir=save_dir,
                   n_iteration=n_iteration,
                   batch_size=batch_size,
                   print_every=print_every,
                   save_every=save_every,
                   clip=clip,
                   corpus_name=corpus_name,
                   loadFilename=loadFilename,
                   DICT_NAME=DICT_NAME,
                   checkpoint=None)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Training loss: 9.6927; Validation loss: 8.8661
Iteration: 2; Percent complete: 0.1%; Training loss: 8.6254; Validation loss: 8.6060
Iteration: 3; Percent complete: 0.1%; Training loss: 8.3154; Validation loss: 8.4682
Iteration: 4; Percent complete: 0.1%; Training loss: 8.0159; Validation loss: 8.1137
Iteration: 5; Percent complete: 0.1%; Training loss: 7.8199; Validation loss: 8.1513
Iteration: 6; Percent complete: 0.1%; Training loss: 7.4927; Validation loss: 7.9734
Iteration: 7; Percent complete: 0.2%; Training loss: 7.3132; Validation loss: 7.7576
Iteration: 8; Percent complete: 0.2%; Training loss: 6.8961; Validation loss: 7.9036
Iteration: 9; Percent complete: 0.2%; Training loss: 6.9288; Validation loss: 7.8743
Iteration: 10; Percent complete: 0.2%; Training loss: 6.9273; Validation loss: 7.6272
Iteration: 11; Percent complete: 0.3%; Training loss: 6.6716; Validation loss

Iteration: 96; Percent complete: 2.4%; Training loss: 5.2090; Validation loss: 7.5012
Iteration: 97; Percent complete: 2.4%; Training loss: 5.1696; Validation loss: 7.4091
Iteration: 98; Percent complete: 2.5%; Training loss: 5.2261; Validation loss: 7.4273
Iteration: 99; Percent complete: 2.5%; Training loss: 5.3782; Validation loss: 7.4700
Iteration: 100; Percent complete: 2.5%; Training loss: 5.1777; Validation loss: 7.3874
Iteration: 101; Percent complete: 2.5%; Training loss: 5.2980; Validation loss: 7.3899
Iteration: 102; Percent complete: 2.5%; Training loss: 5.3402; Validation loss: 7.3254
Iteration: 103; Percent complete: 2.6%; Training loss: 5.2163; Validation loss: 7.7297
Iteration: 104; Percent complete: 2.6%; Training loss: 5.2175; Validation loss: 7.5023
Iteration: 105; Percent complete: 2.6%; Training loss: 5.3244; Validation loss: 7.5709
Iteration: 106; Percent complete: 2.6%; Training loss: 5.2211; Validation loss: 7.4021
Iteration: 107; Percent complete: 2.7%; Trainin

Iteration: 191; Percent complete: 4.8%; Training loss: 5.2445; Validation loss: 7.5980
Iteration: 192; Percent complete: 4.8%; Training loss: 5.2056; Validation loss: 7.6021
Iteration: 193; Percent complete: 4.8%; Training loss: 5.1456; Validation loss: 7.1169
Iteration: 194; Percent complete: 4.9%; Training loss: 5.2405; Validation loss: 7.4026
Iteration: 195; Percent complete: 4.9%; Training loss: 5.2292; Validation loss: 7.6006
Iteration: 196; Percent complete: 4.9%; Training loss: 5.0408; Validation loss: 7.4731
Iteration: 197; Percent complete: 4.9%; Training loss: 5.1386; Validation loss: 7.5594
Iteration: 198; Percent complete: 5.0%; Training loss: 5.3031; Validation loss: 7.6250
Iteration: 199; Percent complete: 5.0%; Training loss: 5.1498; Validation loss: 7.5452
Iteration: 200; Percent complete: 5.0%; Training loss: 5.1174; Validation loss: 7.1607
Iteration: 201; Percent complete: 5.0%; Training loss: 5.2292; Validation loss: 7.4229
Iteration: 202; Percent complete: 5.1%; Tra

Iteration: 286; Percent complete: 7.1%; Training loss: 5.0943; Validation loss: 7.5156
Iteration: 287; Percent complete: 7.2%; Training loss: 5.3101; Validation loss: 7.3877
Iteration: 288; Percent complete: 7.2%; Training loss: 5.4109; Validation loss: 7.5594
Iteration: 289; Percent complete: 7.2%; Training loss: 5.3191; Validation loss: 7.4714
Iteration: 290; Percent complete: 7.2%; Training loss: 5.2106; Validation loss: 7.4162
Iteration: 291; Percent complete: 7.3%; Training loss: 5.0473; Validation loss: 7.3196
Iteration: 292; Percent complete: 7.3%; Training loss: 5.1000; Validation loss: 7.5980
Iteration: 293; Percent complete: 7.3%; Training loss: 5.2228; Validation loss: 7.4368
Iteration: 294; Percent complete: 7.3%; Training loss: 5.1325; Validation loss: 7.3275
Iteration: 295; Percent complete: 7.4%; Training loss: 5.1854; Validation loss: 7.6276
Iteration: 296; Percent complete: 7.4%; Training loss: 5.2726; Validation loss: 7.4364
Iteration: 297; Percent complete: 7.4%; Tra

Iteration: 381; Percent complete: 9.5%; Training loss: 4.9532; Validation loss: 7.5244
Iteration: 382; Percent complete: 9.6%; Training loss: 5.3752; Validation loss: 7.2984
Iteration: 383; Percent complete: 9.6%; Training loss: 5.4404; Validation loss: 7.5133
Iteration: 384; Percent complete: 9.6%; Training loss: 4.9760; Validation loss: 7.3942
Iteration: 385; Percent complete: 9.6%; Training loss: 5.0143; Validation loss: 7.5660
Iteration: 386; Percent complete: 9.7%; Training loss: 5.1966; Validation loss: 7.3180
Iteration: 387; Percent complete: 9.7%; Training loss: 5.2436; Validation loss: 7.4537
Iteration: 388; Percent complete: 9.7%; Training loss: 5.0836; Validation loss: 7.4817
Iteration: 389; Percent complete: 9.7%; Training loss: 5.0483; Validation loss: 7.6227
Iteration: 390; Percent complete: 9.8%; Training loss: 5.3567; Validation loss: 7.3444
Iteration: 391; Percent complete: 9.8%; Training loss: 5.2641; Validation loss: 7.2347
Iteration: 392; Percent complete: 9.8%; Tra

Iteration: 475; Percent complete: 11.9%; Training loss: 5.2868; Validation loss: 7.2834
Iteration: 476; Percent complete: 11.9%; Training loss: 5.2893; Validation loss: 7.4271
Iteration: 477; Percent complete: 11.9%; Training loss: 5.0142; Validation loss: 7.3244
Iteration: 478; Percent complete: 11.9%; Training loss: 5.1621; Validation loss: 7.5597
Iteration: 479; Percent complete: 12.0%; Training loss: 5.1121; Validation loss: 7.1480
Iteration: 480; Percent complete: 12.0%; Training loss: 5.0893; Validation loss: 7.3249
Iteration: 481; Percent complete: 12.0%; Training loss: 5.2408; Validation loss: 7.4402
Iteration: 482; Percent complete: 12.0%; Training loss: 4.9457; Validation loss: 7.2752
Iteration: 483; Percent complete: 12.1%; Training loss: 5.2220; Validation loss: 7.5828
Iteration: 484; Percent complete: 12.1%; Training loss: 5.2305; Validation loss: 7.4100
Iteration: 485; Percent complete: 12.1%; Training loss: 5.3501; Validation loss: 7.4408
Iteration: 486; Percent complete

Iteration: 569; Percent complete: 14.2%; Training loss: 5.2883; Validation loss: 7.6325
Iteration: 570; Percent complete: 14.2%; Training loss: 4.9588; Validation loss: 7.4414
Iteration: 571; Percent complete: 14.3%; Training loss: 5.1022; Validation loss: 7.1638
Iteration: 572; Percent complete: 14.3%; Training loss: 5.1939; Validation loss: 7.6601
Iteration: 573; Percent complete: 14.3%; Training loss: 5.3087; Validation loss: 7.2746
Iteration: 574; Percent complete: 14.3%; Training loss: 4.9998; Validation loss: 7.4909
Iteration: 575; Percent complete: 14.4%; Training loss: 5.1406; Validation loss: 7.3551
Iteration: 576; Percent complete: 14.4%; Training loss: 5.0049; Validation loss: 7.5721
Iteration: 577; Percent complete: 14.4%; Training loss: 5.3149; Validation loss: 7.4733
Iteration: 578; Percent complete: 14.4%; Training loss: 5.1701; Validation loss: 7.4505
Iteration: 579; Percent complete: 14.5%; Training loss: 5.4077; Validation loss: 7.5545
Iteration: 580; Percent complete

Iteration: 663; Percent complete: 16.6%; Training loss: 5.1001; Validation loss: 7.2282
Iteration: 664; Percent complete: 16.6%; Training loss: 5.1334; Validation loss: 7.5366
Iteration: 665; Percent complete: 16.6%; Training loss: 4.9662; Validation loss: 7.7648
Iteration: 666; Percent complete: 16.7%; Training loss: 5.1884; Validation loss: 7.5769
Iteration: 667; Percent complete: 16.7%; Training loss: 5.1726; Validation loss: 7.1728
Iteration: 668; Percent complete: 16.7%; Training loss: 5.2261; Validation loss: 7.5289
Iteration: 669; Percent complete: 16.7%; Training loss: 5.3135; Validation loss: 7.2893
Iteration: 670; Percent complete: 16.8%; Training loss: 5.2539; Validation loss: 7.7063
Iteration: 671; Percent complete: 16.8%; Training loss: 5.0307; Validation loss: 7.3681
Iteration: 672; Percent complete: 16.8%; Training loss: 5.3364; Validation loss: 7.4599
Iteration: 673; Percent complete: 16.8%; Training loss: 5.1102; Validation loss: 7.3545
Iteration: 674; Percent complete

Iteration: 757; Percent complete: 18.9%; Training loss: 5.0317; Validation loss: 7.3143
Iteration: 758; Percent complete: 18.9%; Training loss: 5.2786; Validation loss: 7.4965
Iteration: 759; Percent complete: 19.0%; Training loss: 4.8382; Validation loss: 7.4296
Iteration: 760; Percent complete: 19.0%; Training loss: 5.0525; Validation loss: 7.4210
Iteration: 761; Percent complete: 19.0%; Training loss: 5.0377; Validation loss: 7.2230
Iteration: 762; Percent complete: 19.1%; Training loss: 5.1393; Validation loss: 7.5143
Iteration: 763; Percent complete: 19.1%; Training loss: 5.1556; Validation loss: 7.5995
Iteration: 764; Percent complete: 19.1%; Training loss: 5.3456; Validation loss: 7.7584
Iteration: 765; Percent complete: 19.1%; Training loss: 5.0694; Validation loss: 7.3821
Iteration: 766; Percent complete: 19.1%; Training loss: 5.1705; Validation loss: 7.3611
Iteration: 767; Percent complete: 19.2%; Training loss: 5.0546; Validation loss: 7.4876
Iteration: 768; Percent complete

Iteration: 851; Percent complete: 21.3%; Training loss: 5.0897; Validation loss: 7.4800
Iteration: 852; Percent complete: 21.3%; Training loss: 4.9302; Validation loss: 7.3672
Iteration: 853; Percent complete: 21.3%; Training loss: 4.8738; Validation loss: 7.1459
Iteration: 854; Percent complete: 21.3%; Training loss: 5.0634; Validation loss: 7.2774
Iteration: 855; Percent complete: 21.4%; Training loss: 5.2296; Validation loss: 7.3931
Iteration: 856; Percent complete: 21.4%; Training loss: 5.1146; Validation loss: 7.1960
Iteration: 857; Percent complete: 21.4%; Training loss: 5.1140; Validation loss: 7.6304
Iteration: 858; Percent complete: 21.4%; Training loss: 5.2752; Validation loss: 7.3123
Iteration: 859; Percent complete: 21.5%; Training loss: 5.1823; Validation loss: 7.6719
Iteration: 860; Percent complete: 21.5%; Training loss: 5.0603; Validation loss: 7.7892
Iteration: 861; Percent complete: 21.5%; Training loss: 5.2163; Validation loss: 7.3948
Iteration: 862; Percent complete

Iteration: 945; Percent complete: 23.6%; Training loss: 4.8894; Validation loss: 7.1753
Iteration: 946; Percent complete: 23.6%; Training loss: 5.1004; Validation loss: 7.7820
Iteration: 947; Percent complete: 23.7%; Training loss: 5.1303; Validation loss: 7.6314
Iteration: 948; Percent complete: 23.7%; Training loss: 5.0183; Validation loss: 7.0257
Iteration: 949; Percent complete: 23.7%; Training loss: 5.1185; Validation loss: 7.4291
Iteration: 950; Percent complete: 23.8%; Training loss: 5.3571; Validation loss: 7.3925
Iteration: 951; Percent complete: 23.8%; Training loss: 5.2510; Validation loss: 7.3801
Iteration: 952; Percent complete: 23.8%; Training loss: 5.1129; Validation loss: 7.5517
Iteration: 953; Percent complete: 23.8%; Training loss: 5.0107; Validation loss: 7.6064
Iteration: 954; Percent complete: 23.8%; Training loss: 5.2483; Validation loss: 7.4487
Iteration: 955; Percent complete: 23.9%; Training loss: 4.8476; Validation loss: 6.9596
Iteration: 956; Percent complete

Iteration: 1038; Percent complete: 25.9%; Training loss: 5.1348; Validation loss: 7.3476
Iteration: 1039; Percent complete: 26.0%; Training loss: 5.2175; Validation loss: 7.4951
Iteration: 1040; Percent complete: 26.0%; Training loss: 5.1532; Validation loss: 7.2848
Iteration: 1041; Percent complete: 26.0%; Training loss: 5.0883; Validation loss: 7.3581
Iteration: 1042; Percent complete: 26.1%; Training loss: 5.2199; Validation loss: 7.4739
Iteration: 1043; Percent complete: 26.1%; Training loss: 5.3531; Validation loss: 7.3709
Iteration: 1044; Percent complete: 26.1%; Training loss: 5.0836; Validation loss: 7.1965
Iteration: 1045; Percent complete: 26.1%; Training loss: 4.9952; Validation loss: 7.5300
Iteration: 1046; Percent complete: 26.2%; Training loss: 5.2661; Validation loss: 7.7187
Iteration: 1047; Percent complete: 26.2%; Training loss: 5.1838; Validation loss: 7.4366
Iteration: 1048; Percent complete: 26.2%; Training loss: 4.9682; Validation loss: 7.3021
Iteration: 1049; Perc

Iteration: 1131; Percent complete: 28.3%; Training loss: 4.8771; Validation loss: 7.6792
Iteration: 1132; Percent complete: 28.3%; Training loss: 5.0876; Validation loss: 7.7332
Iteration: 1133; Percent complete: 28.3%; Training loss: 5.1959; Validation loss: 7.8007
Iteration: 1134; Percent complete: 28.3%; Training loss: 5.1981; Validation loss: 7.4750
Iteration: 1135; Percent complete: 28.4%; Training loss: 5.2387; Validation loss: 7.3533
Iteration: 1136; Percent complete: 28.4%; Training loss: 4.9906; Validation loss: 7.5954
Iteration: 1137; Percent complete: 28.4%; Training loss: 5.2212; Validation loss: 7.5963
Iteration: 1138; Percent complete: 28.4%; Training loss: 4.7666; Validation loss: 7.4788
Iteration: 1139; Percent complete: 28.5%; Training loss: 4.9522; Validation loss: 7.3307
Iteration: 1140; Percent complete: 28.5%; Training loss: 5.0630; Validation loss: 7.3010
Iteration: 1141; Percent complete: 28.5%; Training loss: 5.1821; Validation loss: 7.6444
Iteration: 1142; Perc

Iteration: 1224; Percent complete: 30.6%; Training loss: 5.2339; Validation loss: 7.7667
Iteration: 1225; Percent complete: 30.6%; Training loss: 5.1795; Validation loss: 7.0735
Iteration: 1226; Percent complete: 30.6%; Training loss: 4.9391; Validation loss: 7.5733
Iteration: 1227; Percent complete: 30.7%; Training loss: 5.0030; Validation loss: 7.6332
Iteration: 1228; Percent complete: 30.7%; Training loss: 5.0893; Validation loss: 7.3414
Iteration: 1229; Percent complete: 30.7%; Training loss: 5.0562; Validation loss: 7.2912
Iteration: 1230; Percent complete: 30.8%; Training loss: 5.1059; Validation loss: 7.1630
Iteration: 1231; Percent complete: 30.8%; Training loss: 4.7695; Validation loss: 7.6463
Iteration: 1232; Percent complete: 30.8%; Training loss: 4.9195; Validation loss: 7.5807
Iteration: 1233; Percent complete: 30.8%; Training loss: 5.2117; Validation loss: 7.6765
Iteration: 1234; Percent complete: 30.9%; Training loss: 5.1855; Validation loss: 7.6934
Iteration: 1235; Perc

Iteration: 1317; Percent complete: 32.9%; Training loss: 4.7942; Validation loss: 7.3131
Iteration: 1318; Percent complete: 33.0%; Training loss: 5.0198; Validation loss: 7.6207
Iteration: 1319; Percent complete: 33.0%; Training loss: 5.2379; Validation loss: 7.3054
Iteration: 1320; Percent complete: 33.0%; Training loss: 5.2376; Validation loss: 7.4766
Iteration: 1321; Percent complete: 33.0%; Training loss: 5.0266; Validation loss: 7.5034
Iteration: 1322; Percent complete: 33.1%; Training loss: 5.3132; Validation loss: 7.3942
Iteration: 1323; Percent complete: 33.1%; Training loss: 5.0523; Validation loss: 7.2433
Iteration: 1324; Percent complete: 33.1%; Training loss: 5.3318; Validation loss: 7.2778
Iteration: 1325; Percent complete: 33.1%; Training loss: 5.1688; Validation loss: 7.5666
Iteration: 1326; Percent complete: 33.1%; Training loss: 4.9313; Validation loss: 7.4711
Iteration: 1327; Percent complete: 33.2%; Training loss: 5.1497; Validation loss: 7.2698
Iteration: 1328; Perc

Iteration: 1410; Percent complete: 35.2%; Training loss: 5.2482; Validation loss: 7.4611
Iteration: 1411; Percent complete: 35.3%; Training loss: 5.1776; Validation loss: 7.5245
Iteration: 1412; Percent complete: 35.3%; Training loss: 4.9356; Validation loss: 7.8295
Iteration: 1413; Percent complete: 35.3%; Training loss: 5.3993; Validation loss: 7.0497
Iteration: 1414; Percent complete: 35.4%; Training loss: 5.1363; Validation loss: 7.2449
Iteration: 1415; Percent complete: 35.4%; Training loss: 5.1430; Validation loss: 7.2947
Iteration: 1416; Percent complete: 35.4%; Training loss: 4.9908; Validation loss: 7.6120
Iteration: 1417; Percent complete: 35.4%; Training loss: 5.1369; Validation loss: 7.3036
Iteration: 1418; Percent complete: 35.4%; Training loss: 4.9771; Validation loss: 7.3649
Iteration: 1419; Percent complete: 35.5%; Training loss: 4.9974; Validation loss: 6.9585
Iteration: 1420; Percent complete: 35.5%; Training loss: 5.1223; Validation loss: 7.6539
Iteration: 1421; Perc

Iteration: 1503; Percent complete: 37.6%; Training loss: 5.1385; Validation loss: 7.4274
Iteration: 1504; Percent complete: 37.6%; Training loss: 4.9577; Validation loss: 7.1133
Iteration: 1505; Percent complete: 37.6%; Training loss: 5.0216; Validation loss: 7.1975
Iteration: 1506; Percent complete: 37.6%; Training loss: 5.2375; Validation loss: 7.4504
Iteration: 1507; Percent complete: 37.7%; Training loss: 4.9606; Validation loss: 7.5425
Iteration: 1508; Percent complete: 37.7%; Training loss: 5.1733; Validation loss: 7.5764
Iteration: 1509; Percent complete: 37.7%; Training loss: 5.1496; Validation loss: 7.5072
Iteration: 1510; Percent complete: 37.8%; Training loss: 4.9973; Validation loss: 7.3395
Iteration: 1511; Percent complete: 37.8%; Training loss: 4.8903; Validation loss: 7.3281
Iteration: 1512; Percent complete: 37.8%; Training loss: 5.0435; Validation loss: 7.4772
Iteration: 1513; Percent complete: 37.8%; Training loss: 5.0861; Validation loss: 7.0677
Iteration: 1514; Perc

Iteration: 1596; Percent complete: 39.9%; Training loss: 4.9880; Validation loss: 7.1451
Iteration: 1597; Percent complete: 39.9%; Training loss: 5.0086; Validation loss: 7.4519
Iteration: 1598; Percent complete: 40.0%; Training loss: 5.1689; Validation loss: 7.3187
Iteration: 1599; Percent complete: 40.0%; Training loss: 5.0847; Validation loss: 7.4734
Iteration: 1600; Percent complete: 40.0%; Training loss: 5.1841; Validation loss: 7.2902
Iteration: 1601; Percent complete: 40.0%; Training loss: 4.9279; Validation loss: 7.5002
Iteration: 1602; Percent complete: 40.1%; Training loss: 4.9423; Validation loss: 7.7428
Iteration: 1603; Percent complete: 40.1%; Training loss: 4.9942; Validation loss: 7.4369
Iteration: 1604; Percent complete: 40.1%; Training loss: 5.2983; Validation loss: 7.7732
Iteration: 1605; Percent complete: 40.1%; Training loss: 5.0073; Validation loss: 7.5681
Iteration: 1606; Percent complete: 40.2%; Training loss: 4.9590; Validation loss: 7.3786
Iteration: 1607; Perc

Iteration: 1689; Percent complete: 42.2%; Training loss: 4.9366; Validation loss: 7.2373
Iteration: 1690; Percent complete: 42.2%; Training loss: 5.1795; Validation loss: 7.3929
Iteration: 1691; Percent complete: 42.3%; Training loss: 5.1117; Validation loss: 7.3083
Iteration: 1692; Percent complete: 42.3%; Training loss: 5.1420; Validation loss: 7.6616
Iteration: 1693; Percent complete: 42.3%; Training loss: 5.3456; Validation loss: 7.3509
Iteration: 1694; Percent complete: 42.4%; Training loss: 4.7888; Validation loss: 7.4291
Iteration: 1695; Percent complete: 42.4%; Training loss: 5.1096; Validation loss: 7.3915
Iteration: 1696; Percent complete: 42.4%; Training loss: 5.3550; Validation loss: 7.5409
Iteration: 1697; Percent complete: 42.4%; Training loss: 5.0557; Validation loss: 7.3477
Iteration: 1698; Percent complete: 42.4%; Training loss: 4.7843; Validation loss: 7.2728
Iteration: 1699; Percent complete: 42.5%; Training loss: 5.3577; Validation loss: 7.6521
Iteration: 1700; Perc

Iteration: 1782; Percent complete: 44.5%; Training loss: 5.1810; Validation loss: 7.6504
Iteration: 1783; Percent complete: 44.6%; Training loss: 5.1404; Validation loss: 7.2579
Iteration: 1784; Percent complete: 44.6%; Training loss: 5.0721; Validation loss: 7.4173
Iteration: 1785; Percent complete: 44.6%; Training loss: 5.1893; Validation loss: 7.8006
Iteration: 1786; Percent complete: 44.6%; Training loss: 5.1491; Validation loss: 7.9112
Iteration: 1787; Percent complete: 44.7%; Training loss: 5.3129; Validation loss: 7.3404
Iteration: 1788; Percent complete: 44.7%; Training loss: 5.1835; Validation loss: 7.7398
Iteration: 1789; Percent complete: 44.7%; Training loss: 4.9361; Validation loss: 7.5421
Iteration: 1790; Percent complete: 44.8%; Training loss: 5.0624; Validation loss: 7.2609
Iteration: 1791; Percent complete: 44.8%; Training loss: 5.1018; Validation loss: 7.3117
Iteration: 1792; Percent complete: 44.8%; Training loss: 5.1915; Validation loss: 7.4544
Iteration: 1793; Perc

Iteration: 1875; Percent complete: 46.9%; Training loss: 5.1664; Validation loss: 7.1411
Iteration: 1876; Percent complete: 46.9%; Training loss: 5.0325; Validation loss: 7.3954
Iteration: 1877; Percent complete: 46.9%; Training loss: 5.0760; Validation loss: 7.6172
Iteration: 1878; Percent complete: 46.9%; Training loss: 4.8614; Validation loss: 7.2328
Iteration: 1879; Percent complete: 47.0%; Training loss: 5.1302; Validation loss: 7.2349
Iteration: 1880; Percent complete: 47.0%; Training loss: 4.8428; Validation loss: 7.1010
Iteration: 1881; Percent complete: 47.0%; Training loss: 5.2160; Validation loss: 7.5884
Iteration: 1882; Percent complete: 47.0%; Training loss: 5.0386; Validation loss: 7.2269
Iteration: 1883; Percent complete: 47.1%; Training loss: 4.9535; Validation loss: 7.3075
Iteration: 1884; Percent complete: 47.1%; Training loss: 5.3203; Validation loss: 7.6850
Iteration: 1885; Percent complete: 47.1%; Training loss: 5.4345; Validation loss: 7.3928
Iteration: 1886; Perc

Iteration: 1968; Percent complete: 49.2%; Training loss: 5.2135; Validation loss: 7.1195
Iteration: 1969; Percent complete: 49.2%; Training loss: 4.8651; Validation loss: 7.5347
Iteration: 1970; Percent complete: 49.2%; Training loss: 5.1117; Validation loss: 7.2428
Iteration: 1971; Percent complete: 49.3%; Training loss: 4.9803; Validation loss: 7.1736
Iteration: 1972; Percent complete: 49.3%; Training loss: 5.1005; Validation loss: 7.6261
Iteration: 1973; Percent complete: 49.3%; Training loss: 5.1726; Validation loss: 7.4601
Iteration: 1974; Percent complete: 49.4%; Training loss: 5.0540; Validation loss: 7.7528
Iteration: 1975; Percent complete: 49.4%; Training loss: 5.1595; Validation loss: 7.2104
Iteration: 1976; Percent complete: 49.4%; Training loss: 5.0612; Validation loss: 7.3838
Iteration: 1977; Percent complete: 49.4%; Training loss: 5.1742; Validation loss: 7.6922
Iteration: 1978; Percent complete: 49.5%; Training loss: 5.2690; Validation loss: 7.5595
Iteration: 1979; Perc

Iteration: 2061; Percent complete: 51.5%; Training loss: 4.9542; Validation loss: 7.5399
Iteration: 2062; Percent complete: 51.5%; Training loss: 5.1302; Validation loss: 7.3664
Iteration: 2063; Percent complete: 51.6%; Training loss: 4.9613; Validation loss: 7.3723
Iteration: 2064; Percent complete: 51.6%; Training loss: 4.9892; Validation loss: 7.6484
Iteration: 2065; Percent complete: 51.6%; Training loss: 5.1833; Validation loss: 7.1651
Iteration: 2066; Percent complete: 51.6%; Training loss: 4.9899; Validation loss: 7.3175
Iteration: 2067; Percent complete: 51.7%; Training loss: 5.0359; Validation loss: 7.3558
Iteration: 2068; Percent complete: 51.7%; Training loss: 4.9784; Validation loss: 7.3821
Iteration: 2069; Percent complete: 51.7%; Training loss: 5.1089; Validation loss: 7.0491
Iteration: 2070; Percent complete: 51.7%; Training loss: 4.9955; Validation loss: 7.3530
Iteration: 2071; Percent complete: 51.8%; Training loss: 5.1987; Validation loss: 7.4960
Iteration: 2072; Perc

Iteration: 2154; Percent complete: 53.8%; Training loss: 5.1220; Validation loss: 7.6752
Iteration: 2155; Percent complete: 53.9%; Training loss: 5.0762; Validation loss: 7.6322
Iteration: 2156; Percent complete: 53.9%; Training loss: 5.2466; Validation loss: 7.4946
Iteration: 2157; Percent complete: 53.9%; Training loss: 5.0580; Validation loss: 7.7711
Iteration: 2158; Percent complete: 53.9%; Training loss: 4.7149; Validation loss: 7.3925
Iteration: 2159; Percent complete: 54.0%; Training loss: 5.1499; Validation loss: 7.4459
Iteration: 2160; Percent complete: 54.0%; Training loss: 5.0096; Validation loss: 7.4908
Iteration: 2161; Percent complete: 54.0%; Training loss: 5.1821; Validation loss: 7.4212
Iteration: 2162; Percent complete: 54.0%; Training loss: 4.9373; Validation loss: 7.5411
Iteration: 2163; Percent complete: 54.1%; Training loss: 5.0112; Validation loss: 7.3297
Iteration: 2164; Percent complete: 54.1%; Training loss: 4.8401; Validation loss: 7.7472
Iteration: 2165; Perc

Iteration: 2247; Percent complete: 56.2%; Training loss: 4.9135; Validation loss: 7.4489
Iteration: 2248; Percent complete: 56.2%; Training loss: 5.1354; Validation loss: 7.5824
Iteration: 2249; Percent complete: 56.2%; Training loss: 4.8024; Validation loss: 7.4410
Iteration: 2250; Percent complete: 56.2%; Training loss: 5.3262; Validation loss: 7.7590
Iteration: 2251; Percent complete: 56.3%; Training loss: 4.8182; Validation loss: 7.4651
Iteration: 2252; Percent complete: 56.3%; Training loss: 5.3571; Validation loss: 7.3234
Iteration: 2253; Percent complete: 56.3%; Training loss: 5.2785; Validation loss: 7.5577
Iteration: 2254; Percent complete: 56.4%; Training loss: 5.1926; Validation loss: 7.5726
Iteration: 2255; Percent complete: 56.4%; Training loss: 5.1555; Validation loss: 7.4878
Iteration: 2256; Percent complete: 56.4%; Training loss: 4.7940; Validation loss: 7.0565
Iteration: 2257; Percent complete: 56.4%; Training loss: 5.1312; Validation loss: 7.3153
Iteration: 2258; Perc

Iteration: 2340; Percent complete: 58.5%; Training loss: 5.0280; Validation loss: 6.7754
Iteration: 2341; Percent complete: 58.5%; Training loss: 5.0511; Validation loss: 7.6761
Iteration: 2342; Percent complete: 58.6%; Training loss: 5.1377; Validation loss: 7.5602
Iteration: 2343; Percent complete: 58.6%; Training loss: 4.8836; Validation loss: 7.6055
Iteration: 2344; Percent complete: 58.6%; Training loss: 5.1688; Validation loss: 7.4889
Iteration: 2345; Percent complete: 58.6%; Training loss: 5.2876; Validation loss: 7.6321
Iteration: 2346; Percent complete: 58.7%; Training loss: 5.2813; Validation loss: 7.6765
Iteration: 2347; Percent complete: 58.7%; Training loss: 4.8947; Validation loss: 7.6988
Iteration: 2348; Percent complete: 58.7%; Training loss: 5.0565; Validation loss: 7.3194
Iteration: 2349; Percent complete: 58.7%; Training loss: 5.2980; Validation loss: 7.3917
Iteration: 2350; Percent complete: 58.8%; Training loss: 5.0809; Validation loss: 7.4555
Iteration: 2351; Perc

Iteration: 2433; Percent complete: 60.8%; Training loss: 5.1404; Validation loss: 7.5672
Iteration: 2434; Percent complete: 60.9%; Training loss: 4.9392; Validation loss: 7.7423
Iteration: 2435; Percent complete: 60.9%; Training loss: 4.9681; Validation loss: 7.6261
Iteration: 2436; Percent complete: 60.9%; Training loss: 5.1184; Validation loss: 7.2633
Iteration: 2437; Percent complete: 60.9%; Training loss: 5.1572; Validation loss: 7.6636
Iteration: 2438; Percent complete: 61.0%; Training loss: 5.1567; Validation loss: 7.3823
Iteration: 2439; Percent complete: 61.0%; Training loss: 5.1916; Validation loss: 7.3096
Iteration: 2440; Percent complete: 61.0%; Training loss: 4.9738; Validation loss: 7.5516
Iteration: 2441; Percent complete: 61.0%; Training loss: 5.0896; Validation loss: 7.4557
Iteration: 2442; Percent complete: 61.1%; Training loss: 5.2911; Validation loss: 7.5017
Iteration: 2443; Percent complete: 61.1%; Training loss: 4.8927; Validation loss: 7.6665
Iteration: 2444; Perc

Iteration: 2526; Percent complete: 63.1%; Training loss: 4.9033; Validation loss: 7.8325
Iteration: 2527; Percent complete: 63.2%; Training loss: 5.1859; Validation loss: 7.7316
Iteration: 2528; Percent complete: 63.2%; Training loss: 4.9597; Validation loss: 7.6666
Iteration: 2529; Percent complete: 63.2%; Training loss: 5.1590; Validation loss: 7.6710
Iteration: 2530; Percent complete: 63.2%; Training loss: 4.9445; Validation loss: 7.6912
Iteration: 2531; Percent complete: 63.3%; Training loss: 4.9246; Validation loss: 7.6901
Iteration: 2532; Percent complete: 63.3%; Training loss: 5.1974; Validation loss: 7.5042
Iteration: 2533; Percent complete: 63.3%; Training loss: 4.9385; Validation loss: 7.2953
Iteration: 2534; Percent complete: 63.3%; Training loss: 5.1471; Validation loss: 7.2840
Iteration: 2535; Percent complete: 63.4%; Training loss: 5.0520; Validation loss: 7.5780
Iteration: 2536; Percent complete: 63.4%; Training loss: 5.0735; Validation loss: 7.3891
Iteration: 2537; Perc

Iteration: 2619; Percent complete: 65.5%; Training loss: 4.9935; Validation loss: 7.2994
Iteration: 2620; Percent complete: 65.5%; Training loss: 5.1513; Validation loss: 7.5740
Iteration: 2621; Percent complete: 65.5%; Training loss: 5.2860; Validation loss: 7.6338
Iteration: 2622; Percent complete: 65.5%; Training loss: 5.2340; Validation loss: 7.4593
Iteration: 2623; Percent complete: 65.6%; Training loss: 4.8556; Validation loss: 7.5221
Iteration: 2624; Percent complete: 65.6%; Training loss: 5.0669; Validation loss: 7.4316
Iteration: 2625; Percent complete: 65.6%; Training loss: 4.9431; Validation loss: 7.3915
Iteration: 2626; Percent complete: 65.6%; Training loss: 4.8709; Validation loss: 7.3760
Iteration: 2627; Percent complete: 65.7%; Training loss: 5.2448; Validation loss: 7.2513
Iteration: 2628; Percent complete: 65.7%; Training loss: 5.1193; Validation loss: 7.2329
Iteration: 2629; Percent complete: 65.7%; Training loss: 5.1391; Validation loss: 7.1002
Iteration: 2630; Perc

Iteration: 2712; Percent complete: 67.8%; Training loss: 4.9448; Validation loss: 7.2576
Iteration: 2713; Percent complete: 67.8%; Training loss: 5.3509; Validation loss: 7.7467
Iteration: 2714; Percent complete: 67.8%; Training loss: 5.2289; Validation loss: 7.5461
Iteration: 2715; Percent complete: 67.9%; Training loss: 5.1589; Validation loss: 7.5588
Iteration: 2716; Percent complete: 67.9%; Training loss: 5.3464; Validation loss: 7.2355
Iteration: 2717; Percent complete: 67.9%; Training loss: 5.1525; Validation loss: 7.1502
Iteration: 2718; Percent complete: 68.0%; Training loss: 5.1952; Validation loss: 7.5311
Iteration: 2719; Percent complete: 68.0%; Training loss: 5.3543; Validation loss: 7.3086
Iteration: 2720; Percent complete: 68.0%; Training loss: 5.1291; Validation loss: 7.4369
Iteration: 2721; Percent complete: 68.0%; Training loss: 5.0869; Validation loss: 7.6832
Iteration: 2722; Percent complete: 68.0%; Training loss: 5.1417; Validation loss: 7.5910
Iteration: 2723; Perc

Iteration: 2805; Percent complete: 70.1%; Training loss: 5.0869; Validation loss: 7.4981
Iteration: 2806; Percent complete: 70.2%; Training loss: 4.9104; Validation loss: 7.8269
Iteration: 2807; Percent complete: 70.2%; Training loss: 5.4730; Validation loss: 7.4021
Iteration: 2808; Percent complete: 70.2%; Training loss: 4.8789; Validation loss: 7.3973
Iteration: 2809; Percent complete: 70.2%; Training loss: 5.1085; Validation loss: 7.4576
Iteration: 2810; Percent complete: 70.2%; Training loss: 5.2608; Validation loss: 7.4016
Iteration: 2811; Percent complete: 70.3%; Training loss: 5.0855; Validation loss: 7.6241
Iteration: 2812; Percent complete: 70.3%; Training loss: 4.7585; Validation loss: 7.4875
Iteration: 2813; Percent complete: 70.3%; Training loss: 5.0696; Validation loss: 7.4698
Iteration: 2814; Percent complete: 70.3%; Training loss: 4.7083; Validation loss: 7.8687
Iteration: 2815; Percent complete: 70.4%; Training loss: 5.0324; Validation loss: 7.8304
Iteration: 2816; Perc

Iteration: 2898; Percent complete: 72.5%; Training loss: 5.0268; Validation loss: 7.6410
Iteration: 2899; Percent complete: 72.5%; Training loss: 5.0540; Validation loss: 7.3696
Iteration: 2900; Percent complete: 72.5%; Training loss: 5.0859; Validation loss: 7.9486
Iteration: 2901; Percent complete: 72.5%; Training loss: 5.0372; Validation loss: 7.5666
Iteration: 2902; Percent complete: 72.5%; Training loss: 5.0943; Validation loss: 7.3541
Iteration: 2903; Percent complete: 72.6%; Training loss: 5.2651; Validation loss: 7.7146
Iteration: 2904; Percent complete: 72.6%; Training loss: 5.2475; Validation loss: 7.1187
Iteration: 2905; Percent complete: 72.6%; Training loss: 5.0106; Validation loss: 8.0610
Iteration: 2906; Percent complete: 72.7%; Training loss: 5.0086; Validation loss: 7.9645
Iteration: 2907; Percent complete: 72.7%; Training loss: 5.2329; Validation loss: 7.5706
Iteration: 2908; Percent complete: 72.7%; Training loss: 4.7748; Validation loss: 7.4117
Iteration: 2909; Perc

Iteration: 2991; Percent complete: 74.8%; Training loss: 5.2023; Validation loss: 7.7271
Iteration: 2992; Percent complete: 74.8%; Training loss: 4.9116; Validation loss: 7.4694
Iteration: 2993; Percent complete: 74.8%; Training loss: 5.1303; Validation loss: 7.3449
Iteration: 2994; Percent complete: 74.9%; Training loss: 5.0257; Validation loss: 7.5381
Iteration: 2995; Percent complete: 74.9%; Training loss: 4.9819; Validation loss: 7.3370
Iteration: 2996; Percent complete: 74.9%; Training loss: 5.1304; Validation loss: 7.2733
Iteration: 2997; Percent complete: 74.9%; Training loss: 5.1561; Validation loss: 7.4932
Iteration: 2998; Percent complete: 75.0%; Training loss: 5.2878; Validation loss: 6.9235
Iteration: 2999; Percent complete: 75.0%; Training loss: 5.2896; Validation loss: 7.3010
Iteration: 3000; Percent complete: 75.0%; Training loss: 4.8668; Validation loss: 7.6736
Iteration: 3001; Percent complete: 75.0%; Training loss: 5.3175; Validation loss: 7.1702
Iteration: 3002; Perc

Iteration: 3084; Percent complete: 77.1%; Training loss: 5.1060; Validation loss: 7.2368
Iteration: 3085; Percent complete: 77.1%; Training loss: 5.2107; Validation loss: 7.3994
Iteration: 3086; Percent complete: 77.1%; Training loss: 5.3038; Validation loss: 7.5318
Iteration: 3087; Percent complete: 77.2%; Training loss: 4.9468; Validation loss: 7.6202
Iteration: 3088; Percent complete: 77.2%; Training loss: 4.9774; Validation loss: 7.3027
Iteration: 3089; Percent complete: 77.2%; Training loss: 4.8566; Validation loss: 7.5999
Iteration: 3090; Percent complete: 77.2%; Training loss: 5.2907; Validation loss: 7.3122
Iteration: 3091; Percent complete: 77.3%; Training loss: 5.3028; Validation loss: 7.2813
Iteration: 3092; Percent complete: 77.3%; Training loss: 5.3149; Validation loss: 7.3926
Iteration: 3093; Percent complete: 77.3%; Training loss: 5.2002; Validation loss: 7.2654
Iteration: 3094; Percent complete: 77.3%; Training loss: 4.9450; Validation loss: 7.3272
Iteration: 3095; Perc

Iteration: 3177; Percent complete: 79.4%; Training loss: 5.3312; Validation loss: 7.4997
Iteration: 3178; Percent complete: 79.5%; Training loss: 5.2899; Validation loss: 7.2555
Iteration: 3179; Percent complete: 79.5%; Training loss: 5.2596; Validation loss: 7.2715
Iteration: 3180; Percent complete: 79.5%; Training loss: 5.3041; Validation loss: 7.5195
Iteration: 3181; Percent complete: 79.5%; Training loss: 5.1190; Validation loss: 7.5936
Iteration: 3182; Percent complete: 79.5%; Training loss: 5.1077; Validation loss: 7.4352
Iteration: 3183; Percent complete: 79.6%; Training loss: 5.1333; Validation loss: 7.6198
Iteration: 3184; Percent complete: 79.6%; Training loss: 5.1827; Validation loss: 7.2895
Iteration: 3185; Percent complete: 79.6%; Training loss: 5.1061; Validation loss: 7.6687
Iteration: 3186; Percent complete: 79.7%; Training loss: 4.9276; Validation loss: 7.6618
Iteration: 3187; Percent complete: 79.7%; Training loss: 5.0248; Validation loss: 7.6502
Iteration: 3188; Perc

Iteration: 3270; Percent complete: 81.8%; Training loss: 5.2689; Validation loss: 7.5490
Iteration: 3271; Percent complete: 81.8%; Training loss: 5.0819; Validation loss: 7.4469
Iteration: 3272; Percent complete: 81.8%; Training loss: 5.0207; Validation loss: 7.7106
Iteration: 3273; Percent complete: 81.8%; Training loss: 4.9450; Validation loss: 7.7277
Iteration: 3274; Percent complete: 81.8%; Training loss: 5.0690; Validation loss: 7.7292
Iteration: 3275; Percent complete: 81.9%; Training loss: 5.1852; Validation loss: 7.7579
Iteration: 3276; Percent complete: 81.9%; Training loss: 5.1515; Validation loss: 7.4183
Iteration: 3277; Percent complete: 81.9%; Training loss: 5.0210; Validation loss: 7.5584
Iteration: 3278; Percent complete: 82.0%; Training loss: 5.0070; Validation loss: 7.6682
Iteration: 3279; Percent complete: 82.0%; Training loss: 5.1700; Validation loss: 7.9012
Iteration: 3280; Percent complete: 82.0%; Training loss: 5.0220; Validation loss: 7.4649
Iteration: 3281; Perc

Iteration: 3363; Percent complete: 84.1%; Training loss: 5.0275; Validation loss: 7.5975
Iteration: 3364; Percent complete: 84.1%; Training loss: 5.1359; Validation loss: 7.7501
Iteration: 3365; Percent complete: 84.1%; Training loss: 5.1608; Validation loss: 7.2242
Iteration: 3366; Percent complete: 84.2%; Training loss: 4.8828; Validation loss: 7.6617
Iteration: 3367; Percent complete: 84.2%; Training loss: 4.8998; Validation loss: 7.3077
Iteration: 3368; Percent complete: 84.2%; Training loss: 5.0894; Validation loss: 7.7461
Iteration: 3369; Percent complete: 84.2%; Training loss: 4.9477; Validation loss: 7.6697
Iteration: 3370; Percent complete: 84.2%; Training loss: 5.0763; Validation loss: 7.4431
Iteration: 3371; Percent complete: 84.3%; Training loss: 5.0919; Validation loss: 7.6529
Iteration: 3372; Percent complete: 84.3%; Training loss: 5.4272; Validation loss: 7.6425
Iteration: 3373; Percent complete: 84.3%; Training loss: 5.2062; Validation loss: 7.5309
Iteration: 3374; Perc

Iteration: 3456; Percent complete: 86.4%; Training loss: 4.9441; Validation loss: 7.6524
Iteration: 3457; Percent complete: 86.4%; Training loss: 5.2974; Validation loss: 7.5551
Iteration: 3458; Percent complete: 86.5%; Training loss: 4.7403; Validation loss: 7.4837
Iteration: 3459; Percent complete: 86.5%; Training loss: 5.3141; Validation loss: 7.3881
Iteration: 3460; Percent complete: 86.5%; Training loss: 5.0930; Validation loss: 7.5513
Iteration: 3461; Percent complete: 86.5%; Training loss: 5.0926; Validation loss: 7.2088
Iteration: 3462; Percent complete: 86.6%; Training loss: 5.2012; Validation loss: 7.4317
Iteration: 3463; Percent complete: 86.6%; Training loss: 5.0520; Validation loss: 7.3261
Iteration: 3464; Percent complete: 86.6%; Training loss: 5.1554; Validation loss: 7.7898
Iteration: 3465; Percent complete: 86.6%; Training loss: 5.1690; Validation loss: 7.5685
Iteration: 3466; Percent complete: 86.7%; Training loss: 4.8528; Validation loss: 7.6035
Iteration: 3467; Perc

Iteration: 3549; Percent complete: 88.7%; Training loss: 5.2096; Validation loss: 7.8644
Iteration: 3550; Percent complete: 88.8%; Training loss: 5.1058; Validation loss: 7.4205
Iteration: 3551; Percent complete: 88.8%; Training loss: 5.2444; Validation loss: 7.1448
Iteration: 3552; Percent complete: 88.8%; Training loss: 5.1765; Validation loss: 7.5085
Iteration: 3553; Percent complete: 88.8%; Training loss: 5.0967; Validation loss: 7.0904
Iteration: 3554; Percent complete: 88.8%; Training loss: 5.1850; Validation loss: 7.4009
Iteration: 3555; Percent complete: 88.9%; Training loss: 5.2112; Validation loss: 7.3762
Iteration: 3556; Percent complete: 88.9%; Training loss: 5.1887; Validation loss: 7.6034
Iteration: 3557; Percent complete: 88.9%; Training loss: 5.2738; Validation loss: 7.6576
Iteration: 3558; Percent complete: 88.9%; Training loss: 4.8145; Validation loss: 7.4660
Iteration: 3559; Percent complete: 89.0%; Training loss: 5.1785; Validation loss: 7.5882
Iteration: 3560; Perc

Iteration: 3642; Percent complete: 91.0%; Training loss: 5.1144; Validation loss: 7.4131
Iteration: 3643; Percent complete: 91.1%; Training loss: 5.0127; Validation loss: 7.2218
Iteration: 3644; Percent complete: 91.1%; Training loss: 5.0460; Validation loss: 7.9476
Iteration: 3645; Percent complete: 91.1%; Training loss: 4.8943; Validation loss: 7.4086
Iteration: 3646; Percent complete: 91.1%; Training loss: 4.9477; Validation loss: 7.5701
Iteration: 3647; Percent complete: 91.2%; Training loss: 4.9931; Validation loss: 7.7314
Iteration: 3648; Percent complete: 91.2%; Training loss: 5.0788; Validation loss: 7.4791
Iteration: 3649; Percent complete: 91.2%; Training loss: 5.0628; Validation loss: 7.1873
Iteration: 3650; Percent complete: 91.2%; Training loss: 5.2739; Validation loss: 7.3115
Iteration: 3651; Percent complete: 91.3%; Training loss: 5.1187; Validation loss: 7.3631
Iteration: 3652; Percent complete: 91.3%; Training loss: 5.0280; Validation loss: 7.3694
Iteration: 3653; Perc

Iteration: 3735; Percent complete: 93.4%; Training loss: 5.1141; Validation loss: 7.8173
Iteration: 3736; Percent complete: 93.4%; Training loss: 5.0272; Validation loss: 7.2566
Iteration: 3737; Percent complete: 93.4%; Training loss: 5.2130; Validation loss: 7.5119
Iteration: 3738; Percent complete: 93.5%; Training loss: 5.1715; Validation loss: 7.4300
Iteration: 3739; Percent complete: 93.5%; Training loss: 5.0222; Validation loss: 7.9566
Iteration: 3740; Percent complete: 93.5%; Training loss: 4.9697; Validation loss: 7.5417
Iteration: 3741; Percent complete: 93.5%; Training loss: 5.0893; Validation loss: 7.2328
Iteration: 3742; Percent complete: 93.5%; Training loss: 5.0827; Validation loss: 7.2793
Iteration: 3743; Percent complete: 93.6%; Training loss: 5.2738; Validation loss: 7.3617
Iteration: 3744; Percent complete: 93.6%; Training loss: 5.2547; Validation loss: 7.1987
Iteration: 3745; Percent complete: 93.6%; Training loss: 4.8189; Validation loss: 7.4360
Iteration: 3746; Perc

Iteration: 3828; Percent complete: 95.7%; Training loss: 5.2583; Validation loss: 7.5036
Iteration: 3829; Percent complete: 95.7%; Training loss: 5.0310; Validation loss: 7.4193
Iteration: 3830; Percent complete: 95.8%; Training loss: 5.1333; Validation loss: 7.6142
Iteration: 3831; Percent complete: 95.8%; Training loss: 5.1254; Validation loss: 7.7445
Iteration: 3832; Percent complete: 95.8%; Training loss: 5.2173; Validation loss: 7.4572
Iteration: 3833; Percent complete: 95.8%; Training loss: 5.2195; Validation loss: 7.5020
Iteration: 3834; Percent complete: 95.9%; Training loss: 5.3115; Validation loss: 7.3750
Iteration: 3835; Percent complete: 95.9%; Training loss: 4.9079; Validation loss: 7.5583
Iteration: 3836; Percent complete: 95.9%; Training loss: 5.1973; Validation loss: 7.6615
Iteration: 3837; Percent complete: 95.9%; Training loss: 5.0961; Validation loss: 7.2981
Iteration: 3838; Percent complete: 96.0%; Training loss: 5.0405; Validation loss: 7.3874
Iteration: 3839; Perc

Iteration: 3921; Percent complete: 98.0%; Training loss: 5.2929; Validation loss: 7.7666
Iteration: 3922; Percent complete: 98.0%; Training loss: 5.1495; Validation loss: 7.7524
Iteration: 3923; Percent complete: 98.1%; Training loss: 5.1337; Validation loss: 7.2241
Iteration: 3924; Percent complete: 98.1%; Training loss: 5.1384; Validation loss: 6.9185
Iteration: 3925; Percent complete: 98.1%; Training loss: 4.9437; Validation loss: 7.5617
Iteration: 3926; Percent complete: 98.2%; Training loss: 4.9831; Validation loss: 7.5120
Iteration: 3927; Percent complete: 98.2%; Training loss: 4.9698; Validation loss: 7.9640
Iteration: 3928; Percent complete: 98.2%; Training loss: 5.3163; Validation loss: 7.3512
Iteration: 3929; Percent complete: 98.2%; Training loss: 5.0368; Validation loss: 7.1735
Iteration: 3930; Percent complete: 98.2%; Training loss: 5.0202; Validation loss: 7.3679
Iteration: 3931; Percent complete: 98.3%; Training loss: 4.9041; Validation loss: 7.4794
Iteration: 3932; Perc

In [112]:
PATH=r"C:\\Users\\indra\\OneDrive\\Documents\\Indrani\\UCB\\w266\\chatbot"
print(PATH)
torch.save(model_name, PATH)

C:\\Users\\indra\\OneDrive\\Documents\\Indrani\\UCB\\w266\\chatbot


PermissionError: [Errno 13] Permission denied: 'C:\\\\Users\\\\indra\\\\OneDrive\\\\Documents\\\\Indrani\\\\UCB\\\\w266\\\\chatbot'

In [None]:
import json
def save_current_config(save_model_file_name):
  config_dict = {'start_token':1, 'end_token':2, 'save_model_file_name':save_model_file_name}
  config_filename = PATH + save_model_file_name + '\config.txt'
  with open(config_filename, 'w') as f:
    json.dump(config_dict, f)

In [None]:
save_current_config("")

In [None]:
import time

In [None]:
encoder.eval()
decoder.eval()
searcher = SearchDecoder(encoder, decoder)
# evaluateInput(encoder, decoder, searcher, voc)