In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math

In [2]:
# Name of the corpus directory; later cells also pass it to loadPrepareData,
# which uses it as the vocabulary name.
corpus_name = "cornell movie-dialogs corpus"
# NOTE(review): absolute, machine-specific Windows path. The notebook cannot run
# on another machine until this points at the local copy of the corpus.
corpus = os.path.join(r"C:\Users\indra\OneDrive\Documents\Indrani\UCB\w266\chatbot\data", corpus_name)

def printLines(file, n=10):
    """Print the first ``n`` raw lines of ``file``.

    The file is opened in binary mode, so each line is printed as a ``bytes``
    repr (``b'...\\n'``) with its line terminator intact.

    Args:
        file: Path of the file to preview.
        n: Number of leading lines to print (non-negative); ``None`` prints
            every line.
    """
    with open(file, 'rb') as datafile:
        # Stream the file and stop after n lines instead of loading the whole
        # corpus into memory just to show a short preview.
        for line in itertools.islice(datafile, n):
            print(line)

# Preview the first raw lines of the corpus; the output below shows the
# " +++$+++ "-separated layout that loadLines later splits on.
printLines(os.path.join(corpus, "movie_lines.txt"))

# Define path to new file
# NOTE(review): `datafile` is assigned again in a later cell (In [14]) using an
# absolute path to a file with the same name.
datafile = os.path.join(corpus, "formatted_movie_lines.txt")


b'L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!\n'
b'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!\n'
b'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.\n'
b'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?\n'
b"L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.\n"
b'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow\n'
b"L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.\n"
b'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No\n'
b'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?\n'
b'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?\n'


In [3]:
# Default word tokens
# Reserved vocabulary indices, in order:
#   PAD_token - padding for short sentences
#   SOS_token - start-of-sentence marker
#   EOS_token - end-of-sentence marker
PAD_token, SOS_token, EOS_token = 0, 1, 2

In [4]:
class Voc:
    """Vocabulary mapping words to integer indices and back.

    Indices 0-2 are reserved for the PAD/SOS/EOS tokens, so the first real
    word receives index 3.

    Attributes:
        name: Label for this vocabulary.
        trimmed: True once ``trim`` has run; ``trim`` is a no-op afterwards.
        word2index: word -> index.
        word2count: word -> number of times ``addWord`` saw it.
        index2word: index -> word, including the three reserved tokens.
        num_words: Size of the vocabulary including the reserved tokens.
    """

    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        """Add every space-separated token of ``sentence`` to the vocabulary.

        The split is on a single space, so an empty sentence contributes the
        empty string as a word.
        """
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        """Register ``word``: assign the next free index, or bump its count."""
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        """Drop every word seen fewer than ``min_count`` times and re-index.

        Runs at most once per instance. Kept words are re-added through
        ``addWord``, so their indices are renumbered from 3 and their counts
        restart at 1.
        """
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = [word for word, count in self.word2count.items() if count >= min_count]

        # Guard the ratio: an empty vocabulary used to raise ZeroDivisionError here.
        total_words = len(self.word2index)
        kept_ratio = len(keep_words) / total_words if total_words else 0.0
        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), total_words, kept_ratio
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3 # Count default tokens

        for word in keep_words:
            self.addWord(word)

In [5]:
# filterPair keeps a pair only when both sentences have strictly fewer than
# MAX_LENGTH space-separated tokens; it is also the default `max_length`
# of train/validation/evaluate.
MAX_LENGTH = 10  # Maximum sentence length to consider

# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return ``s`` with combining marks removed.

    The string is NFD-decomposed so accented letters split into a base
    character plus combining marks, then every character in Unicode category
    ``Mn`` is dropped. Other non-ASCII characters are left untouched.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for char in decomposed:
        if unicodedata.category(char) == 'Mn':
            continue
        kept.append(char)
    return ''.join(kept)

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    """Lowercase ``s`` and reduce it to letters plus ``.``, ``!`` and ``?``.

    Steps, in order: lowercase and strip, remove accents via
    ``unicodeToAscii``, put a space before each ``.``/``!``/``?``, replace
    every run of other non-letter characters with one space, collapse
    whitespace and strip again.
    """
    text = unicodeToAscii(s.lower().strip())
    substitutions = (
        (r"([.!?])", r" \1"),       # detach sentence punctuation from the word
        (r"[^a-zA-Z.!?]+", r" "),   # anything else that is not a letter becomes a space
        (r"\s+", r" "),             # collapse repeated whitespace
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    """Read tab-separated sentence pairs and create an empty vocabulary.

    Args:
        datafile: Path of a UTF-8 text file with one tab-separated row per line.
        corpus_name: Name given to the new ``Voc``.

    Returns:
        ``(voc, pairs)``: ``voc`` is a fresh, still empty ``Voc``; ``pairs`` is
        a list with one entry per line, each a list of the normalized
        tab-separated fields.
    """
    print("Reading lines...")
    # Read the file and split into lines; the context manager closes the
    # handle, which the previous one-liner left open.
    with open(datafile, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p):
    """Return True iff ``p`` holds two sentences that are both under MAX_LENGTH.

    Length is the number of single-space-separated tokens. A row with fewer
    than two fields (for example the single empty row produced by an empty
    data file) is rejected instead of raising IndexError.
    """
    # Input sequences need to preserve the last word for EOS token
    return (
        len(p) >= 2
        and len(p[0].split(' ')) < MAX_LENGTH
        and len(p[1].split(' ')) < MAX_LENGTH
    )

# Filter pairs using filterPair condition
def filterPairs(pairs):
    """Return, in order, the pairs for which ``filterPair`` is true."""
    return list(filter(filterPair, pairs))

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    """Read ``datafile``, filter the pairs by length and build the vocabulary.

    Args:
        corpus: Not used by this function.
        corpus_name: Name for the created vocabulary.
        datafile: Path of the tab-separated pairs file.
        save_dir: Not used by this function.

    Returns:
        ``(voc, pairs)``: the vocabulary populated from both sentences of every
        kept pair, and the filtered pair list.
    """
    print("Start preparing training data ...")
    voc, all_pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(all_pairs)))
    pairs = filterPairs(all_pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        # Query first, then response, matching the index assignment order.
        for sentence in (pair[0], pair[1]):
            voc.addSentence(sentence)
    print("Counted words:", voc.num_words)
    return voc, pairs

In [6]:
# Path of the formatted validation pairs file inside the corpus directory.
# NOTE(review): a later cell (In [14]) rebinds this name to
# "formatted_movie_lines_validation.txt", which is a different file name from
# the "_valid.txt" used here -- confirm which file is intended.
datafile_validation = os.path.join(corpus, "formatted_movie_lines_valid.txt")

In [7]:
# Load/Assemble voc and pairs
# save_dir is only passed through here; loadPrepareData does not use it.
save_dir = os.path.join("data", "save")
# NOTE(review): this reads `datafile`, but the cell that writes that file
# (In [16]) appears further down, so on a fresh machine the file must already exist.
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)

Start preparing training data ...
Reading lines...
Read 171282 sentence pairs
Trimmed to 49566 sentence pairs
Counting words...
Counted words: 15583


In [8]:
# Build a second vocabulary and pair list from the validation file.
# NOTE(review): loadPrepareData creates a fresh Voc per call, so word indices
# in voc_validation are assigned independently of `voc` -- confirm that
# downstream code does not mix the two index spaces.
voc_validation, pairs_validation = loadPrepareData(corpus, corpus_name, datafile_validation, save_dir)

Start preparing training data ...
Reading lines...
Read 53424 sentence pairs
Trimmed to 15653 sentence pairs
Counting words...
Counted words: 7808


In [9]:
def indexesFromSentence(voc, sentence):
    """Map ``sentence`` to vocabulary indices and append ``EOS_token``.

    Raises:
        KeyError: If a space-separated token is missing from ``voc.word2index``.
    """
    indexes = []
    for word in sentence.split(' '):
        indexes.append(voc.word2index[word])
    indexes.append(EOS_token)
    return indexes


def zeroPadding(l, fillvalue=PAD_token):
    """Transpose a batch of index lists, padding short ones with ``fillvalue``.

    Returns a list of tuples: entry ``t`` holds the t-th index of every
    sequence in ``l``, with ``fillvalue`` where a sequence has already ended.
    """
    time_major = itertools.zip_longest(*l, fillvalue=fillvalue)
    return list(time_major)

def binaryMatrix(l, value=PAD_token):
    """Build a 0/1 mask with the same nested shape as ``l``.

    Args:
        l: Iterable of token sequences.
        value: Token treated as padding. It was previously ignored in favour of
            a hard-coded ``PAD_token``; since that is also the default, callers
            that do not pass it get the same mask as before.

    Returns:
        A list of lists holding 0 where the token equals ``value``, else 1.
    """
    return [[0 if token == value else 1 for token in seq] for seq in l]

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    """Convert input sentences into a padded index tensor plus their lengths.

    Returns:
        ``(padVar, lengths)``: ``padVar`` is a LongTensor built from the output
        of ``zeroPadding``; ``lengths`` holds each sentence's index count
        including the EOS appended by ``indexesFromSentence``.
    """
    indexes_batch = []
    sentence_lengths = []
    for sentence in l:
        indexes = indexesFromSentence(voc, sentence)
        indexes_batch.append(indexes)
        sentence_lengths.append(len(indexes))
    padVar = torch.LongTensor(zeroPadding(indexes_batch))
    lengths = torch.tensor(sentence_lengths)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    """Convert target sentences into a padded tensor, its mask and max length.

    Returns:
        ``(padVar, mask, max_target_len)``: the padded LongTensor, a BoolTensor
        built from ``binaryMatrix`` of the padded batch, and the longest
        target's index count including EOS.
    """
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max(len(indexes) for indexes in indexes_batch)
    padded = zeroPadding(indexes_batch)
    mask = torch.BoolTensor(binaryMatrix(padded))
    padVar = torch.LongTensor(padded)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    """Turn a batch of ``[query, response]`` pairs into training tensors.

    Pairs are ordered by query length (space-separated tokens), longest first,
    so that the lengths handed to the encoder are in decreasing order.

    Args:
        voc: Vocabulary used to index the sentences.
        pair_batch: List of pairs; it is no longer sorted in place, so the
            caller's list keeps its order.

    Returns:
        ``(inp, lengths, output, mask, max_target_len)`` as produced by
        ``inputVar`` and ``outputVar``.
    """
    # sorted() is stable like list.sort(), so ties keep their relative order.
    ordered = sorted(pair_batch, key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch = [pair[0] for pair in ordered]
    output_batch = [pair[1] for pair in ordered]
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len


In [10]:
def printLines(file, n=10):
    """Print the first ``n`` raw lines of ``file``.

    This repeats the definition from the earlier cell In [2]. The file is
    opened in binary mode, so each line is printed as a ``bytes`` repr with
    its line terminator intact.

    Args:
        file: Path of the file to preview.
        n: Number of leading lines to print (non-negative); ``None`` prints
            every line.
    """
    with open(file, 'rb') as datafile:
        # Stream the file and stop after n lines instead of loading it all.
        for line in itertools.islice(datafile, n):
            print(line)

In [11]:
# Splits each line of the file into a dictionary of fields
def loadLines(fileName, fields):
    """Parse a " +++$+++ "-separated file into a dict of records.

    Each line is split on the separator and its values are assigned to
    ``fields`` by position. Records are keyed by their ``lineID`` value; the
    last value keeps its trailing newline.

    Raises:
        IndexError: If a line has fewer values than ``fields``.
    """
    lines = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for raw_line in f:
            values = raw_line.split(" +++$+++ ")
            record = {field: values[position] for position, field in enumerate(fields)}
            lines[record['lineID']] = record
    return lines

In [12]:
# Groups fields of lines from `loadLines` into conversations based on *movie_conversations.txt*
def loadConversations(fileName, lines, fields):
    """Parse the conversations file and attach the referenced line records.

    Args:
        fileName: Path of the " +++$+++ "-separated conversations file.
        lines: Mapping from line id to line record, as built by ``loadLines``.
        fields: Field names assigned to each line's values by position.

    Returns:
        A list of dicts, one per conversation, holding the parsed fields plus a
        ``"lines"`` list with the records of its utterances in order.

    Raises:
        KeyError: If an utterance id is missing from ``lines``.
    """
    # convObj["utteranceIDs"] is a string such as "['L598485', 'L598486', ...]"
    utterance_id_pattern = re.compile('L[0-9]+')
    conversations = []
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for raw_line in f:
            values = raw_line.split(" +++$+++ ")
            convObj = {field: values[position] for position, field in enumerate(fields)}
            lineIds = utterance_id_pattern.findall(convObj["utteranceIDs"])
            # Reassemble the conversation from the individual line records
            convObj["lines"] = [lines[lineId] for lineId in lineIds]
            conversations.append(convObj)
    return conversations

In [13]:
# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    """Build ``[input, target]`` pairs from consecutive lines of each conversation.

    Every line except the last is paired with the line that follows it. Texts
    are stripped, and a pair is skipped when either side is empty afterwards.
    """
    qa_pairs = []
    for conversation in conversations:
        utterances = conversation["lines"]
        # zip stops at the shorter sequence, so the last line (which has no
        # answer) is never used as an input.
        for current, following in zip(utterances, utterances[1:]):
            inputLine = current["text"].strip()
            targetLine = following["text"].strip()
            if not inputLine or not targetLine:
                continue
            qa_pairs.append([inputLine, targetLine])
    return qa_pairs

In [14]:
# Define path to new file
# Both formatted files live in the corpus directory, so build the paths from
# `corpus` (defined in the setup cell In [2]) instead of repeating the
# absolute path. On the original machine these resolve to the same locations.
datafile = os.path.join(corpus, "formatted_movie_lines.txt")
datafile_validation = os.path.join(corpus, "formatted_movie_lines_validation.txt")


In [15]:
# Field delimiter of the formatted pair files. The decode call "unescapes" the
# value, so a backslash-t spelled as two characters would also become a real tab.
delimiter = str(codecs.decode('\t', "unicode_escape"))

# Start from an empty lines dict and conversations list; the loading cell
# rebinds both.
lines, conversations = {}, []

# Column names, in file order, for movie_lines.txt and movie_conversations.txt.
MOVIE_LINES_FIELDS = [
    "lineID",
    "characterID",
    "movieID",
    "character",
    "text",
]
MOVIE_CONVERSATIONS_FIELDS = [
    "character1ID",
    "character2ID",
    "movieID",
    "utteranceIDs",
]

In [16]:
 # Load lines and process conversations
print("\nProcessing corpus...")
# Paths are built from `corpus`, as in the preview cell, instead of repeating
# the absolute directory.
lines = loadLines(os.path.join(corpus, "movie_lines.txt"), MOVIE_LINES_FIELDS)
print("\nLoading conversations...")
conversations = loadConversations(os.path.join(corpus, "movie_conversations.txt"),
                                  lines, MOVIE_CONVERSATIONS_FIELDS)

# Extract the pairs once and split by position: the first
# VALIDATION_PAIR_COUNT pairs form the validation file, the rest the training
# file. This is a sequential split; nothing is shuffled.
VALIDATION_PAIR_COUNT = 50000
sentence_pairs = extractSentencePairs(conversations)

# Write new csv file
print("\nWriting Validation formatted file...")
with open(datafile_validation, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    writer.writerows(sentence_pairs[:VALIDATION_PAIR_COUNT])

print("\nWriting Training  formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    writer.writerows(sentence_pairs[VALIDATION_PAIR_COUNT:])



Processing corpus...

Loading conversations...

Writing Validation formatted file...

Writing Training  formatted file...


In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF ,LatentDirichletAllocation
from sklearn.pipeline import Pipeline
from sklearn.datasets import fetch_20newsgroups
import numpy as np

In [18]:
def save_model(dictionary, feature_names, filename):
    """Write a topic model to ``filename`` in NumPy ``.npz`` format.

    The archive holds two entries, ``dictionary`` and ``feature_names``, which
    ``load_model`` reads back.
    """
    arrays = {"dictionary": dictionary, "feature_names": feature_names}
    with open(filename, "wb") as handle:
        np.savez(handle, **arrays)
        
def load_model(filename):
    """Load a model saved with ``save_model``.

    Returns a tuple ``(dictionary, feature_names)``.
    """
    with open(filename, "rb") as handle:
        archive = np.load(handle)
        # Read both arrays while the underlying file is still open.
        dictionary = archive["dictionary"]
        feature_names = archive["feature_names"]
    return dictionary, feature_names
    

def print_top_words(dictionary, feature_names, n_top_words):
    """Print the ``n_top_words`` highest-weighted words of every topic.

    ``dictionary`` is iterated row by row, one row per topic, and
    ``feature_names[i]`` is the word belonging to column ``i``. One line is
    printed per topic, followed by a blank line.
    """
    for topic_idx, topic in enumerate(dictionary):
        # argsort is ascending: reverse it, then keep the leading entries.
        ranked = topic.argsort()[::-1][:n_top_words]
        words = [feature_names[i] for i in ranked]
        print("Topic #%d: " % topic_idx + " ".join(words))
    print()


In [19]:
def train_model(data, n_features, n_components):
    """Fit a TF-IDF + LDA pipeline on ``data``.

    Args:
        data: Iterable of raw text documents.
        n_features: Maximum vocabulary size kept by the TF-IDF vectorizer.
        n_components: Number of LDA topics.

    Returns:
        ``(dictionary, features)``: the fitted LDA ``components_`` matrix and
        the list of vocabulary words matching its columns.
    """
    model = Pipeline(steps=[("tfidf", TfidfVectorizer(max_df=0.95, min_df=2,
                                                      max_features=n_features, stop_words='english')),
                            ("lda", LatentDirichletAllocation(n_components=n_components, random_state=1))])
    model.fit(data)
    vectorizer = model["tfidf"]
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when it exists and convert to a list so the return type is
    # the same on both old and new versions.
    if hasattr(vectorizer, "get_feature_names_out"):
        features = vectorizer.get_feature_names_out().tolist()
    else:
        features = vectorizer.get_feature_names()
    dictionary = model["lda"].components_
    return dictionary, features

def train_save_model(data, n_features, n_components, filename):
    """Train a topic model with ``train_model`` and save it to ``filename``.

    Returns the same ``(dictionary, features)`` pair that was saved.
    """
    model_parts = train_model(data, n_features, n_components)
    save_model(model_parts[0], model_parts[1], filename)
    return model_parts[0], model_parts[1]

def load_news():
    """Fetch the 20 newsgroups dataset and return its documents.

    Headers, footers and quotes are removed, and the data is shuffled with a
    fixed ``random_state`` of 1.
    """
    stripped_parts = ('headers', 'footers', 'quotes')
    dataset = fetch_20newsgroups(shuffle=True, random_state=1, remove=stripped_parts)
    return dataset.data


In [20]:
# number of top words to use in tfidf representation of data
n_features = 1000
# number of topics in LDA
n_components = 10

# Location of the saved topic model; the encoder cell further down loads the
# feature names from this same path.
NEWS_LDA_PATH = "data/topic/news-lda.npz"
# save_model opens the file directly and fails if the directory is missing,
# so create it first.
os.makedirs(os.path.dirname(NEWS_LDA_PATH), exist_ok=True)

# train model and save it
print("Loading, training, and saving 20 news groups model")
news_data = load_news()
news_dict, news_feats = train_save_model(news_data, n_features, n_components, NEWS_LDA_PATH)
print_top_words(news_dict, news_feats, 10)

Loading, training, and saving 20 news groups model
Topic #0: 00 10 11 1993 15 12 23 13 apr 17
Topic #1: god people think jesus don say believe just does know
Topic #2: windows window file program use files using edu server motif
Topic #3: government people key israel gun law don state encryption public
Topic #4: drive card mac software video monitor use windows pc scsi
Topic #5: space nasa new orbit earth air years launch research water
Topic #6: game team year games season players play hockey league win
Topic #7: mail phone address thanks sale edu email interested 00 price
Topic #8: just like car don bike good know think ve time
Topic #9: thanks know mail looking post hi info faq help appreciated



In [21]:
# Pick the compute device once. `device` is now bound on CPU-only machines as
# well; before, it was only defined when CUDA was available, so every later
# `.to(device)` raised NameError without a GPU.
USE_CUDA = torch.cuda.is_available()
print("USE_CUDA", USE_CUDA)
device = torch.device("cuda" if USE_CUDA else "cpu")

USE_CUDA True


In [22]:
def calculate_codes(topic_for_code, input_seq_for_code, voc, feature_path, batch_size):
    """Project each input sentence's topic-word indicator vector onto the topics.

    For every row of the batch a 0/1 vector over the saved topic-model feature
    names is built (1 where the sentence contains that word), and the topic
    matrix is multiplied with it.

    Args:
        topic_for_code: Topic matrix; assumes shape (num_topics, num_features)
            -- TODO confirm against the caller.
        input_seq_for_code: Batch of token indices indexed as ``[row][position]``.
        voc: Vocabulary whose ``index2word`` maps token indices back to words.
        feature_path: Path of the ``.npz`` archive with a ``feature_names`` entry.
        batch_size: Number of leading rows of ``input_seq_for_code`` to process
            (64 for training, 1 for chatting).

    Returns:
        The ``torch.bmm`` product of the repeated topic matrix and the
        indicator vectors, on ``device``.
    """
    # Close the archive after reading; np.load(...)[key] alone leaves it open.
    with np.load(feature_path) as archive:
        feature_names = archive["feature_names"].tolist()

    # word -> column positions, built once so each token is a dict lookup
    # instead of a scan over every feature name.
    positions = {}
    for column, name in enumerate(feature_names):
        positions.setdefault(name, []).append(column)

    new_input_seq = torch.zeros(batch_size, len(feature_names))
    for i in range(batch_size):
        for token in input_seq_for_code[i]:
            word = voc.index2word[token.item()]
            for column in positions.get(word, ()):
                new_input_seq[i][column] = 1

    three_d_topic = topic_for_code.repeat(batch_size, 1, 1).to(device)
    # (batch, features) -> (1, batch, features) -> (batch, features, 1)
    three_d_q = new_input_seq.repeat(1, 1, 1).permute(1, 2, 0).to(device)

    return torch.bmm(three_d_topic, three_d_q)


In [23]:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder that also returns topic codes for its input.

    Args:
        hidden_size: Size of the GRU input and hidden state.
        embedding: Embedding module applied to the token indices.
        topics: Topic matrix forwarded to ``calculate_codes``.
        n_layers: Number of stacked GRU layers.
        dropout: GRU dropout; ignored when ``n_layers == 1``.
        batch_size: Kept for reference; ``forward`` takes the batch size from
            its input.
        voc: Optional vocabulary used to map indices back to words for the
            topic codes. When omitted, the module-level ``voc`` is used, as
            before.
        feature_path: Path of the saved topic model read by ``calculate_codes``.
    """

    def __init__(self,
                 hidden_size,
                 embedding,
                 topics,
                 n_layers=1,
                 dropout=0,
                 batch_size=64,
                 voc=None,
                 feature_path=r"data/topic/news-lda.npz"):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding
        self.topics = topics
        self.batch_size = batch_size
        self.voc = voc
        self.feature_path = feature_path
        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        self.gru = nn.GRU(hidden_size,
                          hidden_size,
                          n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a batch of padded sequences.

        Args:
            input_seq: Token indices; indexed here as (max_length, batch).
            input_lengths: Length of every sequence in the batch.
            hidden: Optional initial GRU hidden state.

        Returns:
            ``(outputs, hidden, codes)``: the summed forward/backward GRU
            outputs, the final hidden state, and the topic codes.
        """
        # Size the topic codes from the batch actually passed in, so the same
        # encoder serves training batches and single-sentence chatting.
        batch_size = input_seq.size(1)
        # Fall back to the notebook-level vocabulary when none was supplied.
        vocab = self.voc if self.voc is not None else voc
        # Get the codes first; calculate_codes indexes the batch row-first.
        input_seq_for_code = input_seq.transpose(0, 1)
        codes = calculate_codes(self.topics, input_seq_for_code, vocab,
                                self.feature_path, batch_size)
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # pack_padded_sequence requires tensor lengths to be on the CPU
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()
        # Pack padded batch of sequences for RNN module
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        # Return output, final hidden state and topic codes
        return outputs, hidden, codes


In [24]:
class Attn(nn.Module):
    """Attention layer with three scoring methods.

    Args:
        method: One of ``'dot'``, ``'general'`` or ``'concat'``.
        hidden_size: Size of the hidden vectors being compared.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """

    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            # torch.FloatTensor(n) returns uninitialised memory, which may hold
            # NaN/inf; start from uniform random values instead, as
            # TopicAttention does for its own `v`.
            self.v = nn.Parameter(torch.rand(hidden_size))

    def dot_score(self, hidden, encoder_output):
        """Score by the dot product of ``hidden`` and each encoder output."""
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        """Score by the dot product with linearly transformed encoder outputs."""
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        """Score with a tanh layer over the concatenated vectors, weighted by ``v``."""
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised attention weights.

        The scores are computed along dim 2, transposed, normalised over dim 1
        and given an extra middle dimension. Assumes ``encoder_outputs`` is
        (max_length, batch, hidden) -- TODO confirm; the result is then
        (batch, 1, max_length).
        """
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)


In [25]:
def maskNLLLoss(inp, target, mask):
    """Mean negative log-likelihood of the target tokens at unmasked positions.

    Args:
        inp: Probabilities per class; ``target`` selects one entry per row
            along dim 1.
        target: Target class index for every row.
        mask: Boolean tensor selecting which rows count towards the loss.

    Returns:
        ``(loss, n_total)``: the mean loss over the selected rows, moved to
        ``device``, and the number of selected rows as a Python number.
    """
    target_probs = inp.gather(1, target.view(-1, 1)).squeeze(1)
    neg_log_likelihood = target_probs.log().neg()
    loss = torch.masked_select(neg_log_likelihood, mask).mean().to(device)
    selected_count = mask.sum().item()
    return loss, selected_count

In [26]:
def validation(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    """Compute the average masked loss of one batch without updating the models.

    The batch is decoded the same way as in ``train`` (teacher forcing with
    ratio 1.0), but with gradient tracking disabled and both models switched
    to eval mode for the duration of the call, so dropout does not distort the
    reported loss. The previous train/eval mode of each model is restored
    afterwards.

    Args:
        input_variable, lengths, target_variable, mask, max_target_len: One
            batch as returned by ``batch2TrainData``.
        encoder, decoder: Models to evaluate.
        embedding, encoder_optimizer, decoder_optimizer, clip, max_length:
            Unused; kept so the signature mirrors ``train``.
        batch_size: Number of sequences in the batch.

    Returns:
        Loss averaged over all unmasked target tokens of the batch.
    """
    # Set device options
    input_variable = input_variable.to(device)
    # pack_padded_sequence requires the lengths tensor to stay on the CPU
    lengths = lengths.to("cpu")
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    print_losses = []
    n_totals = 0

    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            # Forward pass through encoder
            encoder_outputs, encoder_hidden, codes = encoder(input_variable, lengths)

            # Create initial decoder input (start with SOS tokens for each sentence)
            decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
            decoder_input = decoder_input.to(device)

            # Set initial decoder hidden state to the encoder's final hidden state
            decoder_hidden = encoder_hidden[:decoder.n_layers]

            # Determine if we are using teacher forcing this iteration
            teacher_forcing_ratio = 1.0
            use_teacher_forcing = random.random() < teacher_forcing_ratio

            # Forward batch of sequences through decoder one time step at a time
            for t in range(max_target_len):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
                )
                if use_teacher_forcing:
                    # Teacher forcing: next input is current target
                    decoder_input = target_variable[t].view(1, -1)
                else:
                    # No teacher forcing: next input is decoder's own current output
                    _, topi = decoder_output.topk(1)
                    decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
                    decoder_input = decoder_input.to(device)
                # Calculate and accumulate loss
                mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
    finally:
        # Restore whatever mode the models were in before validation
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return sum(print_losses) / n_totals


In [27]:
def train(input_variable,
          lengths,
          target_variable,
          mask,
          max_target_len,
          encoder,
          decoder,
          embedding,
          encoder_optimizer,
          decoder_optimizer,
          batch_size,
          clip,
          max_length=MAX_LENGTH):
    """Run one optimisation step on a single batch.

    Args:
        input_variable, lengths, target_variable, mask, max_target_len: One
            batch as returned by ``batch2TrainData``.
        encoder, decoder: Models being trained.
        embedding: Unused here.
        encoder_optimizer, decoder_optimizer: Optimizers stepped after backprop.
        batch_size: Number of sequences in the batch.
        clip: Maximum gradient norm, applied to each model separately.
        max_length: Unused here.

    Returns:
        Loss averaged over all unmasked target tokens of the batch.
    """
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    # pack_padded_sequence requires the lengths tensor to stay on the CPU
    lengths = lengths.to("cpu")
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden, codes = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration. With a ratio
    # of 1.0 the comparison is always true, so the free-running branch below
    # only becomes active if the ratio is lowered.
    teacher_forcing_ratio = 1.0
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Forward batch of sequences through decoder one time step at a time
    for t in range(max_target_len):
        decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden, encoder_outputs, codes, batch_size
        )
        if use_teacher_forcing:
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
        else:
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
        # Calculate and accumulate loss
        mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
        loss += mask_loss
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal

    # Perform backpropagation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals


In [28]:
def trainIters(model_name,
               voc,
               voc_validation,
               pairs,
               pairs_validation,
               encoder,
               decoder,
               encoder_optimizer,
               decoder_optimizer,
               embedding,
               encoder_n_layers,
               decoder_n_layers,
               save_dir,
               n_iteration,
               batch_size,
               print_every,
               save_every,
               clip,
               corpus_name,
               loadFilename,
               DICT_NAME,
               checkpoint):
    """Run the training loop, logging training and validation loss.

    One random batch of ``batch_size`` pairs is drawn for each iteration, for
    both the training and the validation set. Every ``print_every`` iterations
    the average losses since the last report are printed and appended to
    ``history_delta.txt`` as "<train> <validation>" lines. The history file is
    opened in write mode, so any earlier content is overwritten.

    Args:
        voc, pairs: Training vocabulary and sentence pairs.
        voc_validation, pairs_validation: Validation vocabulary and pairs.
        encoder, decoder, encoder_optimizer, decoder_optimizer, embedding,
            batch_size, clip: Forwarded to ``train`` and ``validation``.
        n_iteration: Total number of iterations.
        print_every: Reporting interval in iterations.
        loadFilename, checkpoint: When ``loadFilename`` is truthy, training
            resumes after ``checkpoint['iteration']``.
        model_name, encoder_n_layers, decoder_n_layers, save_dir, save_every,
            corpus_name, DICT_NAME: Not used in this function.
    """
    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    # NOTE(review): validation batches are indexed with voc_validation, whose
    # word indices are assigned independently of `voc` -- confirm the models
    # are meant to receive indices from a different vocabulary.
    training_batches_validation = [batch2TrainData(voc_validation, [random.choice(pairs_validation) for _ in range(batch_size)])
                        for _ in range(n_iteration)]

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    print_loss_validation = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    # The context manager closes (and flushes) the history file even when a
    # training step raises; it was never closed before.
    with open(r'history_delta.txt', 'w') as history_file:
        for iteration in range(start_iteration, n_iteration + 1):
            training_batch = training_batches[iteration - 1]
            training_batch_validation = training_batches_validation[iteration - 1]

            # Extract fields from batch
            input_variable, lengths, target_variable, mask, max_target_len = training_batch
            input_variable_validation, lengths_validation, target_variable_validation, mask_validation, max_target_len_validation = training_batch_validation

            # Run a training iteration with batch
            loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                         decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
            print_loss += loss
            loss_validation = validation(input_variable_validation, lengths_validation, target_variable_validation, mask_validation, max_target_len_validation, encoder,
                         decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
            print_loss_validation += loss_validation

            # Print progress
            if iteration % print_every == 0:
                print_loss_avg = print_loss / print_every
                print_loss_avg_validation = print_loss_validation / print_every
                print("Iteration: {}; Percent complete: {:.1f}%; Training loss: {:.4f}; Validation loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg, print_loss_avg_validation))
                history_file.write('{}'.format(print_loss_avg))
                history_file.write(' ')
                history_file.write('{}'.format(print_loss_avg_validation))
                history_file.write("\n")

                print_loss = 0
                print_loss_validation = 0



In [32]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    """Decode a response for one already-normalized sentence.

    Args:
        encoder, decoder: Not used directly; the ``searcher`` does the work.
        searcher: Callable taking ``(input_batch, lengths, max_length)`` and
            returning ``(tokens, scores)``.
        voc: Vocabulary used to index the sentence and to decode the tokens.
        sentence: Input sentence; every word must be in ``voc``.
        max_length: Maximum number of tokens to decode.

    Returns:
        The decoded words as a list of strings.

    Raises:
        KeyError: If ``sentence`` contains a word missing from ``voc``.
    """
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    # pack_padded_sequence requires the lengths tensor to stay on the CPU
    lengths = lengths.to("cpu")
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher, voc):
    """Interactive chat loop: read a sentence, print the reply and its latency.

    Runs until the user enters ``q`` or ``quit``. A sentence containing a word
    that is not in ``voc`` prints an error message and the loop continues.
    """
    # Imported locally: none of the visible import cells provides `time`,
    # which made the timing calls below fail with NameError.
    import time

    input_sentence = ''
    while True:
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit':
                break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            start_time = time.time()
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Bot:', ' '.join(output_words))
            end_time = time.time()
            print('Responding time:', end_time - start_time)
        except KeyError:
            print("Error: Encountered unknown word.")


In [33]:
def rand_pick(probabilities):
    """Sample one index from the first row of ``probabilities``.

    A uniform random number is compared with the running cumulative sum of the
    row; the first index whose cumulative probability exceeds it is chosen.
    If none does, the last index is returned.

    Args:
        probabilities: Tensor whose first row holds the distribution to sample
            from; other rows are ignored.

    Returns:
        ``(item_list, score)``: one-element tensors on ``device`` holding the
        chosen index (long) and its probability.
    """
    row = probabilities[0]
    x = random.uniform(0, 1)
    # One cumsum on the tensor replaces a Python loop that compared a single
    # element per iteration across the whole vocabulary.
    cumulative = torch.cumsum(row, dim=0)
    hits = torch.nonzero(cumulative > x)
    if hits.numel() > 0:
        item = int(hits[0])
    else:
        # Rounding can leave the total just below x: fall back to the last index
        item = row.numel() - 1

    item_list = torch.zeros([1], device=device, dtype=torch.long)
    item_list[0] = item
    score = torch.zeros([1], device=device)
    score[0] = row[item]

    return item_list, score

class SearchDecoder(nn.Module):
    """Decode a response by sampling one token per step from the decoder output.

    Args:
        encoder: Module returning ``(encoder_outputs, encoder_hidden, codes)``.
        decoder: Module called with ``(decoder_input, decoder_hidden,
            encoder_outputs, codes, batch_size)``; must expose ``n_layers``.
        batch_size: Batch size forwarded to the decoder.
    """

    def __init__(self, encoder, decoder, batch_size=1):
        super(SearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.batch_size = batch_size

    def forward(self, input_seq, input_length, max_length):
        """Generate ``max_length`` tokens for ``input_seq``.

        Returns:
            ``(all_tokens, all_scores)``: 1-D tensors with the sampled token
            indices and their probabilities, in generation order.
        """
        # Forward input through encoder model
        encoder_outputs, encoder_hidden, codes = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input,
                                                          decoder_hidden,
                                                          encoder_outputs,
                                                          codes,
                                                          self.batch_size)
            # Sample the next token from the output distribution. The sampler
            # defined alongside this class is `rand_pick`; the previously
            # called `my_random_pick` is not defined in the visible cells.
            decoder_input, decoder_scores = rand_pick(decoder_output)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores


In [34]:
class TopicAttention(nn.Module):
    """Additive attention over the rows of a topic dictionary.

    Every topic row is scored against the decoder hidden state and one encoder
    vector; the scores are normalised with a softmax across topics.
    """

    def __init__(self, topic_vocab_size, enc_hid_dim, dec_hid_dim):
        """
        Args:
            topic_vocab_size: width of one row of the topic dictionary.
            enc_hid_dim: width of the encoder vector given to ``forward``.
            dec_hid_dim: width of the decoder hidden state; also the width of
                the energy projection.
        """
        super(TopicAttention, self).__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        # Projects the concatenated [hidden ; topic row ; encoder vector] to dec_hid_dim
        self.attn = nn.Linear(topic_vocab_size + dec_hid_dim + enc_hid_dim, dec_hid_dim)
        # Learned vector that reduces each energy vector to a scalar score
        self.v = nn.Parameter(torch.rand(dec_hid_dim))

    def forward(self, hidden, topic_dict, enc_hidden):
        """Compute attention weights over topics.

        Args:
            hidden: decoder hidden state, ``[n_layers, batch, dec_hid_dim]``
                (only the last layer is used) or ``[batch, dec_hid_dim]``.
            topic_dict: ``[num_topics, topic_vocab_size]`` topic dictionary.
            enc_hidden: ``[batch, enc_hid_dim]`` encoder vector.

        Returns:
            Tensor ``[batch, 1, num_topics]`` whose last dimension sums to 1.
        """
        batch_size = enc_hidden.shape[0]
        num_topics = topic_dict.shape[0]

        # A stacked (multi-layer) hidden state would otherwise be tiled to
        # num_topics * n_layers rows and fail to concatenate with the topics;
        # keep only the top layer. For n_layers == 1 this is a no-op.
        if hidden.dim() == 3:
            hidden = hidden[-1:]

        # Tile every operand to [batch_size, num_topics, *]
        hidden = hidden.repeat(num_topics, 1, 1).permute(1, 0, 2)
        enc_hidden = enc_hidden.repeat(num_topics, 1, 1).permute(1, 0, 2)
        topic_dict = topic_dict.repeat(batch_size, 1, 1)

        # energy = [batch_size, num_topics, dec_hid_dim]
        energy = torch.tanh(self.attn(torch.cat((hidden, topic_dict, enc_hidden), dim=2)))

        # energy = [batch_size, dec_hid_dim, num_topics]
        energy = energy.permute(0, 2, 1)

        # v = [batch_size, 1, dec_hid_dim]
        v = self.v.repeat(batch_size, 1).unsqueeze(1)

        # attention = [batch_size, num_topics]
        attention = torch.bmm(v, energy).squeeze(1)

        return F.softmax(attention, dim=1).unsqueeze(1)


In [35]:
class TopicDecoder(nn.Module):
    """Single-step GRU decoder with attention over encoder outputs and topics.

    Each call to ``forward`` consumes one input token per sequence and returns
    a softmax distribution over ``output_size`` classes plus the new GRU state.
    """

    def __init__(self,
                 attn_model,
                 embedding,
                 hidden_size,
                 output_size,
                 enc_hid_dim,
                 dec_hid_dim,
                 topics,
                 topic_vocab_size,
                 n_layers=1,
                 dropout=0.1,
                 batch_size=1):
        """
        Args:
            attn_model: attention method name handed to ``Attn``.
            embedding: embedding module applied to the input tokens.
            hidden_size: GRU input and hidden width.
            output_size: width of the final output layer.
            enc_hid_dim: encoder width handed to ``TopicAttention``.
            dec_hid_dim: decoder width handed to ``TopicAttention``.
            topics: topic dictionary tensor, ``[num_topics, topic_vocab_size]``.
            topic_vocab_size: width of one topic row (``topics.shape[1]``).
            n_layers: number of stacked GRU layers.
            dropout: dropout probability for the embedding (and between GRU
                layers when ``n_layers > 1``).
            batch_size: stored for reference only.
        """
        super(TopicDecoder, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.topics = topics
        self.topic_vocab_size = topic_vocab_size
        self.batch_size = batch_size

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        # The concat input is [rnn_output ; context ; topic_context ; Pk_context].
        # Both topic terms are topic_vocab_size wide (see the bmm shapes in
        # forward), so the width is derived rather than hard-coded to 1000.
        self.concat = nn.Linear(hidden_size * 2 + topic_vocab_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)
        self.topic_attn = TopicAttention(topic_vocab_size, enc_hid_dim, dec_hid_dim)

    def forward(self,
                input_step,
                last_hidden,
                encoder_outputs,
                codes,
                batch_size=1):
        """Run one decoding step.

        Args:
            input_step: current input tokens, ``[1, batch]``.
            last_hidden: previous GRU state, ``[n_layers, batch, hidden_size]``.
            encoder_outputs: encoder outputs, ``[src_len, batch, hidden_size]``.
            codes: per-sequence topic weights, ``[batch, num_topics, 1]``
                (shape required by the bmm with the transposed topics).
            batch_size: kept for backward compatibility; the batch dimension is
                taken from ``encoder_outputs`` so a stale value cannot break
                the batched matrix products.

        Returns:
            ``(output, hidden)``: softmax probabilities ``[batch, output_size]``
            and the new GRU state.
        """
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)

        # Calculate attention weights from the current GRU output
        # (expected [batch, 1, src_len] for the bmm below)
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Topic attention uses the previous hidden state and the last encoder step
        topic_attn_weights = self.topic_attn(last_hidden, self.topics, encoder_outputs[-1])

        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        context = torch.bmm(attn_weights, encoder_outputs)

        # Tile the topic dictionary once, using the real batch dimension
        batch_size = encoder_outputs.size(0)
        batched_topics = self.topics.repeat(batch_size, 1, 1)

        # [batch, 1, num_topics] x [batch, num_topics, V] -> [batch, 1, V]
        topic_context = torch.bmm(topic_attn_weights, batched_topics)
        # [batch, V, num_topics] x [batch, num_topics, 1] -> [batch, V, 1]
        Pk_context = torch.bmm(batched_topics.permute(0, 2, 1), codes)

        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        topic_context = topic_context.squeeze(1)
        Pk_context = Pk_context.squeeze(2)
        concat_input = torch.cat((rnn_output, context, topic_context, Pk_context), 1)
        concat_output = torch.tanh(self.concat(concat_input))

        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output, hidden

In [41]:
# Model configuration shared by the encoder/decoder construction cells
model_name = 'topic_model'

# Attention method name handed to the decoder
attn_model = 'dot'

# Network sizes
hidden_size = 500
decoder_n_layers = 1
encoder_n_layers = 1

# Regularisation and batching
dropout = 0.1
batch_size = 1 # 64 for training, 1 for chatting

In [37]:
# Location of the LDA topic dictionary archive (relative to the working directory)
DICT_NAME = 'data/topic/news'
DICT_PATH = DICT_NAME + '-lda.npz'
print("DICT_PATH",DICT_PATH)
# np.load on an .npz returns a lazy archive that keeps the file open; read the
# "dictionary" array inside a context manager so the handle is closed promptly.
with np.load(DICT_PATH) as lda_archive:
    topic_dict = torch.tensor(lda_archive["dictionary"], dtype=torch.float).to(device)

DICT_PATH data/topic/news-lda.npz


In [38]:
# Checkpoint selection.
# checkpoint_iter: iteration number of the checkpoint of interest
# (presumably used to build the checkpoint path elsewhere; verify).
checkpoint_iter = 64000
# loadFilename: leave as None to build fresh models; the cells below only
# restore saved state when this is truthy.
loadFilename = None

In [39]:
print('Building encoder and decoder ...')

# Word embeddings shared by the encoder and the decoder
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)

# Encoder, decoder and embedding widths all equal the shared hidden size
enc_hid_dim = dec_hid_dim = emb_dim = hidden_size

# Topic-aware encoder
encoder = EncoderRNN(
    hidden_size=hidden_size,
    embedding=embedding,
    topics=topic_dict,
    n_layers=encoder_n_layers,
    dropout=dropout,
    batch_size=batch_size,
)

# Topic-aware decoder. It is built with its default batch_size; the batch size
# is supplied again on every forward call.
decoder = TopicDecoder(
    attn_model=attn_model,
    embedding=embedding,
    hidden_size=hidden_size,
    output_size=voc.num_words,
    enc_hid_dim=enc_hid_dim,
    dec_hid_dim=dec_hid_dim,
    topics=topic_dict,
    topic_vocab_size=topic_dict.shape[1],
    n_layers=decoder_n_layers,
    dropout=dropout,
)

# Restore saved weights only when a checkpoint file was selected
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

# Move both models to the configured device
encoder, decoder = encoder.to(device), decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [229]:
#start training
# Configure training/optimization
clip = 50.0  # passed to trainIters (presumably the gradient-clipping threshold; verify)
teacher_forcing_ratio = 1.0  # not passed below; presumably read as a global by the training step (verify)
learning_rate = 0.0001  # encoder learning rate
decoder_learning_ratio = 5.0  # decoder learning rate = learning_rate * this factor
n_iteration = 10000
print_every = 1
save_every = 100

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Move any restored optimizer state onto the same device as the models.
# Using `device` instead of an unconditional .cuda() keeps this cell runnable
# on CPU-only machines; with freshly built optimizers the state is empty and
# the loop does nothing.
for optimizer in (encoder_optimizer, decoder_optimizer):
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)

# Run training iterations
print("Starting Training!")
trainIters(model_name=model_name,
                   voc=voc,
                   voc_validation=voc_validation,
                   pairs=pairs,
                   pairs_validation=pairs_validation,
                   encoder=encoder,
                   decoder=decoder,
                   encoder_optimizer=encoder_optimizer,
                   decoder_optimizer=decoder_optimizer,
                   embedding=embedding,
                   encoder_n_layers=encoder_n_layers,
                   decoder_n_layers=decoder_n_layers,
                   save_dir=save_dir,
                   n_iteration=n_iteration,
                   batch_size=batch_size,
                   print_every=print_every,
                   save_every=save_every,
                   clip=clip,
                   corpus_name=corpus_name,
                   loadFilename=loadFilename,
                   DICT_NAME=DICT_NAME,
                   checkpoint=None)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Training loss: 9.6455; Validation loss: 9.4591
Iteration: 2; Percent complete: 0.0%; Training loss: 9.3333; Validation loss: 9.2758
Iteration: 3; Percent complete: 0.0%; Training loss: 8.8919; Validation loss: 8.8792
Iteration: 4; Percent complete: 0.0%; Training loss: 8.4588; Validation loss: 8.8420
Iteration: 5; Percent complete: 0.1%; Training loss: 8.0139; Validation loss: 8.3881
Iteration: 6; Percent complete: 0.1%; Training loss: 7.5489; Validation loss: 8.2012
Iteration: 7; Percent complete: 0.1%; Training loss: 7.2515; Validation loss: 8.0170
Iteration: 8; Percent complete: 0.1%; Training loss: 7.3038; Validation loss: 8.0796
Iteration: 9; Percent complete: 0.1%; Training loss: 7.2709; Validation loss: 7.9820
Iteration: 10; Percent complete: 0.1%; Training loss: 7.1980; Validation loss: 7.9168
Iteration: 11; Percent complete: 0.1%; Training loss: 6.6402; Validation loss

Iteration: 96; Percent complete: 1.0%; Training loss: 4.4387; Validation loss: 7.4025
Iteration: 97; Percent complete: 1.0%; Training loss: 4.7317; Validation loss: 7.3248
Iteration: 98; Percent complete: 1.0%; Training loss: 4.9077; Validation loss: 7.4756
Iteration: 99; Percent complete: 1.0%; Training loss: 4.3607; Validation loss: 7.7515
Iteration: 100; Percent complete: 1.0%; Training loss: 4.7195; Validation loss: 7.3063
Iteration: 101; Percent complete: 1.0%; Training loss: 4.5413; Validation loss: 7.3426
Iteration: 102; Percent complete: 1.0%; Training loss: 4.6390; Validation loss: 7.4348
Iteration: 103; Percent complete: 1.0%; Training loss: 4.7192; Validation loss: 7.3067
Iteration: 104; Percent complete: 1.0%; Training loss: 4.4623; Validation loss: 7.6797
Iteration: 105; Percent complete: 1.1%; Training loss: 4.7238; Validation loss: 7.3014
Iteration: 106; Percent complete: 1.1%; Training loss: 4.4814; Validation loss: 7.3493
Iteration: 107; Percent complete: 1.1%; Trainin

Iteration: 191; Percent complete: 1.9%; Training loss: 4.3964; Validation loss: 7.9794
Iteration: 192; Percent complete: 1.9%; Training loss: 4.2471; Validation loss: 7.7313
Iteration: 193; Percent complete: 1.9%; Training loss: 4.2205; Validation loss: 7.8005
Iteration: 194; Percent complete: 1.9%; Training loss: 4.5411; Validation loss: 7.6419
Iteration: 195; Percent complete: 1.9%; Training loss: 4.2252; Validation loss: 7.4914
Iteration: 196; Percent complete: 2.0%; Training loss: 4.3122; Validation loss: 7.9124
Iteration: 197; Percent complete: 2.0%; Training loss: 4.2970; Validation loss: 7.6416
Iteration: 198; Percent complete: 2.0%; Training loss: 4.3420; Validation loss: 7.7646
Iteration: 199; Percent complete: 2.0%; Training loss: 4.3072; Validation loss: 7.6221
Iteration: 200; Percent complete: 2.0%; Training loss: 4.3304; Validation loss: 7.6534
Iteration: 201; Percent complete: 2.0%; Training loss: 4.1609; Validation loss: 8.0615
Iteration: 202; Percent complete: 2.0%; Tra

Iteration: 286; Percent complete: 2.9%; Training loss: 4.4024; Validation loss: 8.1242
Iteration: 287; Percent complete: 2.9%; Training loss: 4.1641; Validation loss: 7.6705
Iteration: 288; Percent complete: 2.9%; Training loss: 4.2176; Validation loss: 7.9662
Iteration: 289; Percent complete: 2.9%; Training loss: 4.2568; Validation loss: 7.8198
Iteration: 290; Percent complete: 2.9%; Training loss: 3.8219; Validation loss: 8.0065
Iteration: 291; Percent complete: 2.9%; Training loss: 4.5140; Validation loss: 7.6661
Iteration: 292; Percent complete: 2.9%; Training loss: 3.9985; Validation loss: 8.0263
Iteration: 293; Percent complete: 2.9%; Training loss: 4.0507; Validation loss: 8.2324
Iteration: 294; Percent complete: 2.9%; Training loss: 4.2387; Validation loss: 7.8806
Iteration: 295; Percent complete: 2.9%; Training loss: 4.1276; Validation loss: 7.7369
Iteration: 296; Percent complete: 3.0%; Training loss: 4.5105; Validation loss: 7.9131
Iteration: 297; Percent complete: 3.0%; Tra

Iteration: 381; Percent complete: 3.8%; Training loss: 4.0023; Validation loss: 8.1190
Iteration: 382; Percent complete: 3.8%; Training loss: 3.8159; Validation loss: 8.0653
Iteration: 383; Percent complete: 3.8%; Training loss: 3.9617; Validation loss: 7.8170
Iteration: 384; Percent complete: 3.8%; Training loss: 4.0154; Validation loss: 7.9074
Iteration: 385; Percent complete: 3.9%; Training loss: 4.2363; Validation loss: 8.0455
Iteration: 386; Percent complete: 3.9%; Training loss: 3.9401; Validation loss: 7.7656
Iteration: 387; Percent complete: 3.9%; Training loss: 4.1137; Validation loss: 8.0834
Iteration: 388; Percent complete: 3.9%; Training loss: 4.2651; Validation loss: 7.9125
Iteration: 389; Percent complete: 3.9%; Training loss: 4.1861; Validation loss: 7.6268
Iteration: 390; Percent complete: 3.9%; Training loss: 3.8006; Validation loss: 7.7826
Iteration: 391; Percent complete: 3.9%; Training loss: 3.9662; Validation loss: 7.7376
Iteration: 392; Percent complete: 3.9%; Tra

Iteration: 476; Percent complete: 4.8%; Training loss: 4.2459; Validation loss: 7.9791
Iteration: 477; Percent complete: 4.8%; Training loss: 4.0802; Validation loss: 8.0817
Iteration: 478; Percent complete: 4.8%; Training loss: 3.8020; Validation loss: 8.2114
Iteration: 479; Percent complete: 4.8%; Training loss: 3.7107; Validation loss: 8.0432
Iteration: 480; Percent complete: 4.8%; Training loss: 3.7066; Validation loss: 8.0205
Iteration: 481; Percent complete: 4.8%; Training loss: 4.0872; Validation loss: 7.4877
Iteration: 482; Percent complete: 4.8%; Training loss: 4.2383; Validation loss: 7.9062
Iteration: 483; Percent complete: 4.8%; Training loss: 3.5953; Validation loss: 7.6310
Iteration: 484; Percent complete: 4.8%; Training loss: 3.8968; Validation loss: 8.0068
Iteration: 485; Percent complete: 4.9%; Training loss: 3.7303; Validation loss: 8.0869
Iteration: 486; Percent complete: 4.9%; Training loss: 4.2225; Validation loss: 8.1007
Iteration: 487; Percent complete: 4.9%; Tra

Iteration: 571; Percent complete: 5.7%; Training loss: 4.0454; Validation loss: 7.7755
Iteration: 572; Percent complete: 5.7%; Training loss: 3.9558; Validation loss: 7.5738
Iteration: 573; Percent complete: 5.7%; Training loss: 3.7974; Validation loss: 8.0855
Iteration: 574; Percent complete: 5.7%; Training loss: 3.8567; Validation loss: 8.1236
Iteration: 575; Percent complete: 5.8%; Training loss: 3.8265; Validation loss: 8.3018
Iteration: 576; Percent complete: 5.8%; Training loss: 4.1372; Validation loss: 7.8898
Iteration: 577; Percent complete: 5.8%; Training loss: 3.8945; Validation loss: 8.0722
Iteration: 578; Percent complete: 5.8%; Training loss: 3.5881; Validation loss: 8.3206
Iteration: 579; Percent complete: 5.8%; Training loss: 3.8544; Validation loss: 7.9423
Iteration: 580; Percent complete: 5.8%; Training loss: 4.0775; Validation loss: 8.0815
Iteration: 581; Percent complete: 5.8%; Training loss: 3.7619; Validation loss: 7.9622
Iteration: 582; Percent complete: 5.8%; Tra

Iteration: 666; Percent complete: 6.7%; Training loss: 3.8661; Validation loss: 8.4723
Iteration: 667; Percent complete: 6.7%; Training loss: 3.9494; Validation loss: 8.2948
Iteration: 668; Percent complete: 6.7%; Training loss: 3.9559; Validation loss: 7.7884
Iteration: 669; Percent complete: 6.7%; Training loss: 3.8376; Validation loss: 8.3454
Iteration: 670; Percent complete: 6.7%; Training loss: 3.8264; Validation loss: 7.8804
Iteration: 671; Percent complete: 6.7%; Training loss: 3.7992; Validation loss: 8.0936
Iteration: 672; Percent complete: 6.7%; Training loss: 3.7869; Validation loss: 8.1530
Iteration: 673; Percent complete: 6.7%; Training loss: 3.9674; Validation loss: 8.0971
Iteration: 674; Percent complete: 6.7%; Training loss: 3.6443; Validation loss: 8.3721
Iteration: 675; Percent complete: 6.8%; Training loss: 3.6076; Validation loss: 8.1921
Iteration: 676; Percent complete: 6.8%; Training loss: 3.5560; Validation loss: 8.0214
Iteration: 677; Percent complete: 6.8%; Tra

Iteration: 761; Percent complete: 7.6%; Training loss: 3.6664; Validation loss: 7.8955
Iteration: 762; Percent complete: 7.6%; Training loss: 3.5877; Validation loss: 8.2620
Iteration: 763; Percent complete: 7.6%; Training loss: 3.6054; Validation loss: 8.1240
Iteration: 764; Percent complete: 7.6%; Training loss: 3.8811; Validation loss: 7.8759
Iteration: 765; Percent complete: 7.6%; Training loss: 3.5931; Validation loss: 7.8660
Iteration: 766; Percent complete: 7.7%; Training loss: 3.5658; Validation loss: 8.1656
Iteration: 767; Percent complete: 7.7%; Training loss: 3.6993; Validation loss: 7.9979
Iteration: 768; Percent complete: 7.7%; Training loss: 3.8138; Validation loss: 8.0163
Iteration: 769; Percent complete: 7.7%; Training loss: 3.4459; Validation loss: 7.8277
Iteration: 770; Percent complete: 7.7%; Training loss: 3.7077; Validation loss: 8.0752
Iteration: 771; Percent complete: 7.7%; Training loss: 3.4760; Validation loss: 8.2771
Iteration: 772; Percent complete: 7.7%; Tra

Iteration: 856; Percent complete: 8.6%; Training loss: 3.6188; Validation loss: 7.9673
Iteration: 857; Percent complete: 8.6%; Training loss: 3.6389; Validation loss: 8.0102
Iteration: 858; Percent complete: 8.6%; Training loss: 3.6478; Validation loss: 8.0836
Iteration: 859; Percent complete: 8.6%; Training loss: 3.7859; Validation loss: 8.0711
Iteration: 860; Percent complete: 8.6%; Training loss: 3.4818; Validation loss: 8.0819
Iteration: 861; Percent complete: 8.6%; Training loss: 3.6460; Validation loss: 7.7636
Iteration: 862; Percent complete: 8.6%; Training loss: 3.5575; Validation loss: 8.2086
Iteration: 863; Percent complete: 8.6%; Training loss: 3.3018; Validation loss: 8.2039
Iteration: 864; Percent complete: 8.6%; Training loss: 3.7676; Validation loss: 8.1327
Iteration: 865; Percent complete: 8.6%; Training loss: 3.7618; Validation loss: 8.1706
Iteration: 866; Percent complete: 8.7%; Training loss: 3.4617; Validation loss: 7.9919
Iteration: 867; Percent complete: 8.7%; Tra

Iteration: 951; Percent complete: 9.5%; Training loss: 3.9609; Validation loss: 8.2077
Iteration: 952; Percent complete: 9.5%; Training loss: 3.6721; Validation loss: 8.2424
Iteration: 953; Percent complete: 9.5%; Training loss: 3.5600; Validation loss: 8.3544
Iteration: 954; Percent complete: 9.5%; Training loss: 3.6330; Validation loss: 8.3700
Iteration: 955; Percent complete: 9.6%; Training loss: 3.4962; Validation loss: 8.4136
Iteration: 956; Percent complete: 9.6%; Training loss: 3.8340; Validation loss: 8.2152
Iteration: 957; Percent complete: 9.6%; Training loss: 3.6441; Validation loss: 8.2088
Iteration: 958; Percent complete: 9.6%; Training loss: 3.5292; Validation loss: 7.9252
Iteration: 959; Percent complete: 9.6%; Training loss: 3.5595; Validation loss: 8.1633
Iteration: 960; Percent complete: 9.6%; Training loss: 3.7330; Validation loss: 8.4296
Iteration: 961; Percent complete: 9.6%; Training loss: 3.9050; Validation loss: 8.2008
Iteration: 962; Percent complete: 9.6%; Tra

Iteration: 1045; Percent complete: 10.4%; Training loss: 3.5330; Validation loss: 7.9861
Iteration: 1046; Percent complete: 10.5%; Training loss: 3.5662; Validation loss: 8.2296
Iteration: 1047; Percent complete: 10.5%; Training loss: 3.7401; Validation loss: 8.2032
Iteration: 1048; Percent complete: 10.5%; Training loss: 3.6165; Validation loss: 8.3043
Iteration: 1049; Percent complete: 10.5%; Training loss: 3.6786; Validation loss: 8.1379
Iteration: 1050; Percent complete: 10.5%; Training loss: 3.5971; Validation loss: 8.1500
Iteration: 1051; Percent complete: 10.5%; Training loss: 3.4988; Validation loss: 8.1044
Iteration: 1052; Percent complete: 10.5%; Training loss: 3.6437; Validation loss: 8.4433
Iteration: 1053; Percent complete: 10.5%; Training loss: 3.4717; Validation loss: 8.2087
Iteration: 1054; Percent complete: 10.5%; Training loss: 3.6786; Validation loss: 8.1614
Iteration: 1055; Percent complete: 10.5%; Training loss: 3.4450; Validation loss: 8.1901
Iteration: 1056; Perc

Iteration: 1138; Percent complete: 11.4%; Training loss: 3.5139; Validation loss: 8.3902
Iteration: 1139; Percent complete: 11.4%; Training loss: 3.7122; Validation loss: 8.2280
Iteration: 1140; Percent complete: 11.4%; Training loss: 3.9427; Validation loss: 8.2897
Iteration: 1141; Percent complete: 11.4%; Training loss: 3.7413; Validation loss: 8.4010
Iteration: 1142; Percent complete: 11.4%; Training loss: 3.5823; Validation loss: 8.0871
Iteration: 1143; Percent complete: 11.4%; Training loss: 3.1929; Validation loss: 8.3756
Iteration: 1144; Percent complete: 11.4%; Training loss: 3.4710; Validation loss: 8.0731
Iteration: 1145; Percent complete: 11.5%; Training loss: 3.6347; Validation loss: 8.1628
Iteration: 1146; Percent complete: 11.5%; Training loss: 3.5054; Validation loss: 8.0726
Iteration: 1147; Percent complete: 11.5%; Training loss: 3.2754; Validation loss: 8.2958
Iteration: 1148; Percent complete: 11.5%; Training loss: 3.4719; Validation loss: 8.4480
Iteration: 1149; Perc

Iteration: 1231; Percent complete: 12.3%; Training loss: 3.6327; Validation loss: 8.3065
Iteration: 1232; Percent complete: 12.3%; Training loss: 3.5765; Validation loss: 7.9023
Iteration: 1233; Percent complete: 12.3%; Training loss: 3.4969; Validation loss: 8.3385
Iteration: 1234; Percent complete: 12.3%; Training loss: 3.5431; Validation loss: 8.1625
Iteration: 1235; Percent complete: 12.3%; Training loss: 3.6871; Validation loss: 7.9803
Iteration: 1236; Percent complete: 12.4%; Training loss: 3.3777; Validation loss: 8.4198
Iteration: 1237; Percent complete: 12.4%; Training loss: 3.5889; Validation loss: 8.4074
Iteration: 1238; Percent complete: 12.4%; Training loss: 3.3790; Validation loss: 8.3986
Iteration: 1239; Percent complete: 12.4%; Training loss: 3.6271; Validation loss: 7.8835
Iteration: 1240; Percent complete: 12.4%; Training loss: 3.4280; Validation loss: 8.3572
Iteration: 1241; Percent complete: 12.4%; Training loss: 3.7530; Validation loss: 8.4915
Iteration: 1242; Perc

Iteration: 1324; Percent complete: 13.2%; Training loss: 3.4922; Validation loss: 8.3989
Iteration: 1325; Percent complete: 13.2%; Training loss: 3.6421; Validation loss: 7.9714
Iteration: 1326; Percent complete: 13.3%; Training loss: 3.6080; Validation loss: 8.4790
Iteration: 1327; Percent complete: 13.3%; Training loss: 3.4545; Validation loss: 8.1775
Iteration: 1328; Percent complete: 13.3%; Training loss: 3.4296; Validation loss: 8.2044
Iteration: 1329; Percent complete: 13.3%; Training loss: 3.1823; Validation loss: 8.5431
Iteration: 1330; Percent complete: 13.3%; Training loss: 3.5307; Validation loss: 8.6950
Iteration: 1331; Percent complete: 13.3%; Training loss: 3.6611; Validation loss: 8.5590
Iteration: 1332; Percent complete: 13.3%; Training loss: 3.4436; Validation loss: 8.4772
Iteration: 1333; Percent complete: 13.3%; Training loss: 3.2864; Validation loss: 8.6754
Iteration: 1334; Percent complete: 13.3%; Training loss: 3.6030; Validation loss: 8.5680
Iteration: 1335; Perc

Iteration: 1417; Percent complete: 14.2%; Training loss: 3.4871; Validation loss: 8.3783
Iteration: 1418; Percent complete: 14.2%; Training loss: 3.6751; Validation loss: 8.4189
Iteration: 1419; Percent complete: 14.2%; Training loss: 3.5293; Validation loss: 8.4716
Iteration: 1420; Percent complete: 14.2%; Training loss: 3.6493; Validation loss: 8.0153
Iteration: 1421; Percent complete: 14.2%; Training loss: 3.2083; Validation loss: 8.0351
Iteration: 1422; Percent complete: 14.2%; Training loss: 3.4011; Validation loss: 8.3071
Iteration: 1423; Percent complete: 14.2%; Training loss: 3.4256; Validation loss: 8.2480
Iteration: 1424; Percent complete: 14.2%; Training loss: 3.3716; Validation loss: 8.2876
Iteration: 1425; Percent complete: 14.2%; Training loss: 3.4010; Validation loss: 8.4108
Iteration: 1426; Percent complete: 14.3%; Training loss: 3.1845; Validation loss: 8.2411
Iteration: 1427; Percent complete: 14.3%; Training loss: 3.6170; Validation loss: 8.0739
Iteration: 1428; Perc

Iteration: 1510; Percent complete: 15.1%; Training loss: 3.2930; Validation loss: 8.1091
Iteration: 1511; Percent complete: 15.1%; Training loss: 3.3557; Validation loss: 7.8682
Iteration: 1512; Percent complete: 15.1%; Training loss: 3.3803; Validation loss: 8.2846
Iteration: 1513; Percent complete: 15.1%; Training loss: 3.1613; Validation loss: 8.4533
Iteration: 1514; Percent complete: 15.1%; Training loss: 3.5575; Validation loss: 8.5413
Iteration: 1515; Percent complete: 15.2%; Training loss: 3.5024; Validation loss: 8.2682
Iteration: 1516; Percent complete: 15.2%; Training loss: 3.6012; Validation loss: 8.4186
Iteration: 1517; Percent complete: 15.2%; Training loss: 3.3435; Validation loss: 8.0624
Iteration: 1518; Percent complete: 15.2%; Training loss: 3.6188; Validation loss: 8.5213
Iteration: 1519; Percent complete: 15.2%; Training loss: 3.3106; Validation loss: 8.7360
Iteration: 1520; Percent complete: 15.2%; Training loss: 3.1713; Validation loss: 8.5637
Iteration: 1521; Perc

Iteration: 1603; Percent complete: 16.0%; Training loss: 3.1807; Validation loss: 8.1383
Iteration: 1604; Percent complete: 16.0%; Training loss: 3.4150; Validation loss: 8.6746
Iteration: 1605; Percent complete: 16.1%; Training loss: 3.2817; Validation loss: 8.6443
Iteration: 1606; Percent complete: 16.1%; Training loss: 3.3663; Validation loss: 8.4509
Iteration: 1607; Percent complete: 16.1%; Training loss: 3.1677; Validation loss: 8.2779
Iteration: 1608; Percent complete: 16.1%; Training loss: 3.6645; Validation loss: 8.3683
Iteration: 1609; Percent complete: 16.1%; Training loss: 3.2462; Validation loss: 8.1517
Iteration: 1610; Percent complete: 16.1%; Training loss: 3.2668; Validation loss: 8.2379
Iteration: 1611; Percent complete: 16.1%; Training loss: 3.3861; Validation loss: 8.4125
Iteration: 1612; Percent complete: 16.1%; Training loss: 3.2889; Validation loss: 8.2711
Iteration: 1613; Percent complete: 16.1%; Training loss: 3.3556; Validation loss: 8.3213
Iteration: 1614; Perc

Iteration: 1696; Percent complete: 17.0%; Training loss: 3.4404; Validation loss: 8.5679
Iteration: 1697; Percent complete: 17.0%; Training loss: 3.4420; Validation loss: 8.5133
Iteration: 1698; Percent complete: 17.0%; Training loss: 3.3507; Validation loss: 8.6451
Iteration: 1699; Percent complete: 17.0%; Training loss: 3.2208; Validation loss: 8.3087
Iteration: 1700; Percent complete: 17.0%; Training loss: 3.3684; Validation loss: 8.5689
Iteration: 1701; Percent complete: 17.0%; Training loss: 3.5070; Validation loss: 8.5482
Iteration: 1702; Percent complete: 17.0%; Training loss: 3.2120; Validation loss: 8.4884
Iteration: 1703; Percent complete: 17.0%; Training loss: 3.4186; Validation loss: 8.0877
Iteration: 1704; Percent complete: 17.0%; Training loss: 3.3845; Validation loss: 8.3037
Iteration: 1705; Percent complete: 17.1%; Training loss: 3.1956; Validation loss: 7.9911
Iteration: 1706; Percent complete: 17.1%; Training loss: 3.3721; Validation loss: 8.3829
Iteration: 1707; Perc

Iteration: 1789; Percent complete: 17.9%; Training loss: 3.3181; Validation loss: 8.2289
Iteration: 1790; Percent complete: 17.9%; Training loss: 3.2127; Validation loss: 8.2460
Iteration: 1791; Percent complete: 17.9%; Training loss: 3.1151; Validation loss: 8.7901
Iteration: 1792; Percent complete: 17.9%; Training loss: 3.3012; Validation loss: 8.2614
Iteration: 1793; Percent complete: 17.9%; Training loss: 3.1353; Validation loss: 8.2739
Iteration: 1794; Percent complete: 17.9%; Training loss: 3.1633; Validation loss: 8.6058
Iteration: 1795; Percent complete: 17.9%; Training loss: 3.2322; Validation loss: 8.0874
Iteration: 1796; Percent complete: 18.0%; Training loss: 3.3696; Validation loss: 8.0116
Iteration: 1797; Percent complete: 18.0%; Training loss: 3.1967; Validation loss: 8.2727
Iteration: 1798; Percent complete: 18.0%; Training loss: 3.2002; Validation loss: 8.4900
Iteration: 1799; Percent complete: 18.0%; Training loss: 3.0779; Validation loss: 8.1258
Iteration: 1800; Perc

Iteration: 1882; Percent complete: 18.8%; Training loss: 3.2456; Validation loss: 8.5737
Iteration: 1883; Percent complete: 18.8%; Training loss: 3.2997; Validation loss: 8.2938
Iteration: 1884; Percent complete: 18.8%; Training loss: 3.2109; Validation loss: 8.0633
Iteration: 1885; Percent complete: 18.9%; Training loss: 3.2728; Validation loss: 8.0141
Iteration: 1886; Percent complete: 18.9%; Training loss: 3.4508; Validation loss: 8.0940
Iteration: 1887; Percent complete: 18.9%; Training loss: 3.3015; Validation loss: 8.5782
Iteration: 1888; Percent complete: 18.9%; Training loss: 3.3553; Validation loss: 8.2723
Iteration: 1889; Percent complete: 18.9%; Training loss: 3.1280; Validation loss: 8.3608
Iteration: 1890; Percent complete: 18.9%; Training loss: 3.2796; Validation loss: 8.1460
Iteration: 1891; Percent complete: 18.9%; Training loss: 3.0997; Validation loss: 7.9964
Iteration: 1892; Percent complete: 18.9%; Training loss: 3.3428; Validation loss: 8.2939
Iteration: 1893; Perc

Iteration: 1975; Percent complete: 19.8%; Training loss: 3.3564; Validation loss: 8.3480
Iteration: 1976; Percent complete: 19.8%; Training loss: 3.3579; Validation loss: 8.2739
Iteration: 1977; Percent complete: 19.8%; Training loss: 3.3637; Validation loss: 8.5464
Iteration: 1978; Percent complete: 19.8%; Training loss: 3.6062; Validation loss: 8.2944
Iteration: 1979; Percent complete: 19.8%; Training loss: 3.4202; Validation loss: 8.7214
Iteration: 1980; Percent complete: 19.8%; Training loss: 2.9273; Validation loss: 8.4666
Iteration: 1981; Percent complete: 19.8%; Training loss: 3.0197; Validation loss: 8.4264
Iteration: 1982; Percent complete: 19.8%; Training loss: 3.3539; Validation loss: 8.0929
Iteration: 1983; Percent complete: 19.8%; Training loss: 3.1058; Validation loss: 7.8909
Iteration: 1984; Percent complete: 19.8%; Training loss: 3.3221; Validation loss: 8.3394
Iteration: 1985; Percent complete: 19.9%; Training loss: 3.4433; Validation loss: 8.1283
Iteration: 1986; Perc

Iteration: 2068; Percent complete: 20.7%; Training loss: 3.1348; Validation loss: 7.9918
Iteration: 2069; Percent complete: 20.7%; Training loss: 3.3263; Validation loss: 8.3368
Iteration: 2070; Percent complete: 20.7%; Training loss: 3.3047; Validation loss: 8.3580
Iteration: 2071; Percent complete: 20.7%; Training loss: 3.0900; Validation loss: 8.4785
Iteration: 2072; Percent complete: 20.7%; Training loss: 2.9237; Validation loss: 8.7912
Iteration: 2073; Percent complete: 20.7%; Training loss: 3.1130; Validation loss: 8.1839
Iteration: 2074; Percent complete: 20.7%; Training loss: 3.1982; Validation loss: 8.2924
Iteration: 2075; Percent complete: 20.8%; Training loss: 3.0472; Validation loss: 8.4383
Iteration: 2076; Percent complete: 20.8%; Training loss: 3.3428; Validation loss: 8.4101
Iteration: 2077; Percent complete: 20.8%; Training loss: 3.1534; Validation loss: 8.1819
Iteration: 2078; Percent complete: 20.8%; Training loss: 3.5259; Validation loss: 8.3567
Iteration: 2079; Perc

Iteration: 2161; Percent complete: 21.6%; Training loss: 3.2953; Validation loss: 8.2616
Iteration: 2162; Percent complete: 21.6%; Training loss: 3.3834; Validation loss: 7.9912
Iteration: 2163; Percent complete: 21.6%; Training loss: 3.2305; Validation loss: 8.2398
Iteration: 2164; Percent complete: 21.6%; Training loss: 3.2372; Validation loss: 8.4208
Iteration: 2165; Percent complete: 21.6%; Training loss: 3.2880; Validation loss: 8.4291
Iteration: 2166; Percent complete: 21.7%; Training loss: 3.0823; Validation loss: 8.8189
Iteration: 2167; Percent complete: 21.7%; Training loss: 3.2977; Validation loss: 8.3955
Iteration: 2168; Percent complete: 21.7%; Training loss: 3.2385; Validation loss: 8.5523
Iteration: 2169; Percent complete: 21.7%; Training loss: 3.2641; Validation loss: 8.1932
Iteration: 2170; Percent complete: 21.7%; Training loss: 3.3038; Validation loss: 8.1048
Iteration: 2171; Percent complete: 21.7%; Training loss: 3.4152; Validation loss: 8.2376
Iteration: 2172; Perc

Iteration: 2254; Percent complete: 22.5%; Training loss: 3.2768; Validation loss: 8.6443
Iteration: 2255; Percent complete: 22.6%; Training loss: 3.1949; Validation loss: 8.6339
Iteration: 2256; Percent complete: 22.6%; Training loss: 3.2859; Validation loss: 8.1560
Iteration: 2257; Percent complete: 22.6%; Training loss: 3.0147; Validation loss: 8.4099
Iteration: 2258; Percent complete: 22.6%; Training loss: 3.1824; Validation loss: 8.8183
Iteration: 2259; Percent complete: 22.6%; Training loss: 3.4203; Validation loss: 8.3997
Iteration: 2260; Percent complete: 22.6%; Training loss: 3.3481; Validation loss: 8.5854
Iteration: 2261; Percent complete: 22.6%; Training loss: 3.0774; Validation loss: 8.4374
Iteration: 2262; Percent complete: 22.6%; Training loss: 3.2820; Validation loss: 8.3125
Iteration: 2263; Percent complete: 22.6%; Training loss: 2.8402; Validation loss: 7.9037
Iteration: 2264; Percent complete: 22.6%; Training loss: 3.1086; Validation loss: 8.4189
Iteration: 2265; Perc

Iteration: 2347; Percent complete: 23.5%; Training loss: 3.0826; Validation loss: 8.0559
Iteration: 2348; Percent complete: 23.5%; Training loss: 2.9551; Validation loss: 8.2892
Iteration: 2349; Percent complete: 23.5%; Training loss: 3.0618; Validation loss: 8.4692
Iteration: 2350; Percent complete: 23.5%; Training loss: 2.9775; Validation loss: 8.5258
Iteration: 2351; Percent complete: 23.5%; Training loss: 3.1074; Validation loss: 8.3565
Iteration: 2352; Percent complete: 23.5%; Training loss: 3.1003; Validation loss: 8.3079
Iteration: 2353; Percent complete: 23.5%; Training loss: 3.1577; Validation loss: 8.6953
Iteration: 2354; Percent complete: 23.5%; Training loss: 3.1024; Validation loss: 8.0786
Iteration: 2355; Percent complete: 23.5%; Training loss: 3.3519; Validation loss: 8.2877
Iteration: 2356; Percent complete: 23.6%; Training loss: 3.2547; Validation loss: 8.0459
Iteration: 2357; Percent complete: 23.6%; Training loss: 3.2500; Validation loss: 7.7770
Iteration: 2358; Perc

Iteration: 2440; Percent complete: 24.4%; Training loss: 3.2308; Validation loss: 8.6816
Iteration: 2441; Percent complete: 24.4%; Training loss: 2.9641; Validation loss: 8.3893
Iteration: 2442; Percent complete: 24.4%; Training loss: 3.1298; Validation loss: 8.0641
Iteration: 2443; Percent complete: 24.4%; Training loss: 3.0721; Validation loss: 8.6751
Iteration: 2444; Percent complete: 24.4%; Training loss: 3.1004; Validation loss: 8.3444
Iteration: 2445; Percent complete: 24.4%; Training loss: 3.2375; Validation loss: 8.3169
Iteration: 2446; Percent complete: 24.5%; Training loss: 3.1836; Validation loss: 8.4283
Iteration: 2447; Percent complete: 24.5%; Training loss: 3.2466; Validation loss: 8.6992
Iteration: 2448; Percent complete: 24.5%; Training loss: 3.1688; Validation loss: 8.1899
Iteration: 2449; Percent complete: 24.5%; Training loss: 3.3570; Validation loss: 8.4483
Iteration: 2450; Percent complete: 24.5%; Training loss: 3.1513; Validation loss: 8.2793
Iteration: 2451; Perc

Iteration: 2533; Percent complete: 25.3%; Training loss: 3.1296; Validation loss: 8.1113
Iteration: 2534; Percent complete: 25.3%; Training loss: 2.9966; Validation loss: 8.3382
Iteration: 2535; Percent complete: 25.4%; Training loss: 2.9926; Validation loss: 8.7565
Iteration: 2536; Percent complete: 25.4%; Training loss: 3.1172; Validation loss: 8.4016
Iteration: 2537; Percent complete: 25.4%; Training loss: 3.2536; Validation loss: 8.4502
Iteration: 2538; Percent complete: 25.4%; Training loss: 2.9688; Validation loss: 8.6905
Iteration: 2539; Percent complete: 25.4%; Training loss: 3.1345; Validation loss: 8.5985
Iteration: 2540; Percent complete: 25.4%; Training loss: 3.0104; Validation loss: 8.5981
Iteration: 2541; Percent complete: 25.4%; Training loss: 3.2547; Validation loss: 8.6997
Iteration: 2542; Percent complete: 25.4%; Training loss: 2.9475; Validation loss: 8.2890
Iteration: 2543; Percent complete: 25.4%; Training loss: 3.1260; Validation loss: 8.7317
Iteration: 2544; Perc

Iteration: 2626; Percent complete: 26.3%; Training loss: 2.9965; Validation loss: 8.5073
Iteration: 2627; Percent complete: 26.3%; Training loss: 3.2989; Validation loss: 8.8113
Iteration: 2628; Percent complete: 26.3%; Training loss: 3.1088; Validation loss: 8.4931
Iteration: 2629; Percent complete: 26.3%; Training loss: 3.2863; Validation loss: 8.4214
Iteration: 2630; Percent complete: 26.3%; Training loss: 3.2151; Validation loss: 8.5431
Iteration: 2631; Percent complete: 26.3%; Training loss: 3.2420; Validation loss: 8.7813
Iteration: 2632; Percent complete: 26.3%; Training loss: 2.8807; Validation loss: 8.5796
Iteration: 2633; Percent complete: 26.3%; Training loss: 3.0638; Validation loss: 8.3912
Iteration: 2634; Percent complete: 26.3%; Training loss: 2.8850; Validation loss: 8.4882
Iteration: 2635; Percent complete: 26.4%; Training loss: 3.2808; Validation loss: 8.0332
Iteration: 2636; Percent complete: 26.4%; Training loss: 3.1456; Validation loss: 8.7405
Iteration: 2637; Perc

Iteration: 2719; Percent complete: 27.2%; Training loss: 3.3539; Validation loss: 8.2568
Iteration: 2720; Percent complete: 27.2%; Training loss: 2.9691; Validation loss: 8.6855
Iteration: 2721; Percent complete: 27.2%; Training loss: 3.1812; Validation loss: 8.1182
Iteration: 2722; Percent complete: 27.2%; Training loss: 3.0189; Validation loss: 8.4548
Iteration: 2723; Percent complete: 27.2%; Training loss: 3.0146; Validation loss: 8.3266
Iteration: 2724; Percent complete: 27.2%; Training loss: 3.1304; Validation loss: 8.2998
Iteration: 2725; Percent complete: 27.3%; Training loss: 3.1732; Validation loss: 8.4463
Iteration: 2726; Percent complete: 27.3%; Training loss: 2.8692; Validation loss: 8.3498
Iteration: 2727; Percent complete: 27.3%; Training loss: 3.1559; Validation loss: 8.3436
Iteration: 2728; Percent complete: 27.3%; Training loss: 3.1483; Validation loss: 8.2036
Iteration: 2729; Percent complete: 27.3%; Training loss: 2.9224; Validation loss: 8.3881
Iteration: 2730; Perc

Iteration: 2812; Percent complete: 28.1%; Training loss: 2.9762; Validation loss: 8.1106
Iteration: 2813; Percent complete: 28.1%; Training loss: 3.3022; Validation loss: 8.6435
Iteration: 2814; Percent complete: 28.1%; Training loss: 2.8230; Validation loss: 8.8410
Iteration: 2815; Percent complete: 28.1%; Training loss: 3.1508; Validation loss: 8.3879
Iteration: 2816; Percent complete: 28.2%; Training loss: 3.0173; Validation loss: 8.0775
Iteration: 2817; Percent complete: 28.2%; Training loss: 3.0651; Validation loss: 9.2154
Iteration: 2818; Percent complete: 28.2%; Training loss: 2.9186; Validation loss: 8.0460
Iteration: 2819; Percent complete: 28.2%; Training loss: 3.1869; Validation loss: 8.7926
Iteration: 2820; Percent complete: 28.2%; Training loss: 3.0274; Validation loss: 8.3021
Iteration: 2821; Percent complete: 28.2%; Training loss: 3.0709; Validation loss: 8.4502
Iteration: 2822; Percent complete: 28.2%; Training loss: 2.9301; Validation loss: 8.3535
Iteration: 2823; Perc

Iteration: 2905; Percent complete: 29.0%; Training loss: 2.9968; Validation loss: 8.1899
Iteration: 2906; Percent complete: 29.1%; Training loss: 2.7864; Validation loss: 8.2565
Iteration: 2907; Percent complete: 29.1%; Training loss: 3.0319; Validation loss: 8.3276
Iteration: 2908; Percent complete: 29.1%; Training loss: 2.8719; Validation loss: 8.5967
Iteration: 2909; Percent complete: 29.1%; Training loss: 2.9164; Validation loss: 8.5336
Iteration: 2910; Percent complete: 29.1%; Training loss: 2.8951; Validation loss: 8.3943
Iteration: 2911; Percent complete: 29.1%; Training loss: 2.8223; Validation loss: 8.3857
Iteration: 2912; Percent complete: 29.1%; Training loss: 3.1551; Validation loss: 8.4802
Iteration: 2913; Percent complete: 29.1%; Training loss: 2.8866; Validation loss: 8.2341
Iteration: 2914; Percent complete: 29.1%; Training loss: 3.0564; Validation loss: 8.4394
Iteration: 2915; Percent complete: 29.1%; Training loss: 2.9567; Validation loss: 8.3399
Iteration: 2916; Perc

Iteration: 2998; Percent complete: 30.0%; Training loss: 2.8015; Validation loss: 8.6152
Iteration: 2999; Percent complete: 30.0%; Training loss: 3.0175; Validation loss: 8.6663
Iteration: 3000; Percent complete: 30.0%; Training loss: 3.1670; Validation loss: 8.6071
Iteration: 3001; Percent complete: 30.0%; Training loss: 3.1675; Validation loss: 8.5034
Iteration: 3002; Percent complete: 30.0%; Training loss: 2.8660; Validation loss: 8.4993
Iteration: 3003; Percent complete: 30.0%; Training loss: 3.3287; Validation loss: 8.2629
Iteration: 3004; Percent complete: 30.0%; Training loss: 2.7474; Validation loss: 8.8778
Iteration: 3005; Percent complete: 30.0%; Training loss: 3.1913; Validation loss: 8.4747
Iteration: 3006; Percent complete: 30.1%; Training loss: 2.6489; Validation loss: 8.5526
Iteration: 3007; Percent complete: 30.1%; Training loss: 3.3540; Validation loss: 8.5349
Iteration: 3008; Percent complete: 30.1%; Training loss: 3.1501; Validation loss: 8.6816
Iteration: 3009; Perc

Iteration: 3091; Percent complete: 30.9%; Training loss: 3.1889; Validation loss: 8.3907
Iteration: 3092; Percent complete: 30.9%; Training loss: 3.0503; Validation loss: 8.2165
Iteration: 3093; Percent complete: 30.9%; Training loss: 2.8738; Validation loss: 8.4887
Iteration: 3094; Percent complete: 30.9%; Training loss: 3.0279; Validation loss: 8.7211
Iteration: 3095; Percent complete: 30.9%; Training loss: 2.9049; Validation loss: 8.4957
Iteration: 3096; Percent complete: 31.0%; Training loss: 3.1589; Validation loss: 8.4312
Iteration: 3097; Percent complete: 31.0%; Training loss: 3.1979; Validation loss: 8.5463
Iteration: 3098; Percent complete: 31.0%; Training loss: 3.0144; Validation loss: 8.3564
Iteration: 3099; Percent complete: 31.0%; Training loss: 2.8412; Validation loss: 8.8731
Iteration: 3100; Percent complete: 31.0%; Training loss: 2.9658; Validation loss: 8.6782
Iteration: 3101; Percent complete: 31.0%; Training loss: 2.8539; Validation loss: 8.3256
Iteration: 3102; Perc

Iteration: 3184; Percent complete: 31.8%; Training loss: 2.9433; Validation loss: 8.7300
Iteration: 3185; Percent complete: 31.9%; Training loss: 2.8452; Validation loss: 8.5479
Iteration: 3186; Percent complete: 31.9%; Training loss: 2.9690; Validation loss: 8.7067
Iteration: 3187; Percent complete: 31.9%; Training loss: 2.9663; Validation loss: 8.5654
Iteration: 3188; Percent complete: 31.9%; Training loss: 2.7771; Validation loss: 9.1972
Iteration: 3189; Percent complete: 31.9%; Training loss: 2.8549; Validation loss: 8.3848
Iteration: 3190; Percent complete: 31.9%; Training loss: 2.9242; Validation loss: 8.4459
Iteration: 3191; Percent complete: 31.9%; Training loss: 2.9252; Validation loss: 8.5202
Iteration: 3192; Percent complete: 31.9%; Training loss: 2.8884; Validation loss: 8.5709
Iteration: 3193; Percent complete: 31.9%; Training loss: 2.8165; Validation loss: 8.6481
Iteration: 3194; Percent complete: 31.9%; Training loss: 3.0438; Validation loss: 8.7097
Iteration: 3195; Perc

Iteration: 3277; Percent complete: 32.8%; Training loss: 2.8567; Validation loss: 8.9363
Iteration: 3278; Percent complete: 32.8%; Training loss: 2.8716; Validation loss: 8.5430
Iteration: 3279; Percent complete: 32.8%; Training loss: 2.7246; Validation loss: 8.6029
Iteration: 3280; Percent complete: 32.8%; Training loss: 2.9370; Validation loss: 8.8066
Iteration: 3281; Percent complete: 32.8%; Training loss: 2.6651; Validation loss: 8.8828
Iteration: 3282; Percent complete: 32.8%; Training loss: 2.7883; Validation loss: 8.8650
Iteration: 3283; Percent complete: 32.8%; Training loss: 2.8123; Validation loss: 8.5521
Iteration: 3284; Percent complete: 32.8%; Training loss: 2.7985; Validation loss: 8.2996
Iteration: 3285; Percent complete: 32.9%; Training loss: 2.9381; Validation loss: 8.5968
Iteration: 3286; Percent complete: 32.9%; Training loss: 2.9535; Validation loss: 8.7535
Iteration: 3287; Percent complete: 32.9%; Training loss: 2.7577; Validation loss: 8.4145
Iteration: 3288; Perc

Iteration: 3370; Percent complete: 33.7%; Training loss: 2.7799; Validation loss: 8.9870
Iteration: 3371; Percent complete: 33.7%; Training loss: 2.8357; Validation loss: 8.8401
Iteration: 3372; Percent complete: 33.7%; Training loss: 3.0480; Validation loss: 8.2828
Iteration: 3373; Percent complete: 33.7%; Training loss: 2.7157; Validation loss: 8.7557
Iteration: 3374; Percent complete: 33.7%; Training loss: 2.8368; Validation loss: 8.8003
Iteration: 3375; Percent complete: 33.8%; Training loss: 3.0527; Validation loss: 8.7751
Iteration: 3376; Percent complete: 33.8%; Training loss: 2.9075; Validation loss: 8.6782
Iteration: 3377; Percent complete: 33.8%; Training loss: 2.8333; Validation loss: 9.0811
Iteration: 3378; Percent complete: 33.8%; Training loss: 2.9917; Validation loss: 8.6843
Iteration: 3379; Percent complete: 33.8%; Training loss: 2.7089; Validation loss: 8.8402
Iteration: 3380; Percent complete: 33.8%; Training loss: 2.7756; Validation loss: 9.0315
Iteration: 3381; Perc

Iteration: 3463; Percent complete: 34.6%; Training loss: 2.5231; Validation loss: 8.7915
Iteration: 3464; Percent complete: 34.6%; Training loss: 2.7230; Validation loss: 8.7393
Iteration: 3465; Percent complete: 34.6%; Training loss: 2.7169; Validation loss: 8.9169
Iteration: 3466; Percent complete: 34.7%; Training loss: 2.6653; Validation loss: 8.9074
Iteration: 3467; Percent complete: 34.7%; Training loss: 2.8583; Validation loss: 8.4692
Iteration: 3468; Percent complete: 34.7%; Training loss: 2.6845; Validation loss: 8.7104
Iteration: 3469; Percent complete: 34.7%; Training loss: 2.8902; Validation loss: 8.7491
Iteration: 3470; Percent complete: 34.7%; Training loss: 2.7164; Validation loss: 8.7878
Iteration: 3471; Percent complete: 34.7%; Training loss: 2.9734; Validation loss: 8.6210
Iteration: 3472; Percent complete: 34.7%; Training loss: 2.6776; Validation loss: 8.6820
Iteration: 3473; Percent complete: 34.7%; Training loss: 2.7238; Validation loss: 8.7711
Iteration: 3474; Perc

Iteration: 3556; Percent complete: 35.6%; Training loss: 2.7461; Validation loss: 8.9327
Iteration: 3557; Percent complete: 35.6%; Training loss: 2.6560; Validation loss: 8.5299
Iteration: 3558; Percent complete: 35.6%; Training loss: 2.8045; Validation loss: 8.7577
Iteration: 3559; Percent complete: 35.6%; Training loss: 2.7239; Validation loss: 8.6772
Iteration: 3560; Percent complete: 35.6%; Training loss: 2.7780; Validation loss: 8.5504
Iteration: 3561; Percent complete: 35.6%; Training loss: 2.8068; Validation loss: 8.4710
Iteration: 3562; Percent complete: 35.6%; Training loss: 2.8658; Validation loss: 8.6030
Iteration: 3563; Percent complete: 35.6%; Training loss: 2.6937; Validation loss: 8.9374
Iteration: 3564; Percent complete: 35.6%; Training loss: 2.8877; Validation loss: 8.6288
Iteration: 3565; Percent complete: 35.6%; Training loss: 2.6448; Validation loss: 8.3737
Iteration: 3566; Percent complete: 35.7%; Training loss: 2.7878; Validation loss: 8.6875
Iteration: 3567; Perc

Iteration: 3649; Percent complete: 36.5%; Training loss: 2.6597; Validation loss: 8.4131
Iteration: 3650; Percent complete: 36.5%; Training loss: 2.9893; Validation loss: 8.3014
Iteration: 3651; Percent complete: 36.5%; Training loss: 2.6157; Validation loss: 8.3881
Iteration: 3652; Percent complete: 36.5%; Training loss: 2.7581; Validation loss: 9.0915
Iteration: 3653; Percent complete: 36.5%; Training loss: 2.7839; Validation loss: 8.4011
Iteration: 3654; Percent complete: 36.5%; Training loss: 2.8440; Validation loss: 8.4427
Iteration: 3655; Percent complete: 36.5%; Training loss: 2.6249; Validation loss: 8.5473
Iteration: 3656; Percent complete: 36.6%; Training loss: 2.7013; Validation loss: 8.9713
Iteration: 3657; Percent complete: 36.6%; Training loss: 2.8183; Validation loss: 8.7340
Iteration: 3658; Percent complete: 36.6%; Training loss: 2.4987; Validation loss: 8.7182
Iteration: 3659; Percent complete: 36.6%; Training loss: 2.7008; Validation loss: 8.8260
Iteration: 3660; Perc

Iteration: 3742; Percent complete: 37.4%; Training loss: 2.9584; Validation loss: 8.8485
Iteration: 3743; Percent complete: 37.4%; Training loss: 2.7119; Validation loss: 8.4631
Iteration: 3744; Percent complete: 37.4%; Training loss: 2.7682; Validation loss: 8.8743
Iteration: 3745; Percent complete: 37.5%; Training loss: 2.7852; Validation loss: 8.4773
Iteration: 3746; Percent complete: 37.5%; Training loss: 2.9567; Validation loss: 8.5652
Iteration: 3747; Percent complete: 37.5%; Training loss: 2.6531; Validation loss: 8.7758
Iteration: 3748; Percent complete: 37.5%; Training loss: 2.6535; Validation loss: 8.6672
Iteration: 3749; Percent complete: 37.5%; Training loss: 2.6157; Validation loss: 8.8829
Iteration: 3750; Percent complete: 37.5%; Training loss: 2.4782; Validation loss: 9.2670
Iteration: 3751; Percent complete: 37.5%; Training loss: 2.5189; Validation loss: 8.8976
Iteration: 3752; Percent complete: 37.5%; Training loss: 2.8821; Validation loss: 8.5383
Iteration: 3753; Perc

Iteration: 3835; Percent complete: 38.4%; Training loss: 2.9336; Validation loss: 8.6354
Iteration: 3836; Percent complete: 38.4%; Training loss: 2.6990; Validation loss: 8.3902
Iteration: 3837; Percent complete: 38.4%; Training loss: 2.7789; Validation loss: 8.5718
Iteration: 3838; Percent complete: 38.4%; Training loss: 2.8084; Validation loss: 8.7705
Iteration: 3839; Percent complete: 38.4%; Training loss: 2.7865; Validation loss: 8.6114
Iteration: 3840; Percent complete: 38.4%; Training loss: 2.8165; Validation loss: 8.7029
Iteration: 3841; Percent complete: 38.4%; Training loss: 2.8692; Validation loss: 8.2897
Iteration: 3842; Percent complete: 38.4%; Training loss: 2.7501; Validation loss: 8.6753
Iteration: 3843; Percent complete: 38.4%; Training loss: 2.7154; Validation loss: 8.2930
Iteration: 3844; Percent complete: 38.4%; Training loss: 2.8360; Validation loss: 8.9807
Iteration: 3845; Percent complete: 38.5%; Training loss: 2.8230; Validation loss: 8.7544
Iteration: 3846; Perc

Iteration: 3928; Percent complete: 39.3%; Training loss: 2.6091; Validation loss: 8.9816
Iteration: 3929; Percent complete: 39.3%; Training loss: 2.8798; Validation loss: 8.3023
Iteration: 3930; Percent complete: 39.3%; Training loss: 2.5166; Validation loss: 8.3566
Iteration: 3931; Percent complete: 39.3%; Training loss: 2.9161; Validation loss: 8.9511
Iteration: 3932; Percent complete: 39.3%; Training loss: 2.5839; Validation loss: 8.4779
Iteration: 3933; Percent complete: 39.3%; Training loss: 2.6862; Validation loss: 8.3815
Iteration: 3934; Percent complete: 39.3%; Training loss: 2.5854; Validation loss: 9.0177
Iteration: 3935; Percent complete: 39.4%; Training loss: 2.8467; Validation loss: 8.4489
Iteration: 3936; Percent complete: 39.4%; Training loss: 2.6671; Validation loss: 8.7523
Iteration: 3937; Percent complete: 39.4%; Training loss: 2.7071; Validation loss: 8.8285
Iteration: 3938; Percent complete: 39.4%; Training loss: 2.4751; Validation loss: 8.9930
Iteration: 3939; Perc

Iteration: 4021; Percent complete: 40.2%; Training loss: 2.3803; Validation loss: 8.5211
Iteration: 4022; Percent complete: 40.2%; Training loss: 2.6537; Validation loss: 9.1611
Iteration: 4023; Percent complete: 40.2%; Training loss: 2.7779; Validation loss: 9.2159
Iteration: 4024; Percent complete: 40.2%; Training loss: 2.6462; Validation loss: 8.5502
Iteration: 4025; Percent complete: 40.2%; Training loss: 2.8431; Validation loss: 8.5208
Iteration: 4026; Percent complete: 40.3%; Training loss: 2.6109; Validation loss: 8.4309
Iteration: 4027; Percent complete: 40.3%; Training loss: 2.7596; Validation loss: 8.7506
Iteration: 4028; Percent complete: 40.3%; Training loss: 2.5327; Validation loss: 8.8498
Iteration: 4029; Percent complete: 40.3%; Training loss: 2.5179; Validation loss: 9.1935
Iteration: 4030; Percent complete: 40.3%; Training loss: 2.6981; Validation loss: 9.0036
Iteration: 4031; Percent complete: 40.3%; Training loss: 2.6395; Validation loss: 8.7122
Iteration: 4032; Perc

Iteration: 4114; Percent complete: 41.1%; Training loss: 2.5971; Validation loss: 8.7715
Iteration: 4115; Percent complete: 41.1%; Training loss: 2.7073; Validation loss: 8.7410
Iteration: 4116; Percent complete: 41.2%; Training loss: 2.6281; Validation loss: 8.9083
Iteration: 4117; Percent complete: 41.2%; Training loss: 2.7712; Validation loss: 9.1789
Iteration: 4118; Percent complete: 41.2%; Training loss: 2.4373; Validation loss: 9.0603
Iteration: 4119; Percent complete: 41.2%; Training loss: 2.5428; Validation loss: 8.7247
Iteration: 4120; Percent complete: 41.2%; Training loss: 2.6838; Validation loss: 8.7513
Iteration: 4121; Percent complete: 41.2%; Training loss: 2.7483; Validation loss: 8.7794
Iteration: 4122; Percent complete: 41.2%; Training loss: 2.5895; Validation loss: 8.8062
Iteration: 4123; Percent complete: 41.2%; Training loss: 2.8013; Validation loss: 8.7845
Iteration: 4124; Percent complete: 41.2%; Training loss: 2.6831; Validation loss: 8.5814
Iteration: 4125; Perc

Iteration: 4207; Percent complete: 42.1%; Training loss: 3.0380; Validation loss: 8.5814
Iteration: 4208; Percent complete: 42.1%; Training loss: 2.5716; Validation loss: 8.7916
Iteration: 4209; Percent complete: 42.1%; Training loss: 2.4831; Validation loss: 8.9927
Iteration: 4210; Percent complete: 42.1%; Training loss: 2.5381; Validation loss: 8.9672
Iteration: 4211; Percent complete: 42.1%; Training loss: 2.5453; Validation loss: 8.8827
Iteration: 4212; Percent complete: 42.1%; Training loss: 2.4229; Validation loss: 8.7302
Iteration: 4213; Percent complete: 42.1%; Training loss: 3.0455; Validation loss: 8.7136
Iteration: 4214; Percent complete: 42.1%; Training loss: 2.7926; Validation loss: 8.6706
Iteration: 4215; Percent complete: 42.1%; Training loss: 2.5976; Validation loss: 8.6265
Iteration: 4216; Percent complete: 42.2%; Training loss: 2.7075; Validation loss: 8.7667
Iteration: 4217; Percent complete: 42.2%; Training loss: 2.6508; Validation loss: 8.7306
Iteration: 4218; Perc

Iteration: 4300; Percent complete: 43.0%; Training loss: 2.8479; Validation loss: 8.7617
Iteration: 4301; Percent complete: 43.0%; Training loss: 2.5398; Validation loss: 9.0752
Iteration: 4302; Percent complete: 43.0%; Training loss: 2.7399; Validation loss: 8.4660
Iteration: 4303; Percent complete: 43.0%; Training loss: 2.5084; Validation loss: 8.8633
Iteration: 4304; Percent complete: 43.0%; Training loss: 2.4740; Validation loss: 8.8639
Iteration: 4305; Percent complete: 43.0%; Training loss: 2.5979; Validation loss: 8.5466
Iteration: 4306; Percent complete: 43.1%; Training loss: 2.6965; Validation loss: 8.9934
Iteration: 4307; Percent complete: 43.1%; Training loss: 2.6458; Validation loss: 9.0057
Iteration: 4308; Percent complete: 43.1%; Training loss: 2.6285; Validation loss: 9.1704
Iteration: 4309; Percent complete: 43.1%; Training loss: 2.6983; Validation loss: 9.1254
Iteration: 4310; Percent complete: 43.1%; Training loss: 2.6783; Validation loss: 9.2084
Iteration: 4311; Perc

Iteration: 4393; Percent complete: 43.9%; Training loss: 2.6395; Validation loss: 8.8867
Iteration: 4394; Percent complete: 43.9%; Training loss: 2.6071; Validation loss: 8.8333
Iteration: 4395; Percent complete: 44.0%; Training loss: 2.3797; Validation loss: 8.8220
Iteration: 4396; Percent complete: 44.0%; Training loss: 2.3857; Validation loss: 9.3000
Iteration: 4397; Percent complete: 44.0%; Training loss: 2.3705; Validation loss: 8.7490
Iteration: 4398; Percent complete: 44.0%; Training loss: 2.4471; Validation loss: 8.8876
Iteration: 4399; Percent complete: 44.0%; Training loss: 2.7107; Validation loss: 9.2144
Iteration: 4400; Percent complete: 44.0%; Training loss: 2.5863; Validation loss: 8.9698
Iteration: 4401; Percent complete: 44.0%; Training loss: 2.5241; Validation loss: 9.0520
Iteration: 4402; Percent complete: 44.0%; Training loss: 2.5937; Validation loss: 9.1526
Iteration: 4403; Percent complete: 44.0%; Training loss: 2.4000; Validation loss: 9.1275
Iteration: 4404; Perc

Iteration: 4486; Percent complete: 44.9%; Training loss: 2.6500; Validation loss: 9.0911
Iteration: 4487; Percent complete: 44.9%; Training loss: 2.6014; Validation loss: 9.0866
Iteration: 4488; Percent complete: 44.9%; Training loss: 2.6002; Validation loss: 8.7212
Iteration: 4489; Percent complete: 44.9%; Training loss: 2.7309; Validation loss: 8.8059
Iteration: 4490; Percent complete: 44.9%; Training loss: 2.5221; Validation loss: 8.9123
Iteration: 4491; Percent complete: 44.9%; Training loss: 2.5958; Validation loss: 9.1519
Iteration: 4492; Percent complete: 44.9%; Training loss: 2.6931; Validation loss: 8.9866
Iteration: 4493; Percent complete: 44.9%; Training loss: 2.5215; Validation loss: 8.7698
Iteration: 4494; Percent complete: 44.9%; Training loss: 2.5108; Validation loss: 8.7968
Iteration: 4495; Percent complete: 45.0%; Training loss: 2.8049; Validation loss: 8.7716
Iteration: 4496; Percent complete: 45.0%; Training loss: 2.6116; Validation loss: 8.9352
Iteration: 4497; Perc

Iteration: 4579; Percent complete: 45.8%; Training loss: 2.5918; Validation loss: 8.7431
Iteration: 4580; Percent complete: 45.8%; Training loss: 2.4077; Validation loss: 8.6017
Iteration: 4581; Percent complete: 45.8%; Training loss: 2.6565; Validation loss: 9.0514
Iteration: 4582; Percent complete: 45.8%; Training loss: 2.4306; Validation loss: 9.0916
Iteration: 4583; Percent complete: 45.8%; Training loss: 2.3351; Validation loss: 9.0013
Iteration: 4584; Percent complete: 45.8%; Training loss: 2.8028; Validation loss: 9.0482
Iteration: 4585; Percent complete: 45.9%; Training loss: 2.4539; Validation loss: 8.9390
Iteration: 4586; Percent complete: 45.9%; Training loss: 2.4590; Validation loss: 8.7325
Iteration: 4587; Percent complete: 45.9%; Training loss: 2.6381; Validation loss: 8.6414
Iteration: 4588; Percent complete: 45.9%; Training loss: 2.7067; Validation loss: 8.5891
Iteration: 4589; Percent complete: 45.9%; Training loss: 2.3320; Validation loss: 8.9507
Iteration: 4590; Perc

Iteration: 4672; Percent complete: 46.7%; Training loss: 2.6298; Validation loss: 8.9017
Iteration: 4673; Percent complete: 46.7%; Training loss: 2.5012; Validation loss: 9.2060
Iteration: 4674; Percent complete: 46.7%; Training loss: 2.5524; Validation loss: 9.0143
Iteration: 4675; Percent complete: 46.8%; Training loss: 2.5154; Validation loss: 9.0710
Iteration: 4676; Percent complete: 46.8%; Training loss: 2.5582; Validation loss: 9.0744
Iteration: 4677; Percent complete: 46.8%; Training loss: 2.6302; Validation loss: 9.0162
Iteration: 4678; Percent complete: 46.8%; Training loss: 2.5472; Validation loss: 9.2512
Iteration: 4679; Percent complete: 46.8%; Training loss: 2.3463; Validation loss: 8.9553
Iteration: 4680; Percent complete: 46.8%; Training loss: 2.4235; Validation loss: 8.7612
Iteration: 4681; Percent complete: 46.8%; Training loss: 2.6720; Validation loss: 9.2352
Iteration: 4682; Percent complete: 46.8%; Training loss: 2.5035; Validation loss: 8.9572
Iteration: 4683; Perc

Iteration: 4765; Percent complete: 47.6%; Training loss: 2.5177; Validation loss: 8.8726
Iteration: 4766; Percent complete: 47.7%; Training loss: 2.3604; Validation loss: 8.7302
Iteration: 4767; Percent complete: 47.7%; Training loss: 2.3672; Validation loss: 8.9636
Iteration: 4768; Percent complete: 47.7%; Training loss: 2.3103; Validation loss: 9.0335
Iteration: 4769; Percent complete: 47.7%; Training loss: 2.4733; Validation loss: 9.1377
Iteration: 4770; Percent complete: 47.7%; Training loss: 2.7019; Validation loss: 8.9359
Iteration: 4771; Percent complete: 47.7%; Training loss: 2.3738; Validation loss: 8.9362
Iteration: 4772; Percent complete: 47.7%; Training loss: 2.4434; Validation loss: 9.1317
Iteration: 4773; Percent complete: 47.7%; Training loss: 2.2300; Validation loss: 8.5978
Iteration: 4774; Percent complete: 47.7%; Training loss: 2.4468; Validation loss: 8.9841
Iteration: 4775; Percent complete: 47.8%; Training loss: 2.6918; Validation loss: 9.0069
Iteration: 4776; Perc

Iteration: 4858; Percent complete: 48.6%; Training loss: 2.6252; Validation loss: 9.2783
Iteration: 4859; Percent complete: 48.6%; Training loss: 2.4450; Validation loss: 8.8823
Iteration: 4860; Percent complete: 48.6%; Training loss: 2.6132; Validation loss: 9.4322
Iteration: 4861; Percent complete: 48.6%; Training loss: 2.4331; Validation loss: 8.9133
Iteration: 4862; Percent complete: 48.6%; Training loss: 2.7177; Validation loss: 9.2482
Iteration: 4863; Percent complete: 48.6%; Training loss: 2.4546; Validation loss: 8.9154
Iteration: 4864; Percent complete: 48.6%; Training loss: 2.4395; Validation loss: 9.0195
Iteration: 4865; Percent complete: 48.6%; Training loss: 2.3825; Validation loss: 9.2258
Iteration: 4866; Percent complete: 48.7%; Training loss: 2.2462; Validation loss: 8.8012
Iteration: 4867; Percent complete: 48.7%; Training loss: 2.3456; Validation loss: 8.9107
Iteration: 4868; Percent complete: 48.7%; Training loss: 2.3943; Validation loss: 8.4943
Iteration: 4869; Perc

Iteration: 4951; Percent complete: 49.5%; Training loss: 2.4360; Validation loss: 9.0733
Iteration: 4952; Percent complete: 49.5%; Training loss: 2.3278; Validation loss: 9.2090
Iteration: 4953; Percent complete: 49.5%; Training loss: 2.4355; Validation loss: 8.6977
Iteration: 4954; Percent complete: 49.5%; Training loss: 2.3945; Validation loss: 8.7040
Iteration: 4955; Percent complete: 49.5%; Training loss: 2.4776; Validation loss: 9.3811
Iteration: 4956; Percent complete: 49.6%; Training loss: 2.2801; Validation loss: 8.9597
Iteration: 4957; Percent complete: 49.6%; Training loss: 2.4746; Validation loss: 9.1188
Iteration: 4958; Percent complete: 49.6%; Training loss: 2.4697; Validation loss: 8.7507
Iteration: 4959; Percent complete: 49.6%; Training loss: 2.5592; Validation loss: 8.5473
Iteration: 4960; Percent complete: 49.6%; Training loss: 2.2649; Validation loss: 8.7151
Iteration: 4961; Percent complete: 49.6%; Training loss: 2.4558; Validation loss: 9.1850
Iteration: 4962; Perc

Iteration: 5044; Percent complete: 50.4%; Training loss: 2.4945; Validation loss: 8.9667
Iteration: 5045; Percent complete: 50.4%; Training loss: 2.3198; Validation loss: 9.3971
Iteration: 5046; Percent complete: 50.5%; Training loss: 2.6270; Validation loss: 9.1300
Iteration: 5047; Percent complete: 50.5%; Training loss: 2.2694; Validation loss: 9.2222
Iteration: 5048; Percent complete: 50.5%; Training loss: 2.4801; Validation loss: 9.5251
Iteration: 5049; Percent complete: 50.5%; Training loss: 2.3131; Validation loss: 8.9000
Iteration: 5050; Percent complete: 50.5%; Training loss: 2.4780; Validation loss: 8.9462
Iteration: 5051; Percent complete: 50.5%; Training loss: 2.2525; Validation loss: 8.7591
Iteration: 5052; Percent complete: 50.5%; Training loss: 2.4032; Validation loss: 9.0280
Iteration: 5053; Percent complete: 50.5%; Training loss: 2.3851; Validation loss: 9.1594
Iteration: 5054; Percent complete: 50.5%; Training loss: 2.2809; Validation loss: 9.4054
Iteration: 5055; Perc

Iteration: 5137; Percent complete: 51.4%; Training loss: 2.3477; Validation loss: 9.2670
Iteration: 5138; Percent complete: 51.4%; Training loss: 2.3022; Validation loss: 9.1111
Iteration: 5139; Percent complete: 51.4%; Training loss: 2.2637; Validation loss: 9.1440
Iteration: 5140; Percent complete: 51.4%; Training loss: 2.2501; Validation loss: 9.4941
Iteration: 5141; Percent complete: 51.4%; Training loss: 2.4727; Validation loss: 8.9400
Iteration: 5142; Percent complete: 51.4%; Training loss: 2.3288; Validation loss: 9.2157
Iteration: 5143; Percent complete: 51.4%; Training loss: 2.5398; Validation loss: 8.7025
Iteration: 5144; Percent complete: 51.4%; Training loss: 2.5883; Validation loss: 9.1930
Iteration: 5145; Percent complete: 51.4%; Training loss: 2.5562; Validation loss: 9.0343
Iteration: 5146; Percent complete: 51.5%; Training loss: 2.4423; Validation loss: 9.1010
Iteration: 5147; Percent complete: 51.5%; Training loss: 2.4176; Validation loss: 8.9328
Iteration: 5148; Perc

Iteration: 5230; Percent complete: 52.3%; Training loss: 2.4071; Validation loss: 9.3508
Iteration: 5231; Percent complete: 52.3%; Training loss: 2.3839; Validation loss: 9.1217
Iteration: 5232; Percent complete: 52.3%; Training loss: 2.2844; Validation loss: 8.8753
Iteration: 5233; Percent complete: 52.3%; Training loss: 2.3499; Validation loss: 9.1662
Iteration: 5234; Percent complete: 52.3%; Training loss: 2.4314; Validation loss: 9.2840
Iteration: 5235; Percent complete: 52.3%; Training loss: 2.0721; Validation loss: 8.6286
Iteration: 5236; Percent complete: 52.4%; Training loss: 2.5318; Validation loss: 9.0074
Iteration: 5237; Percent complete: 52.4%; Training loss: 2.4185; Validation loss: 8.9911
Iteration: 5238; Percent complete: 52.4%; Training loss: 2.2073; Validation loss: 9.3604
Iteration: 5239; Percent complete: 52.4%; Training loss: 2.4297; Validation loss: 9.1764
Iteration: 5240; Percent complete: 52.4%; Training loss: 2.3118; Validation loss: 9.3094
Iteration: 5241; Perc

Iteration: 5323; Percent complete: 53.2%; Training loss: 2.4023; Validation loss: 8.9616
Iteration: 5324; Percent complete: 53.2%; Training loss: 2.4049; Validation loss: 8.9227
Iteration: 5325; Percent complete: 53.2%; Training loss: 2.2502; Validation loss: 9.6650
Iteration: 5326; Percent complete: 53.3%; Training loss: 2.3527; Validation loss: 9.2455
Iteration: 5327; Percent complete: 53.3%; Training loss: 2.5196; Validation loss: 9.3993
Iteration: 5328; Percent complete: 53.3%; Training loss: 2.4118; Validation loss: 9.3743
Iteration: 5329; Percent complete: 53.3%; Training loss: 2.6774; Validation loss: 9.1602
Iteration: 5330; Percent complete: 53.3%; Training loss: 2.3219; Validation loss: 9.7934
Iteration: 5331; Percent complete: 53.3%; Training loss: 2.2613; Validation loss: 9.2707
Iteration: 5332; Percent complete: 53.3%; Training loss: 2.1990; Validation loss: 9.7345
Iteration: 5333; Percent complete: 53.3%; Training loss: 2.1866; Validation loss: 9.3691
Iteration: 5334; Perc

Iteration: 5416; Percent complete: 54.2%; Training loss: 2.2605; Validation loss: 8.6456
Iteration: 5417; Percent complete: 54.2%; Training loss: 2.2423; Validation loss: 8.9911
Iteration: 5418; Percent complete: 54.2%; Training loss: 2.1770; Validation loss: 9.1188
Iteration: 5419; Percent complete: 54.2%; Training loss: 2.2276; Validation loss: 8.6779
Iteration: 5420; Percent complete: 54.2%; Training loss: 2.2666; Validation loss: 8.8692
Iteration: 5421; Percent complete: 54.2%; Training loss: 2.5805; Validation loss: 9.4863
Iteration: 5422; Percent complete: 54.2%; Training loss: 2.2415; Validation loss: 9.2019
Iteration: 5423; Percent complete: 54.2%; Training loss: 2.2269; Validation loss: 9.4707
Iteration: 5424; Percent complete: 54.2%; Training loss: 2.2492; Validation loss: 9.5803
Iteration: 5425; Percent complete: 54.2%; Training loss: 2.3725; Validation loss: 9.3277
Iteration: 5426; Percent complete: 54.3%; Training loss: 2.3406; Validation loss: 9.0746
Iteration: 5427; Perc

Iteration: 5509; Percent complete: 55.1%; Training loss: 2.3584; Validation loss: 9.2388
Iteration: 5510; Percent complete: 55.1%; Training loss: 2.0834; Validation loss: 9.3686
Iteration: 5511; Percent complete: 55.1%; Training loss: 2.3526; Validation loss: 9.2846
Iteration: 5512; Percent complete: 55.1%; Training loss: 2.0033; Validation loss: 9.1562
Iteration: 5513; Percent complete: 55.1%; Training loss: 2.0421; Validation loss: 9.4870
Iteration: 5514; Percent complete: 55.1%; Training loss: 2.4790; Validation loss: 8.9052
Iteration: 5515; Percent complete: 55.1%; Training loss: 2.2204; Validation loss: 9.1864
Iteration: 5516; Percent complete: 55.2%; Training loss: 2.3280; Validation loss: 9.7264
Iteration: 5517; Percent complete: 55.2%; Training loss: 2.4220; Validation loss: 9.4130
Iteration: 5518; Percent complete: 55.2%; Training loss: 2.1288; Validation loss: 9.1467
Iteration: 5519; Percent complete: 55.2%; Training loss: 2.1517; Validation loss: 9.5335
Iteration: 5520; Perc

Iteration: 5602; Percent complete: 56.0%; Training loss: 2.1639; Validation loss: 9.2367
Iteration: 5603; Percent complete: 56.0%; Training loss: 2.2524; Validation loss: 8.8948
Iteration: 5604; Percent complete: 56.0%; Training loss: 2.1408; Validation loss: 9.6776
Iteration: 5605; Percent complete: 56.0%; Training loss: 2.4194; Validation loss: 9.4053
Iteration: 5606; Percent complete: 56.1%; Training loss: 2.2951; Validation loss: 9.6008
Iteration: 5607; Percent complete: 56.1%; Training loss: 2.2792; Validation loss: 9.3429
Iteration: 5608; Percent complete: 56.1%; Training loss: 2.1824; Validation loss: 9.5222
Iteration: 5609; Percent complete: 56.1%; Training loss: 2.3067; Validation loss: 9.2212
Iteration: 5610; Percent complete: 56.1%; Training loss: 2.1392; Validation loss: 9.0644
Iteration: 5611; Percent complete: 56.1%; Training loss: 2.0791; Validation loss: 9.3541
Iteration: 5612; Percent complete: 56.1%; Training loss: 2.1492; Validation loss: 9.3217
Iteration: 5613; Perc

Iteration: 5695; Percent complete: 57.0%; Training loss: 2.3071; Validation loss: 9.7252
Iteration: 5696; Percent complete: 57.0%; Training loss: 2.1864; Validation loss: 9.1747
Iteration: 5697; Percent complete: 57.0%; Training loss: 2.0184; Validation loss: 9.4275
Iteration: 5698; Percent complete: 57.0%; Training loss: 2.2099; Validation loss: 9.3518
Iteration: 5699; Percent complete: 57.0%; Training loss: 2.3828; Validation loss: 9.3614
Iteration: 5700; Percent complete: 57.0%; Training loss: 2.1687; Validation loss: 9.6008
Iteration: 5701; Percent complete: 57.0%; Training loss: 2.4189; Validation loss: 9.3860
Iteration: 5702; Percent complete: 57.0%; Training loss: 2.3936; Validation loss: 9.5668
Iteration: 5703; Percent complete: 57.0%; Training loss: 2.5913; Validation loss: 9.1733
Iteration: 5704; Percent complete: 57.0%; Training loss: 2.4150; Validation loss: 9.6014
Iteration: 5705; Percent complete: 57.0%; Training loss: 2.2755; Validation loss: 8.9406
Iteration: 5706; Perc

Iteration: 5788; Percent complete: 57.9%; Training loss: 2.2862; Validation loss: 9.2797
Iteration: 5789; Percent complete: 57.9%; Training loss: 2.3451; Validation loss: 9.4531
Iteration: 5790; Percent complete: 57.9%; Training loss: 2.1203; Validation loss: 9.3037
Iteration: 5791; Percent complete: 57.9%; Training loss: 2.1911; Validation loss: 9.4115
Iteration: 5792; Percent complete: 57.9%; Training loss: 2.1403; Validation loss: 9.1947
Iteration: 5793; Percent complete: 57.9%; Training loss: 2.4317; Validation loss: 9.3130
Iteration: 5794; Percent complete: 57.9%; Training loss: 2.3420; Validation loss: 9.2750
Iteration: 5795; Percent complete: 58.0%; Training loss: 2.2374; Validation loss: 9.3099
Iteration: 5796; Percent complete: 58.0%; Training loss: 2.2841; Validation loss: 8.9679
Iteration: 5797; Percent complete: 58.0%; Training loss: 2.3237; Validation loss: 9.2443
Iteration: 5798; Percent complete: 58.0%; Training loss: 2.1312; Validation loss: 8.9959
Iteration: 5799; Perc

Iteration: 5881; Percent complete: 58.8%; Training loss: 2.2451; Validation loss: 9.4619
Iteration: 5882; Percent complete: 58.8%; Training loss: 2.5188; Validation loss: 9.6325
Iteration: 5883; Percent complete: 58.8%; Training loss: 2.2274; Validation loss: 9.3397
Iteration: 5884; Percent complete: 58.8%; Training loss: 2.0515; Validation loss: 9.6336
Iteration: 5885; Percent complete: 58.9%; Training loss: 2.1556; Validation loss: 9.3610
Iteration: 5886; Percent complete: 58.9%; Training loss: 2.1796; Validation loss: 9.0238
Iteration: 5887; Percent complete: 58.9%; Training loss: 2.2346; Validation loss: 9.3298
Iteration: 5888; Percent complete: 58.9%; Training loss: 2.2419; Validation loss: 9.4217
Iteration: 5889; Percent complete: 58.9%; Training loss: 2.1795; Validation loss: 9.7255
Iteration: 5890; Percent complete: 58.9%; Training loss: 2.0744; Validation loss: 9.3627
Iteration: 5891; Percent complete: 58.9%; Training loss: 2.1318; Validation loss: 9.5011
Iteration: 5892; Perc

Iteration: 5974; Percent complete: 59.7%; Training loss: 2.2751; Validation loss: 9.1928
Iteration: 5975; Percent complete: 59.8%; Training loss: 2.0755; Validation loss: 9.4664
Iteration: 5976; Percent complete: 59.8%; Training loss: 2.0733; Validation loss: 9.5379
Iteration: 5977; Percent complete: 59.8%; Training loss: 2.5884; Validation loss: 9.4917
Iteration: 5978; Percent complete: 59.8%; Training loss: 1.9917; Validation loss: 9.3295
Iteration: 5979; Percent complete: 59.8%; Training loss: 2.1198; Validation loss: 9.1072
Iteration: 5980; Percent complete: 59.8%; Training loss: 2.3375; Validation loss: 9.3721
Iteration: 5981; Percent complete: 59.8%; Training loss: 2.2366; Validation loss: 9.4114
Iteration: 5982; Percent complete: 59.8%; Training loss: 2.0645; Validation loss: 9.5182
Iteration: 5983; Percent complete: 59.8%; Training loss: 2.0754; Validation loss: 9.7336
Iteration: 5984; Percent complete: 59.8%; Training loss: 2.3690; Validation loss: 10.1364
Iteration: 5985; Per

Iteration: 6067; Percent complete: 60.7%; Training loss: 2.0405; Validation loss: 9.4627
Iteration: 6068; Percent complete: 60.7%; Training loss: 1.9388; Validation loss: 9.4998
Iteration: 6069; Percent complete: 60.7%; Training loss: 2.2794; Validation loss: 9.7942
Iteration: 6070; Percent complete: 60.7%; Training loss: 2.3861; Validation loss: 9.2444
Iteration: 6071; Percent complete: 60.7%; Training loss: 2.2297; Validation loss: 9.5028
Iteration: 6072; Percent complete: 60.7%; Training loss: 2.4616; Validation loss: 9.2890
Iteration: 6073; Percent complete: 60.7%; Training loss: 2.0404; Validation loss: 9.4768
Iteration: 6074; Percent complete: 60.7%; Training loss: 2.1887; Validation loss: 9.5992
Iteration: 6075; Percent complete: 60.8%; Training loss: 2.0589; Validation loss: 9.6513
Iteration: 6076; Percent complete: 60.8%; Training loss: 2.1500; Validation loss: 9.2427
Iteration: 6077; Percent complete: 60.8%; Training loss: 2.1571; Validation loss: 9.5346
Iteration: 6078; Perc

Iteration: 6160; Percent complete: 61.6%; Training loss: 2.0649; Validation loss: 9.4178
Iteration: 6161; Percent complete: 61.6%; Training loss: 1.8966; Validation loss: 9.4832
Iteration: 6162; Percent complete: 61.6%; Training loss: 2.0930; Validation loss: 9.2853
Iteration: 6163; Percent complete: 61.6%; Training loss: 2.0224; Validation loss: 9.3108
Iteration: 6164; Percent complete: 61.6%; Training loss: 2.3679; Validation loss: 9.7114
Iteration: 6165; Percent complete: 61.7%; Training loss: 2.3944; Validation loss: 9.6590
Iteration: 6166; Percent complete: 61.7%; Training loss: 2.1366; Validation loss: 9.4956
Iteration: 6167; Percent complete: 61.7%; Training loss: 2.3232; Validation loss: 9.7849
Iteration: 6168; Percent complete: 61.7%; Training loss: 1.9633; Validation loss: 9.0783
Iteration: 6169; Percent complete: 61.7%; Training loss: 2.2607; Validation loss: 9.1585
Iteration: 6170; Percent complete: 61.7%; Training loss: 2.0984; Validation loss: 9.1857
Iteration: 6171; Perc

Iteration: 6253; Percent complete: 62.5%; Training loss: 2.1239; Validation loss: 9.1918
Iteration: 6254; Percent complete: 62.5%; Training loss: 1.9907; Validation loss: 9.6428
Iteration: 6255; Percent complete: 62.5%; Training loss: 2.0444; Validation loss: 9.7874
Iteration: 6256; Percent complete: 62.6%; Training loss: 2.1618; Validation loss: 9.8212
Iteration: 6257; Percent complete: 62.6%; Training loss: 1.8928; Validation loss: 9.7504
Iteration: 6258; Percent complete: 62.6%; Training loss: 2.1062; Validation loss: 10.1353
Iteration: 6259; Percent complete: 62.6%; Training loss: 2.2852; Validation loss: 9.6057
Iteration: 6260; Percent complete: 62.6%; Training loss: 2.0113; Validation loss: 9.5323
Iteration: 6261; Percent complete: 62.6%; Training loss: 1.9718; Validation loss: 10.1677
Iteration: 6262; Percent complete: 62.6%; Training loss: 2.3075; Validation loss: 9.4038
Iteration: 6263; Percent complete: 62.6%; Training loss: 2.1690; Validation loss: 9.9109
Iteration: 6264; Pe

Iteration: 6345; Percent complete: 63.4%; Training loss: 1.8824; Validation loss: 9.6547
Iteration: 6346; Percent complete: 63.5%; Training loss: 1.9283; Validation loss: 9.4185
Iteration: 6347; Percent complete: 63.5%; Training loss: 2.1978; Validation loss: 9.6265
Iteration: 6348; Percent complete: 63.5%; Training loss: 2.0414; Validation loss: 9.5185
Iteration: 6349; Percent complete: 63.5%; Training loss: 1.9426; Validation loss: 9.8072
Iteration: 6350; Percent complete: 63.5%; Training loss: 1.9117; Validation loss: 9.6033
Iteration: 6351; Percent complete: 63.5%; Training loss: 1.8655; Validation loss: 9.7995
Iteration: 6352; Percent complete: 63.5%; Training loss: 2.0625; Validation loss: 9.9991
Iteration: 6353; Percent complete: 63.5%; Training loss: 1.9971; Validation loss: 9.6546
Iteration: 6354; Percent complete: 63.5%; Training loss: 1.9868; Validation loss: 9.5392
Iteration: 6355; Percent complete: 63.5%; Training loss: 2.0353; Validation loss: 9.1857
Iteration: 6356; Perc

Iteration: 6438; Percent complete: 64.4%; Training loss: 2.2402; Validation loss: 9.6071
Iteration: 6439; Percent complete: 64.4%; Training loss: 1.9229; Validation loss: 9.1111
Iteration: 6440; Percent complete: 64.4%; Training loss: 2.0160; Validation loss: 9.1692
Iteration: 6441; Percent complete: 64.4%; Training loss: 2.0539; Validation loss: 9.1050
Iteration: 6442; Percent complete: 64.4%; Training loss: 1.8635; Validation loss: 9.3452
Iteration: 6443; Percent complete: 64.4%; Training loss: 2.0076; Validation loss: 9.4889
Iteration: 6444; Percent complete: 64.4%; Training loss: 2.1261; Validation loss: 9.6306
Iteration: 6445; Percent complete: 64.5%; Training loss: 2.0210; Validation loss: 9.3706
Iteration: 6446; Percent complete: 64.5%; Training loss: 2.0522; Validation loss: 9.2419
Iteration: 6447; Percent complete: 64.5%; Training loss: 1.9251; Validation loss: 9.4591
Iteration: 6448; Percent complete: 64.5%; Training loss: 2.0897; Validation loss: 9.9023
Iteration: 6449; Perc

Iteration: 6530; Percent complete: 65.3%; Training loss: 2.0052; Validation loss: 9.6553
Iteration: 6531; Percent complete: 65.3%; Training loss: 1.8048; Validation loss: 9.9931
Iteration: 6532; Percent complete: 65.3%; Training loss: 1.8841; Validation loss: 9.6167
Iteration: 6533; Percent complete: 65.3%; Training loss: 1.9835; Validation loss: 9.4312
Iteration: 6534; Percent complete: 65.3%; Training loss: 2.0818; Validation loss: 9.8133
Iteration: 6535; Percent complete: 65.3%; Training loss: 2.0453; Validation loss: 10.1270
Iteration: 6536; Percent complete: 65.4%; Training loss: 1.7958; Validation loss: 9.3470
Iteration: 6537; Percent complete: 65.4%; Training loss: 1.8725; Validation loss: 9.8630
Iteration: 6538; Percent complete: 65.4%; Training loss: 1.9482; Validation loss: 9.2455
Iteration: 6539; Percent complete: 65.4%; Training loss: 2.0200; Validation loss: 9.4276
Iteration: 6540; Percent complete: 65.4%; Training loss: 1.9913; Validation loss: 9.6964
Iteration: 6541; Per

Iteration: 6622; Percent complete: 66.2%; Training loss: 1.8783; Validation loss: 10.0651
Iteration: 6623; Percent complete: 66.2%; Training loss: 2.0461; Validation loss: 9.6277
Iteration: 6624; Percent complete: 66.2%; Training loss: 2.1232; Validation loss: 9.6495
Iteration: 6625; Percent complete: 66.2%; Training loss: 1.7621; Validation loss: 9.3703
Iteration: 6626; Percent complete: 66.3%; Training loss: 2.1687; Validation loss: 9.7117
Iteration: 6627; Percent complete: 66.3%; Training loss: 2.1469; Validation loss: 9.6742
Iteration: 6628; Percent complete: 66.3%; Training loss: 1.9576; Validation loss: 9.5258
Iteration: 6629; Percent complete: 66.3%; Training loss: 1.9805; Validation loss: 9.5933
Iteration: 6630; Percent complete: 66.3%; Training loss: 2.1809; Validation loss: 9.5957
Iteration: 6631; Percent complete: 66.3%; Training loss: 2.1078; Validation loss: 9.4438
Iteration: 6632; Percent complete: 66.3%; Training loss: 2.0051; Validation loss: 9.8569
Iteration: 6633; Per

Iteration: 6714; Percent complete: 67.1%; Training loss: 2.0346; Validation loss: 9.7860
Iteration: 6715; Percent complete: 67.2%; Training loss: 2.1833; Validation loss: 9.2147
Iteration: 6716; Percent complete: 67.2%; Training loss: 1.9566; Validation loss: 9.1805
Iteration: 6717; Percent complete: 67.2%; Training loss: 1.9422; Validation loss: 9.7619
Iteration: 6718; Percent complete: 67.2%; Training loss: 1.9110; Validation loss: 9.3527
Iteration: 6719; Percent complete: 67.2%; Training loss: 2.0472; Validation loss: 10.0947
Iteration: 6720; Percent complete: 67.2%; Training loss: 2.0083; Validation loss: 10.1082
Iteration: 6721; Percent complete: 67.2%; Training loss: 1.8024; Validation loss: 10.2462
Iteration: 6722; Percent complete: 67.2%; Training loss: 1.8681; Validation loss: 9.6363
Iteration: 6723; Percent complete: 67.2%; Training loss: 1.8750; Validation loss: 9.5600
Iteration: 6724; Percent complete: 67.2%; Training loss: 1.9920; Validation loss: 9.7012
Iteration: 6725; P

Iteration: 6806; Percent complete: 68.1%; Training loss: 2.0304; Validation loss: 9.6396
Iteration: 6807; Percent complete: 68.1%; Training loss: 2.1045; Validation loss: 9.7033
Iteration: 6808; Percent complete: 68.1%; Training loss: 2.0008; Validation loss: 10.0229
Iteration: 6809; Percent complete: 68.1%; Training loss: 1.9981; Validation loss: 9.8619
Iteration: 6810; Percent complete: 68.1%; Training loss: 1.8964; Validation loss: 9.4028
Iteration: 6811; Percent complete: 68.1%; Training loss: 2.0042; Validation loss: 9.5821
Iteration: 6812; Percent complete: 68.1%; Training loss: 1.8419; Validation loss: 9.6426
Iteration: 6813; Percent complete: 68.1%; Training loss: 1.8715; Validation loss: 9.4097
Iteration: 6814; Percent complete: 68.1%; Training loss: 1.9586; Validation loss: 9.0606
Iteration: 6815; Percent complete: 68.2%; Training loss: 2.0302; Validation loss: 9.5298
Iteration: 6816; Percent complete: 68.2%; Training loss: 1.9295; Validation loss: 9.3591
Iteration: 6817; Per

Iteration: 6898; Percent complete: 69.0%; Training loss: 1.9757; Validation loss: 9.7935
Iteration: 6899; Percent complete: 69.0%; Training loss: 1.8996; Validation loss: 9.9515
Iteration: 6900; Percent complete: 69.0%; Training loss: 1.9189; Validation loss: 9.5710
Iteration: 6901; Percent complete: 69.0%; Training loss: 1.9656; Validation loss: 9.5202
Iteration: 6902; Percent complete: 69.0%; Training loss: 1.7448; Validation loss: 9.9889
Iteration: 6903; Percent complete: 69.0%; Training loss: 1.9835; Validation loss: 9.5365
Iteration: 6904; Percent complete: 69.0%; Training loss: 1.9113; Validation loss: 9.3327
Iteration: 6905; Percent complete: 69.0%; Training loss: 2.1211; Validation loss: 10.0870
Iteration: 6906; Percent complete: 69.1%; Training loss: 2.0904; Validation loss: 9.6454
Iteration: 6907; Percent complete: 69.1%; Training loss: 1.9654; Validation loss: 9.5914
Iteration: 6908; Percent complete: 69.1%; Training loss: 1.9515; Validation loss: 9.6222
Iteration: 6909; Per

Iteration: 6990; Percent complete: 69.9%; Training loss: 1.8376; Validation loss: 9.7295
Iteration: 6991; Percent complete: 69.9%; Training loss: 1.9512; Validation loss: 10.0150
Iteration: 6992; Percent complete: 69.9%; Training loss: 2.0032; Validation loss: 9.6117
Iteration: 6993; Percent complete: 69.9%; Training loss: 1.8637; Validation loss: 9.6461
Iteration: 6994; Percent complete: 69.9%; Training loss: 2.0287; Validation loss: 9.8167
Iteration: 6995; Percent complete: 70.0%; Training loss: 2.1094; Validation loss: 9.7295
Iteration: 6996; Percent complete: 70.0%; Training loss: 2.0695; Validation loss: 9.7481
Iteration: 6997; Percent complete: 70.0%; Training loss: 1.9846; Validation loss: 9.7200
Iteration: 6998; Percent complete: 70.0%; Training loss: 1.7231; Validation loss: 9.7216
Iteration: 6999; Percent complete: 70.0%; Training loss: 1.8913; Validation loss: 9.8961
Iteration: 7000; Percent complete: 70.0%; Training loss: 1.9886; Validation loss: 9.4743
Iteration: 7001; Per

Iteration: 7082; Percent complete: 70.8%; Training loss: 1.8608; Validation loss: 9.8954
Iteration: 7083; Percent complete: 70.8%; Training loss: 1.7859; Validation loss: 10.1289
Iteration: 7084; Percent complete: 70.8%; Training loss: 1.9373; Validation loss: 9.6467
Iteration: 7085; Percent complete: 70.9%; Training loss: 1.9609; Validation loss: 9.7866
Iteration: 7086; Percent complete: 70.9%; Training loss: 1.7964; Validation loss: 9.7872
Iteration: 7087; Percent complete: 70.9%; Training loss: 1.8929; Validation loss: 9.8456
Iteration: 7088; Percent complete: 70.9%; Training loss: 1.9004; Validation loss: 9.5460
Iteration: 7089; Percent complete: 70.9%; Training loss: 1.9837; Validation loss: 9.6818
Iteration: 7090; Percent complete: 70.9%; Training loss: 2.0111; Validation loss: 9.6562
Iteration: 7091; Percent complete: 70.9%; Training loss: 1.8585; Validation loss: 9.0853
Iteration: 7092; Percent complete: 70.9%; Training loss: 2.0827; Validation loss: 9.7146
Iteration: 7093; Per

Iteration: 7174; Percent complete: 71.7%; Training loss: 1.8512; Validation loss: 10.1144
Iteration: 7175; Percent complete: 71.8%; Training loss: 1.8504; Validation loss: 9.9663
Iteration: 7176; Percent complete: 71.8%; Training loss: 1.9811; Validation loss: 10.2371
Iteration: 7177; Percent complete: 71.8%; Training loss: 1.7950; Validation loss: 9.7478
Iteration: 7178; Percent complete: 71.8%; Training loss: 1.8311; Validation loss: 9.8108
Iteration: 7179; Percent complete: 71.8%; Training loss: 1.8489; Validation loss: 9.2928
Iteration: 7180; Percent complete: 71.8%; Training loss: 1.7184; Validation loss: 9.6376
Iteration: 7181; Percent complete: 71.8%; Training loss: 1.9372; Validation loss: 9.3578
Iteration: 7182; Percent complete: 71.8%; Training loss: 1.9296; Validation loss: 9.7429
Iteration: 7183; Percent complete: 71.8%; Training loss: 1.9454; Validation loss: 9.7437
Iteration: 7184; Percent complete: 71.8%; Training loss: 1.6433; Validation loss: 9.8009
Iteration: 7185; Pe

Iteration: 7266; Percent complete: 72.7%; Training loss: 1.9344; Validation loss: 9.8770
Iteration: 7267; Percent complete: 72.7%; Training loss: 1.8091; Validation loss: 9.8472
Iteration: 7268; Percent complete: 72.7%; Training loss: 1.8986; Validation loss: 9.4472
Iteration: 7269; Percent complete: 72.7%; Training loss: 1.7953; Validation loss: 9.8133
Iteration: 7270; Percent complete: 72.7%; Training loss: 1.8777; Validation loss: 9.6100
Iteration: 7271; Percent complete: 72.7%; Training loss: 1.9088; Validation loss: 9.3973
Iteration: 7272; Percent complete: 72.7%; Training loss: 1.8728; Validation loss: 9.6973
Iteration: 7273; Percent complete: 72.7%; Training loss: 1.9313; Validation loss: 10.0148
Iteration: 7274; Percent complete: 72.7%; Training loss: 1.8328; Validation loss: 9.3600
Iteration: 7275; Percent complete: 72.8%; Training loss: 1.8996; Validation loss: 10.1435
Iteration: 7276; Percent complete: 72.8%; Training loss: 1.9006; Validation loss: 9.6650
Iteration: 7277; Pe

Iteration: 7358; Percent complete: 73.6%; Training loss: 1.8112; Validation loss: 10.0632
Iteration: 7359; Percent complete: 73.6%; Training loss: 1.7693; Validation loss: 10.4008
Iteration: 7360; Percent complete: 73.6%; Training loss: 1.8500; Validation loss: 9.6837
Iteration: 7361; Percent complete: 73.6%; Training loss: 1.9573; Validation loss: 9.9918
Iteration: 7362; Percent complete: 73.6%; Training loss: 1.9997; Validation loss: 9.8534
Iteration: 7363; Percent complete: 73.6%; Training loss: 1.9399; Validation loss: 9.7700
Iteration: 7364; Percent complete: 73.6%; Training loss: 1.5365; Validation loss: 10.1040
Iteration: 7365; Percent complete: 73.7%; Training loss: 1.9926; Validation loss: 9.4025
Iteration: 7366; Percent complete: 73.7%; Training loss: 1.8131; Validation loss: 9.8972
Iteration: 7367; Percent complete: 73.7%; Training loss: 1.7816; Validation loss: 9.4396
Iteration: 7368; Percent complete: 73.7%; Training loss: 1.8496; Validation loss: 9.8500
Iteration: 7369; P

Iteration: 7450; Percent complete: 74.5%; Training loss: 1.7524; Validation loss: 10.2774
Iteration: 7451; Percent complete: 74.5%; Training loss: 1.7646; Validation loss: 9.8104
Iteration: 7452; Percent complete: 74.5%; Training loss: 1.7793; Validation loss: 9.4035
Iteration: 7453; Percent complete: 74.5%; Training loss: 1.6935; Validation loss: 9.4982
Iteration: 7454; Percent complete: 74.5%; Training loss: 1.7959; Validation loss: 9.8777
Iteration: 7455; Percent complete: 74.6%; Training loss: 1.8787; Validation loss: 10.0955
Iteration: 7456; Percent complete: 74.6%; Training loss: 1.7456; Validation loss: 9.9339
Iteration: 7457; Percent complete: 74.6%; Training loss: 1.8137; Validation loss: 10.0244
Iteration: 7458; Percent complete: 74.6%; Training loss: 1.9171; Validation loss: 10.2599
Iteration: 7459; Percent complete: 74.6%; Training loss: 1.6650; Validation loss: 9.5732
Iteration: 7460; Percent complete: 74.6%; Training loss: 1.8860; Validation loss: 9.4601
Iteration: 7461; 

Iteration: 7542; Percent complete: 75.4%; Training loss: 1.9740; Validation loss: 9.9426
Iteration: 7543; Percent complete: 75.4%; Training loss: 1.7842; Validation loss: 10.1655
Iteration: 7544; Percent complete: 75.4%; Training loss: 1.6612; Validation loss: 10.0061
Iteration: 7545; Percent complete: 75.4%; Training loss: 1.7744; Validation loss: 10.4208
Iteration: 7546; Percent complete: 75.5%; Training loss: 1.9003; Validation loss: 9.9853
Iteration: 7547; Percent complete: 75.5%; Training loss: 1.8648; Validation loss: 9.8315
Iteration: 7548; Percent complete: 75.5%; Training loss: 1.9491; Validation loss: 10.1161
Iteration: 7549; Percent complete: 75.5%; Training loss: 1.6894; Validation loss: 10.1257
Iteration: 7550; Percent complete: 75.5%; Training loss: 1.9827; Validation loss: 10.0223
Iteration: 7551; Percent complete: 75.5%; Training loss: 1.7597; Validation loss: 9.7384
Iteration: 7552; Percent complete: 75.5%; Training loss: 1.9967; Validation loss: 9.4502
Iteration: 7553

Iteration: 7634; Percent complete: 76.3%; Training loss: 1.8035; Validation loss: 9.6029
Iteration: 7635; Percent complete: 76.3%; Training loss: 1.6415; Validation loss: 9.4160
Iteration: 7636; Percent complete: 76.4%; Training loss: 1.7962; Validation loss: 9.8394
Iteration: 7637; Percent complete: 76.4%; Training loss: 1.7558; Validation loss: 10.1530
Iteration: 7638; Percent complete: 76.4%; Training loss: 1.6272; Validation loss: 9.9236
Iteration: 7639; Percent complete: 76.4%; Training loss: 1.8195; Validation loss: 10.5141
Iteration: 7640; Percent complete: 76.4%; Training loss: 1.7870; Validation loss: 9.5981
Iteration: 7641; Percent complete: 76.4%; Training loss: 1.6459; Validation loss: 10.1710
Iteration: 7642; Percent complete: 76.4%; Training loss: 1.6847; Validation loss: 10.0979
Iteration: 7643; Percent complete: 76.4%; Training loss: 1.7574; Validation loss: 9.8323
Iteration: 7644; Percent complete: 76.4%; Training loss: 1.7614; Validation loss: 10.1892
Iteration: 7645;

Iteration: 7726; Percent complete: 77.3%; Training loss: 1.7691; Validation loss: 9.8232
Iteration: 7727; Percent complete: 77.3%; Training loss: 1.8056; Validation loss: 10.4124
Iteration: 7728; Percent complete: 77.3%; Training loss: 1.6390; Validation loss: 9.5013
Iteration: 7729; Percent complete: 77.3%; Training loss: 1.8271; Validation loss: 9.8291
Iteration: 7730; Percent complete: 77.3%; Training loss: 1.7976; Validation loss: 9.4619
Iteration: 7731; Percent complete: 77.3%; Training loss: 1.7992; Validation loss: 10.0608
Iteration: 7732; Percent complete: 77.3%; Training loss: 1.7961; Validation loss: 9.4618
Iteration: 7733; Percent complete: 77.3%; Training loss: 1.6758; Validation loss: 9.8782
Iteration: 7734; Percent complete: 77.3%; Training loss: 1.5479; Validation loss: 10.1508
Iteration: 7735; Percent complete: 77.3%; Training loss: 1.7565; Validation loss: 9.5246
Iteration: 7736; Percent complete: 77.4%; Training loss: 1.8546; Validation loss: 9.7003
Iteration: 7737; P

Iteration: 7818; Percent complete: 78.2%; Training loss: 1.8140; Validation loss: 9.7818
Iteration: 7819; Percent complete: 78.2%; Training loss: 1.6967; Validation loss: 9.9927
Iteration: 7820; Percent complete: 78.2%; Training loss: 1.6732; Validation loss: 10.2039
Iteration: 7821; Percent complete: 78.2%; Training loss: 1.5728; Validation loss: 9.8388
Iteration: 7822; Percent complete: 78.2%; Training loss: 1.6649; Validation loss: 10.2676
Iteration: 7823; Percent complete: 78.2%; Training loss: 1.8306; Validation loss: 9.9791
Iteration: 7824; Percent complete: 78.2%; Training loss: 1.9247; Validation loss: 9.8534
Iteration: 7825; Percent complete: 78.2%; Training loss: 1.7835; Validation loss: 10.3261
Iteration: 7826; Percent complete: 78.3%; Training loss: 1.7855; Validation loss: 9.9139
Iteration: 7827; Percent complete: 78.3%; Training loss: 1.8069; Validation loss: 10.0969
Iteration: 7828; Percent complete: 78.3%; Training loss: 1.8131; Validation loss: 10.4588
Iteration: 7829;

Iteration: 7910; Percent complete: 79.1%; Training loss: 1.7428; Validation loss: 10.3617
Iteration: 7911; Percent complete: 79.1%; Training loss: 1.8173; Validation loss: 9.3231
Iteration: 7912; Percent complete: 79.1%; Training loss: 1.7405; Validation loss: 10.3236
Iteration: 7913; Percent complete: 79.1%; Training loss: 1.7976; Validation loss: 10.2864
Iteration: 7914; Percent complete: 79.1%; Training loss: 1.5945; Validation loss: 9.9920
Iteration: 7915; Percent complete: 79.1%; Training loss: 2.0113; Validation loss: 10.4438
Iteration: 7916; Percent complete: 79.2%; Training loss: 1.8030; Validation loss: 9.9449
Iteration: 7917; Percent complete: 79.2%; Training loss: 1.8723; Validation loss: 9.9497
Iteration: 7918; Percent complete: 79.2%; Training loss: 1.8612; Validation loss: 10.0945
Iteration: 7919; Percent complete: 79.2%; Training loss: 1.9615; Validation loss: 10.0304
Iteration: 7920; Percent complete: 79.2%; Training loss: 1.7659; Validation loss: 10.2926
Iteration: 792

Iteration: 8002; Percent complete: 80.0%; Training loss: 1.8594; Validation loss: 10.0180
Iteration: 8003; Percent complete: 80.0%; Training loss: 1.6595; Validation loss: 9.5882
Iteration: 8004; Percent complete: 80.0%; Training loss: 1.7967; Validation loss: 10.4218
Iteration: 8005; Percent complete: 80.0%; Training loss: 1.7766; Validation loss: 10.1111
Iteration: 8006; Percent complete: 80.1%; Training loss: 1.7189; Validation loss: 10.2709
Iteration: 8007; Percent complete: 80.1%; Training loss: 1.8805; Validation loss: 10.0216
Iteration: 8008; Percent complete: 80.1%; Training loss: 1.9608; Validation loss: 10.2039
Iteration: 8009; Percent complete: 80.1%; Training loss: 1.5368; Validation loss: 10.4229
Iteration: 8010; Percent complete: 80.1%; Training loss: 1.7091; Validation loss: 10.0916
Iteration: 8011; Percent complete: 80.1%; Training loss: 1.4930; Validation loss: 9.8493
Iteration: 8012; Percent complete: 80.1%; Training loss: 1.6142; Validation loss: 10.0171
Iteration: 8

Iteration: 8094; Percent complete: 80.9%; Training loss: 1.8398; Validation loss: 9.9331
Iteration: 8095; Percent complete: 81.0%; Training loss: 1.7219; Validation loss: 10.3597
Iteration: 8096; Percent complete: 81.0%; Training loss: 1.6198; Validation loss: 9.9588
Iteration: 8097; Percent complete: 81.0%; Training loss: 1.6432; Validation loss: 10.3780
Iteration: 8098; Percent complete: 81.0%; Training loss: 1.7407; Validation loss: 10.3508
Iteration: 8099; Percent complete: 81.0%; Training loss: 1.5752; Validation loss: 9.9451
Iteration: 8100; Percent complete: 81.0%; Training loss: 1.7159; Validation loss: 10.0915
Iteration: 8101; Percent complete: 81.0%; Training loss: 1.6157; Validation loss: 9.5399
Iteration: 8102; Percent complete: 81.0%; Training loss: 1.6933; Validation loss: 10.5359
Iteration: 8103; Percent complete: 81.0%; Training loss: 1.8399; Validation loss: 10.3382
Iteration: 8104; Percent complete: 81.0%; Training loss: 1.6883; Validation loss: 10.1053
Iteration: 810

Iteration: 8186; Percent complete: 81.9%; Training loss: 1.7698; Validation loss: 10.3519
Iteration: 8187; Percent complete: 81.9%; Training loss: 1.7250; Validation loss: 11.0441
Iteration: 8188; Percent complete: 81.9%; Training loss: 1.5992; Validation loss: 10.5507
Iteration: 8189; Percent complete: 81.9%; Training loss: 1.6767; Validation loss: 10.0127
Iteration: 8190; Percent complete: 81.9%; Training loss: 1.6702; Validation loss: 10.1612
Iteration: 8191; Percent complete: 81.9%; Training loss: 1.6440; Validation loss: 10.8418
Iteration: 8192; Percent complete: 81.9%; Training loss: 1.6695; Validation loss: 9.9403
Iteration: 8193; Percent complete: 81.9%; Training loss: 1.6277; Validation loss: 10.2386
Iteration: 8194; Percent complete: 81.9%; Training loss: 1.6114; Validation loss: 10.8072
Iteration: 8195; Percent complete: 82.0%; Training loss: 1.7021; Validation loss: 10.2868
Iteration: 8196; Percent complete: 82.0%; Training loss: 1.5452; Validation loss: 10.9364
Iteration: 

Iteration: 8278; Percent complete: 82.8%; Training loss: 1.7665; Validation loss: 10.1447
Iteration: 8279; Percent complete: 82.8%; Training loss: 1.6268; Validation loss: 9.8678
Iteration: 8280; Percent complete: 82.8%; Training loss: 1.6597; Validation loss: 10.1109
Iteration: 8281; Percent complete: 82.8%; Training loss: 1.8194; Validation loss: 10.5156
Iteration: 8282; Percent complete: 82.8%; Training loss: 1.6488; Validation loss: 9.9254
Iteration: 8283; Percent complete: 82.8%; Training loss: 1.5609; Validation loss: 9.7250
Iteration: 8284; Percent complete: 82.8%; Training loss: 1.6189; Validation loss: 9.4603
Iteration: 8285; Percent complete: 82.8%; Training loss: 1.7276; Validation loss: 10.2819
Iteration: 8286; Percent complete: 82.9%; Training loss: 1.4804; Validation loss: 9.9284
Iteration: 8287; Percent complete: 82.9%; Training loss: 1.7952; Validation loss: 10.6294
Iteration: 8288; Percent complete: 82.9%; Training loss: 1.7412; Validation loss: 10.0064
Iteration: 8289

Iteration: 8370; Percent complete: 83.7%; Training loss: 1.8169; Validation loss: 10.2519
Iteration: 8371; Percent complete: 83.7%; Training loss: 1.6860; Validation loss: 10.7795
Iteration: 8372; Percent complete: 83.7%; Training loss: 1.5670; Validation loss: 10.1651
Iteration: 8373; Percent complete: 83.7%; Training loss: 1.5808; Validation loss: 10.2682
Iteration: 8374; Percent complete: 83.7%; Training loss: 1.7510; Validation loss: 10.1907
Iteration: 8375; Percent complete: 83.8%; Training loss: 1.6343; Validation loss: 10.2676
Iteration: 8376; Percent complete: 83.8%; Training loss: 1.7751; Validation loss: 9.9828
Iteration: 8377; Percent complete: 83.8%; Training loss: 1.6794; Validation loss: 10.2957
Iteration: 8378; Percent complete: 83.8%; Training loss: 1.6460; Validation loss: 10.1371
Iteration: 8379; Percent complete: 83.8%; Training loss: 1.6284; Validation loss: 10.0016
Iteration: 8380; Percent complete: 83.8%; Training loss: 1.7501; Validation loss: 10.0445
Iteration: 

Iteration: 8462; Percent complete: 84.6%; Training loss: 1.5663; Validation loss: 9.9799
Iteration: 8463; Percent complete: 84.6%; Training loss: 1.7405; Validation loss: 10.1188
Iteration: 8464; Percent complete: 84.6%; Training loss: 1.6475; Validation loss: 11.0871
Iteration: 8465; Percent complete: 84.7%; Training loss: 1.6860; Validation loss: 9.8831
Iteration: 8466; Percent complete: 84.7%; Training loss: 1.6515; Validation loss: 10.9395
Iteration: 8467; Percent complete: 84.7%; Training loss: 1.6303; Validation loss: 10.6522
Iteration: 8468; Percent complete: 84.7%; Training loss: 1.6318; Validation loss: 10.3597
Iteration: 8469; Percent complete: 84.7%; Training loss: 1.6773; Validation loss: 10.5381
Iteration: 8470; Percent complete: 84.7%; Training loss: 1.4681; Validation loss: 10.4331
Iteration: 8471; Percent complete: 84.7%; Training loss: 1.6020; Validation loss: 10.3968
Iteration: 8472; Percent complete: 84.7%; Training loss: 1.5158; Validation loss: 9.9431
Iteration: 84

Iteration: 8554; Percent complete: 85.5%; Training loss: 1.5117; Validation loss: 9.8089
Iteration: 8555; Percent complete: 85.5%; Training loss: 1.5510; Validation loss: 10.5640
Iteration: 8556; Percent complete: 85.6%; Training loss: 1.8786; Validation loss: 10.5360
Iteration: 8557; Percent complete: 85.6%; Training loss: 1.7933; Validation loss: 10.4781
Iteration: 8558; Percent complete: 85.6%; Training loss: 1.5108; Validation loss: 10.8446
Iteration: 8559; Percent complete: 85.6%; Training loss: 1.7072; Validation loss: 11.0482
Iteration: 8560; Percent complete: 85.6%; Training loss: 1.4784; Validation loss: 10.3390
Iteration: 8561; Percent complete: 85.6%; Training loss: 1.5264; Validation loss: 10.6761
Iteration: 8562; Percent complete: 85.6%; Training loss: 1.5630; Validation loss: 9.9292
Iteration: 8563; Percent complete: 85.6%; Training loss: 1.6895; Validation loss: 10.5030
Iteration: 8564; Percent complete: 85.6%; Training loss: 1.8619; Validation loss: 10.0469
Iteration: 8

Iteration: 8646; Percent complete: 86.5%; Training loss: 1.7482; Validation loss: 10.7086
Iteration: 8647; Percent complete: 86.5%; Training loss: 1.6842; Validation loss: 10.3491
Iteration: 8648; Percent complete: 86.5%; Training loss: 1.5123; Validation loss: 10.7705
Iteration: 8649; Percent complete: 86.5%; Training loss: 1.4934; Validation loss: 10.1282
Iteration: 8650; Percent complete: 86.5%; Training loss: 1.6087; Validation loss: 10.7243
Iteration: 8651; Percent complete: 86.5%; Training loss: 1.5072; Validation loss: 10.0358
Iteration: 8652; Percent complete: 86.5%; Training loss: 1.5929; Validation loss: 10.3524
Iteration: 8653; Percent complete: 86.5%; Training loss: 1.6298; Validation loss: 10.7684
Iteration: 8654; Percent complete: 86.5%; Training loss: 1.6398; Validation loss: 10.1961
Iteration: 8655; Percent complete: 86.6%; Training loss: 1.4316; Validation loss: 10.2516
Iteration: 8656; Percent complete: 86.6%; Training loss: 1.5382; Validation loss: 10.6212
Iteration:

Iteration: 8738; Percent complete: 87.4%; Training loss: 1.6683; Validation loss: 10.6842
Iteration: 8739; Percent complete: 87.4%; Training loss: 1.6580; Validation loss: 10.0403
Iteration: 8740; Percent complete: 87.4%; Training loss: 1.8909; Validation loss: 10.5168
Iteration: 8741; Percent complete: 87.4%; Training loss: 1.6171; Validation loss: 9.9569
Iteration: 8742; Percent complete: 87.4%; Training loss: 1.5920; Validation loss: 10.5446
Iteration: 8743; Percent complete: 87.4%; Training loss: 1.4115; Validation loss: 10.4523
Iteration: 8744; Percent complete: 87.4%; Training loss: 1.5656; Validation loss: 10.5134
Iteration: 8745; Percent complete: 87.5%; Training loss: 1.3553; Validation loss: 10.6806
Iteration: 8746; Percent complete: 87.5%; Training loss: 1.7624; Validation loss: 10.3409
Iteration: 8747; Percent complete: 87.5%; Training loss: 1.6824; Validation loss: 10.3432
Iteration: 8748; Percent complete: 87.5%; Training loss: 1.5876; Validation loss: 10.2478
Iteration: 

Iteration: 8830; Percent complete: 88.3%; Training loss: 1.5358; Validation loss: 10.0158
Iteration: 8831; Percent complete: 88.3%; Training loss: 1.5568; Validation loss: 10.5214
Iteration: 8832; Percent complete: 88.3%; Training loss: 1.4901; Validation loss: 10.4517
Iteration: 8833; Percent complete: 88.3%; Training loss: 1.5386; Validation loss: 10.2745
Iteration: 8834; Percent complete: 88.3%; Training loss: 1.3560; Validation loss: 10.3639
Iteration: 8835; Percent complete: 88.3%; Training loss: 1.5728; Validation loss: 9.9585
Iteration: 8836; Percent complete: 88.4%; Training loss: 1.6302; Validation loss: 10.4509
Iteration: 8837; Percent complete: 88.4%; Training loss: 1.7490; Validation loss: 10.5615
Iteration: 8838; Percent complete: 88.4%; Training loss: 1.4523; Validation loss: 10.6619
Iteration: 8839; Percent complete: 88.4%; Training loss: 1.8063; Validation loss: 10.4076
Iteration: 8840; Percent complete: 88.4%; Training loss: 1.4528; Validation loss: 10.2942
Iteration: 

Iteration: 8922; Percent complete: 89.2%; Training loss: 1.5090; Validation loss: 10.1384
Iteration: 8923; Percent complete: 89.2%; Training loss: 1.4030; Validation loss: 10.9250
Iteration: 8924; Percent complete: 89.2%; Training loss: 1.5169; Validation loss: 10.7035
Iteration: 8925; Percent complete: 89.2%; Training loss: 1.4778; Validation loss: 10.5491
Iteration: 8926; Percent complete: 89.3%; Training loss: 1.6050; Validation loss: 10.2947
Iteration: 8927; Percent complete: 89.3%; Training loss: 1.5252; Validation loss: 10.2490
Iteration: 8928; Percent complete: 89.3%; Training loss: 1.5064; Validation loss: 9.9590
Iteration: 8929; Percent complete: 89.3%; Training loss: 1.4141; Validation loss: 10.3683
Iteration: 8930; Percent complete: 89.3%; Training loss: 1.6257; Validation loss: 10.5359
Iteration: 8931; Percent complete: 89.3%; Training loss: 1.5300; Validation loss: 10.4108
Iteration: 8932; Percent complete: 89.3%; Training loss: 1.4226; Validation loss: 10.2014
Iteration: 

Iteration: 9014; Percent complete: 90.1%; Training loss: 1.5582; Validation loss: 10.2709
Iteration: 9015; Percent complete: 90.1%; Training loss: 1.5818; Validation loss: 10.3183
Iteration: 9016; Percent complete: 90.2%; Training loss: 1.4237; Validation loss: 10.2670
Iteration: 9017; Percent complete: 90.2%; Training loss: 1.5809; Validation loss: 10.1714
Iteration: 9018; Percent complete: 90.2%; Training loss: 1.6819; Validation loss: 10.7056
Iteration: 9019; Percent complete: 90.2%; Training loss: 1.7030; Validation loss: 10.1735
Iteration: 9020; Percent complete: 90.2%; Training loss: 1.5731; Validation loss: 10.0914
Iteration: 9021; Percent complete: 90.2%; Training loss: 1.7483; Validation loss: 10.8228
Iteration: 9022; Percent complete: 90.2%; Training loss: 1.6028; Validation loss: 10.6641
Iteration: 9023; Percent complete: 90.2%; Training loss: 1.5645; Validation loss: 10.1302
Iteration: 9024; Percent complete: 90.2%; Training loss: 1.5969; Validation loss: 9.9494
Iteration: 

Iteration: 9106; Percent complete: 91.1%; Training loss: 1.5394; Validation loss: 9.7394
Iteration: 9107; Percent complete: 91.1%; Training loss: 1.4071; Validation loss: 9.8926
Iteration: 9108; Percent complete: 91.1%; Training loss: 1.6218; Validation loss: 10.3827
Iteration: 9109; Percent complete: 91.1%; Training loss: 1.4566; Validation loss: 10.3026
Iteration: 9110; Percent complete: 91.1%; Training loss: 1.5134; Validation loss: 10.6712
Iteration: 9111; Percent complete: 91.1%; Training loss: 1.4735; Validation loss: 10.9605
Iteration: 9112; Percent complete: 91.1%; Training loss: 1.6370; Validation loss: 10.5313
Iteration: 9113; Percent complete: 91.1%; Training loss: 1.3792; Validation loss: 10.5351
Iteration: 9114; Percent complete: 91.1%; Training loss: 1.6759; Validation loss: 10.5410
Iteration: 9115; Percent complete: 91.1%; Training loss: 1.5531; Validation loss: 10.6617
Iteration: 9116; Percent complete: 91.2%; Training loss: 1.5242; Validation loss: 10.4706
Iteration: 9

Iteration: 9198; Percent complete: 92.0%; Training loss: 1.5469; Validation loss: 10.1066
Iteration: 9199; Percent complete: 92.0%; Training loss: 1.7204; Validation loss: 10.1013
Iteration: 9200; Percent complete: 92.0%; Training loss: 1.5134; Validation loss: 9.9911
Iteration: 9201; Percent complete: 92.0%; Training loss: 1.3102; Validation loss: 10.4163
Iteration: 9202; Percent complete: 92.0%; Training loss: 1.5939; Validation loss: 10.6846
Iteration: 9203; Percent complete: 92.0%; Training loss: 1.3557; Validation loss: 10.7478
Iteration: 9204; Percent complete: 92.0%; Training loss: 1.3497; Validation loss: 10.1997
Iteration: 9205; Percent complete: 92.0%; Training loss: 1.4013; Validation loss: 10.1102
Iteration: 9206; Percent complete: 92.1%; Training loss: 1.4539; Validation loss: 10.9249
Iteration: 9207; Percent complete: 92.1%; Training loss: 1.5201; Validation loss: 10.4083
Iteration: 9208; Percent complete: 92.1%; Training loss: 1.6056; Validation loss: 10.1289
Iteration: 

Iteration: 9290; Percent complete: 92.9%; Training loss: 1.5621; Validation loss: 10.5690
Iteration: 9291; Percent complete: 92.9%; Training loss: 1.4643; Validation loss: 10.5030
Iteration: 9292; Percent complete: 92.9%; Training loss: 1.4282; Validation loss: 10.7665
Iteration: 9293; Percent complete: 92.9%; Training loss: 1.5239; Validation loss: 9.7642
Iteration: 9294; Percent complete: 92.9%; Training loss: 1.4075; Validation loss: 10.4684
Iteration: 9295; Percent complete: 93.0%; Training loss: 1.5152; Validation loss: 10.3885
Iteration: 9296; Percent complete: 93.0%; Training loss: 1.4228; Validation loss: 11.0292
Iteration: 9297; Percent complete: 93.0%; Training loss: 1.3964; Validation loss: 11.1556
Iteration: 9298; Percent complete: 93.0%; Training loss: 1.3426; Validation loss: 10.6970
Iteration: 9299; Percent complete: 93.0%; Training loss: 1.4544; Validation loss: 10.7933
Iteration: 9300; Percent complete: 93.0%; Training loss: 1.4152; Validation loss: 11.0823
Iteration: 

Iteration: 9382; Percent complete: 93.8%; Training loss: 1.4616; Validation loss: 10.1935
Iteration: 9383; Percent complete: 93.8%; Training loss: 1.5008; Validation loss: 10.3804
Iteration: 9384; Percent complete: 93.8%; Training loss: 1.3559; Validation loss: 10.5120
Iteration: 9385; Percent complete: 93.8%; Training loss: 1.5654; Validation loss: 10.2944
Iteration: 9386; Percent complete: 93.9%; Training loss: 1.4903; Validation loss: 10.6291
Iteration: 9387; Percent complete: 93.9%; Training loss: 1.3965; Validation loss: 10.8316
Iteration: 9388; Percent complete: 93.9%; Training loss: 1.3412; Validation loss: 10.5686
Iteration: 9389; Percent complete: 93.9%; Training loss: 1.5364; Validation loss: 10.2199
Iteration: 9390; Percent complete: 93.9%; Training loss: 1.4745; Validation loss: 11.2361
Iteration: 9391; Percent complete: 93.9%; Training loss: 1.3688; Validation loss: 10.7267
Iteration: 9392; Percent complete: 93.9%; Training loss: 1.4088; Validation loss: 10.8001
Iteration:

Iteration: 9474; Percent complete: 94.7%; Training loss: 1.3215; Validation loss: 10.8545
Iteration: 9475; Percent complete: 94.8%; Training loss: 1.4358; Validation loss: 10.5084
Iteration: 9476; Percent complete: 94.8%; Training loss: 1.4308; Validation loss: 10.5801
Iteration: 9477; Percent complete: 94.8%; Training loss: 1.4189; Validation loss: 10.4552
Iteration: 9478; Percent complete: 94.8%; Training loss: 1.5534; Validation loss: 10.2850
Iteration: 9479; Percent complete: 94.8%; Training loss: 1.5200; Validation loss: 10.6914
Iteration: 9480; Percent complete: 94.8%; Training loss: 1.4557; Validation loss: 10.7659
Iteration: 9481; Percent complete: 94.8%; Training loss: 1.7085; Validation loss: 10.6533
Iteration: 9482; Percent complete: 94.8%; Training loss: 1.5484; Validation loss: 10.1500
Iteration: 9483; Percent complete: 94.8%; Training loss: 1.4177; Validation loss: 10.5235
Iteration: 9484; Percent complete: 94.8%; Training loss: 1.5076; Validation loss: 10.5237
Iteration:

Iteration: 9566; Percent complete: 95.7%; Training loss: 1.6134; Validation loss: 11.0715
Iteration: 9567; Percent complete: 95.7%; Training loss: 1.3797; Validation loss: 10.3839
Iteration: 9568; Percent complete: 95.7%; Training loss: 1.2337; Validation loss: 10.8016
Iteration: 9569; Percent complete: 95.7%; Training loss: 1.2784; Validation loss: 10.7514
Iteration: 9570; Percent complete: 95.7%; Training loss: 1.5025; Validation loss: 10.4054
Iteration: 9571; Percent complete: 95.7%; Training loss: 1.4843; Validation loss: 10.2921
Iteration: 9572; Percent complete: 95.7%; Training loss: 1.5408; Validation loss: 10.7637
Iteration: 9573; Percent complete: 95.7%; Training loss: 1.5991; Validation loss: 10.9890
Iteration: 9574; Percent complete: 95.7%; Training loss: 1.4150; Validation loss: 10.5529
Iteration: 9575; Percent complete: 95.8%; Training loss: 1.3806; Validation loss: 10.7321
Iteration: 9576; Percent complete: 95.8%; Training loss: 1.3183; Validation loss: 10.8363
Iteration:

Iteration: 9658; Percent complete: 96.6%; Training loss: 1.4204; Validation loss: 10.1890
Iteration: 9659; Percent complete: 96.6%; Training loss: 1.4376; Validation loss: 10.7615
Iteration: 9660; Percent complete: 96.6%; Training loss: 1.4445; Validation loss: 10.5803
Iteration: 9661; Percent complete: 96.6%; Training loss: 1.4737; Validation loss: 10.1155
Iteration: 9662; Percent complete: 96.6%; Training loss: 1.4449; Validation loss: 10.6862
Iteration: 9663; Percent complete: 96.6%; Training loss: 1.4405; Validation loss: 10.6637
Iteration: 9664; Percent complete: 96.6%; Training loss: 1.4095; Validation loss: 10.7922
Iteration: 9665; Percent complete: 96.7%; Training loss: 1.3792; Validation loss: 11.1299
Iteration: 9666; Percent complete: 96.7%; Training loss: 1.5288; Validation loss: 10.8586
Iteration: 9667; Percent complete: 96.7%; Training loss: 1.4726; Validation loss: 10.1474
Iteration: 9668; Percent complete: 96.7%; Training loss: 1.3714; Validation loss: 11.2456
Iteration:

Iteration: 9750; Percent complete: 97.5%; Training loss: 1.3365; Validation loss: 10.6872
Iteration: 9751; Percent complete: 97.5%; Training loss: 1.2865; Validation loss: 10.7766
Iteration: 9752; Percent complete: 97.5%; Training loss: 1.5645; Validation loss: 10.6771
Iteration: 9753; Percent complete: 97.5%; Training loss: 1.5235; Validation loss: 10.7273
Iteration: 9754; Percent complete: 97.5%; Training loss: 1.4774; Validation loss: 10.9153
Iteration: 9755; Percent complete: 97.5%; Training loss: 1.4270; Validation loss: 10.7513
Iteration: 9756; Percent complete: 97.6%; Training loss: 1.4223; Validation loss: 10.7099
Iteration: 9757; Percent complete: 97.6%; Training loss: 1.2816; Validation loss: 10.8797
Iteration: 9758; Percent complete: 97.6%; Training loss: 1.3276; Validation loss: 10.7552
Iteration: 9759; Percent complete: 97.6%; Training loss: 1.5066; Validation loss: 10.4562
Iteration: 9760; Percent complete: 97.6%; Training loss: 1.3961; Validation loss: 10.6535
Iteration:

Iteration: 9842; Percent complete: 98.4%; Training loss: 1.3230; Validation loss: 11.2516
Iteration: 9843; Percent complete: 98.4%; Training loss: 1.3976; Validation loss: 11.0273
Iteration: 9844; Percent complete: 98.4%; Training loss: 1.4467; Validation loss: 11.3869
Iteration: 9845; Percent complete: 98.5%; Training loss: 1.3118; Validation loss: 10.9242
Iteration: 9846; Percent complete: 98.5%; Training loss: 1.4637; Validation loss: 11.1982
Iteration: 9847; Percent complete: 98.5%; Training loss: 1.4756; Validation loss: 10.7964
Iteration: 9848; Percent complete: 98.5%; Training loss: 1.3013; Validation loss: 10.9037
Iteration: 9849; Percent complete: 98.5%; Training loss: 1.4707; Validation loss: 10.7371
Iteration: 9850; Percent complete: 98.5%; Training loss: 1.3247; Validation loss: 10.6022
Iteration: 9851; Percent complete: 98.5%; Training loss: 1.1031; Validation loss: 10.8219
Iteration: 9852; Percent complete: 98.5%; Training loss: 1.3557; Validation loss: 10.6096
Iteration:

Iteration: 9934; Percent complete: 99.3%; Training loss: 1.3942; Validation loss: 10.9532
Iteration: 9935; Percent complete: 99.4%; Training loss: 1.4450; Validation loss: 10.4461
Iteration: 9936; Percent complete: 99.4%; Training loss: 1.4961; Validation loss: 11.1608
Iteration: 9937; Percent complete: 99.4%; Training loss: 1.4471; Validation loss: 10.5607
Iteration: 9938; Percent complete: 99.4%; Training loss: 1.4789; Validation loss: 11.1067
Iteration: 9939; Percent complete: 99.4%; Training loss: 1.4875; Validation loss: 10.3468
Iteration: 9940; Percent complete: 99.4%; Training loss: 1.2393; Validation loss: 10.7868
Iteration: 9941; Percent complete: 99.4%; Training loss: 1.3513; Validation loss: 10.4879
Iteration: 9942; Percent complete: 99.4%; Training loss: 1.3046; Validation loss: 10.5080
Iteration: 9943; Percent complete: 99.4%; Training loss: 1.3658; Validation loss: 10.7546
Iteration: 9944; Percent complete: 99.4%; Training loss: 1.4022; Validation loss: 10.6381
Iteration:

In [None]:
# Switch both networks to evaluation mode before generating replies.
# Assuming encoder/decoder are nn.Module instances (defined earlier in the
# notebook), this turns off training-only behaviour such as dropout.
encoder.eval()
decoder.eval()

# Wrap the trained networks in the decoding strategy used to produce output.
searcher = SearchDecoder(encoder, decoder)

# Inference does not need gradients. Disabling autograd here stops every
# forward pass from recording a graph and retaining activations. Nesting is
# harmless if evaluateInput already disables gradients internally.
with torch.no_grad():
    evaluateInput(encoder, decoder, searcher, voc)