In [1]:
import os
import random
from io import open
import unicodedata
import string
import re

import torch
import torchaudio
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from pathlib import Path
import kaldi_io
import sys
import gc
import json
import time
from data_4 import AudioDataLoader, AudioDataset, pad_list

%matplotlib inline

# Debug-print toggle; nothing in the visible cells of this notebook reads it.
print_use = False

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [2]:
# Data configuration.
# For a quick local run, small manifests can be used instead:
# train_json = "data.json"
# test_json = "data_test.json"
# NOTE(review): the paths below are absolute and machine-specific; the notebook
# will not run elsewhere without editing them.
train_json="/home1/meichaoyang/workspace/git/Listen-Attend-Spell/egs/aishell2/dump/train/deltatrue/data.json"
test_json="/home1/meichaoyang/workspace/git/Listen-Attend-Spell/egs/aishell2/dump/test/deltatrue/data.json"
# The next three values are passed positionally to AudioDataset (train and test).
# Presumably batch_size is the number of utterances per minibatch and
# maxlen_in / maxlen_out are input/output length thresholds -- TODO confirm in data_4.
batch_size = 32
maxlen_in = 100000
maxlen_out = 30
# Worker count given to both AudioDataLoader instances.
num_workers = 4

## 加载数据

In [3]:
# with open(train_json, 'rb') as f:
#             data = json.load(f)['utts']

In [4]:
# sorted_data = sorted(data.items(), key=lambda data: int(
#             data[1]['input']['shape'][0]), reverse=True)

In [3]:
# Training set built from the training manifest and the batching limits above.
tr_dataset = AudioDataset(train_json, batch_size,
                              maxlen_in, maxlen_out)

# NOTE(review): batch_size=1 here presumably means "one pre-built minibatch per
# step", i.e. AudioDataset already groups utterances -- confirm in data_4.
tr_loader = AudioDataLoader(tr_dataset, batch_size=1, num_workers=num_workers)


In [4]:
# Test set, built exactly like the training set but from the test manifest.
te_dataset = AudioDataset(test_json, batch_size,
                              maxlen_in, maxlen_out)
# Same loader settings as for training (see the batch_size=1 remark there).
te_loader = AudioDataLoader(te_dataset, batch_size=1, num_workers=num_workers)

In [5]:
# Load the output vocabulary. The position of a token in `char_list` is its
# integer id (hypotheses are turned back into text with `char_list[idx]`).
char_list_path = "/home1/meichaoyang/workspace/git/Listen-Attend-Spell/egs/aishell2/data/lang_1char/train_chars.txt"
# The vocabulary contains CJK characters, so decode explicitly as UTF-8 instead
# of relying on the platform's locale-dependent default encoding.
with open(char_list_path, "r", encoding="utf-8") as f:
    # Keep only the first whitespace-separated field of every line; blank
    # lines are skipped instead of raising IndexError.
    char_list = [fields[0] for fields in (line.split() for line in f) if fields]

# 模型搭建

In [6]:
# NOTE(review): MAX_LENGTH, SOS_token and EOS_token are not referenced by any
# visible cell. The model below is built with sos_id=1 / eos_id=2, so the
# values 0 / 1 here do not describe the ids actually used.
MAX_LENGTH= 200
SOS_token = 0
EOS_token = 1
# Expose only physical GPU 3 to this process; it is then addressed as "cuda:0".
# This has to be set before the first CUDA call for it to take effect.
os.environ["CUDA_VISIBLE_DEVICES"]="3"
# Fall back to the CPU when no GPU is visible.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Attention

In [7]:
class DotProductAttention(nn.Module):
    r"""Unscaled dot-product attention.

    Each query vector is scored against every value vector with a plain dot
    product. The scores are softmax-normalised along the value (input-time)
    axis and used as weights for a sum over the values.

    The module has no parameters and applies no padding mask: every one of the
    ``Ti`` value positions takes part in the softmax.

    NOTE: Terminology follows Stanford cs224n-2018-lecture11.
    """

    def __init__(self):
        super(DotProductAttention, self).__init__()

    def forward(self, queries, values):
        """Attend from ``queries`` over ``values``.

        Args:
            queries: N x To x H
            values : N x Ti x H

        Returns:
            context: N x To x H, attention-weighted sum of ``values``
            weights: N x To x Ti, each row sums to 1 over Ti
        """
        # (N, To, H) x (N, H, Ti) -> (N, To, Ti)
        scores = queries.bmm(values.transpose(1, 2))
        # Normalise over the input-time axis.
        weights = F.softmax(scores, dim=-1)
        # (N, To, Ti) x (N, Ti, H) -> (N, To, H)
        context = weights.bmm(values)
        return context, weights

### 金字塔BLSTM

In [8]:
class pyramidalBLSTM(nn.Module):
    """Batch-first (bi)directional LSTM over padded, variable-length sequences.

    NOTE(review): despite the name, ``forward`` performs no time-resolution
    reduction; it only packs the input, runs ``nn.LSTM`` and unpacks.

    Args:
        input_size: feature dimension of each input frame.
        hidden_size: hidden units per direction.
        num_layers: number of stacked LSTM layers.
        dropout: dropout between stacked layers (passed to ``nn.LSTM``).
        bidirectional: run the LSTM in both directions when True.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.0, bidirectional=True):
        super(pyramidalBLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers,
                           batch_first=True,
                           dropout=dropout,
                           bidirectional=bidirectional)

    def forward(self, padded_input, input_lengths):
        """Encode a padded batch.

        Args:
            padded_input: N x T x input_size, zero-padded along T.
            input_lengths: true length of every sequence (tensor or list),
                in the order ``pack_padded_sequence`` expects (descending
                with its default settings).

        Returns:
            output: N x T x (num_directions * hidden_size), padded back to the
                input's T so the time axis is unchanged.
            hidden: the ``(h_n, c_n)`` tuple returned by ``nn.LSTM``.
        """
        total_length = padded_input.size(1)  # pad the output back to this length
        # pack_padded_sequence needs the lengths on the CPU when they are
        # given as a tensor, even if the features live on the GPU.
        if isinstance(input_lengths, torch.Tensor):
            input_lengths = input_lengths.cpu()
        packed_input = pack_padded_sequence(padded_input, input_lengths,
                                            batch_first=True)
        packed_output, hidden = self.rnn(packed_input)
        output, _ = pad_packed_sequence(packed_output,
                                        batch_first=True,
                                        total_length=total_length)
        return output, hidden
        

In [9]:
# Scratch cell: a small 2 x 2 x 3 integer tensor. `a` is overwritten by the
# next cell and is not used by the model code.
a = [[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]]
a = torch.tensor(a)

In [10]:
# Scratch cell: a random 32 x 2236 x 512 tensor, apparently created to try
# out the reshape in the commented-out cell that follows.
a = torch.rand([32, 2236, 512])
a.shape

torch.Size([32, 2236, 512])

In [11]:
# a.reshape(a.shape[0],a.shape[1]//2,a.shape[2]*2)

## 模型构建

In [12]:
class Encoder(nn.Module):
    """Acoustic encoder: a thin wrapper around a single-layer ``pyramidalBLSTM``.

    Args:
        input_size: feature dimension of each input frame.
        hidden_size: hidden units per LSTM direction.
        dropout: dropout value forwarded to the wrapped LSTM.
    """

    def __init__(self, input_size, hidden_size, dropout=0.0):
        super(Encoder, self).__init__()
        self.first = True
        self.hidden_size = hidden_size
        # One recurrent layer; the attribute name is part of the state_dict keys.
        self.pyramidalBLSTM = pyramidalBLSTM(
            input_size, hidden_size, 1, dropout=dropout)

    def forward(self, input, input_lengths):
        """Run the wrapped LSTM and hand back its ``(output, hidden)`` pair."""
        encoded, state = self.pyramidalBLSTM(input, input_lengths)
        return encoded, state

    def initHidden(self):
        """Return a 1 x 1 x hidden_size zero tensor on the notebook-level ``device``."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(*shape, device=device)

In [13]:
class Decoder(nn.Module):
    """Attention-based LSTM decoder.

    At every output step the decoder
      1. feeds the previous token embedding, concatenated with the previous
         attention context, through a stack of ``nn.LSTMCell`` layers,
      2. uses the top layer's hidden state as the query of a dot-product
         attention over the encoder outputs, and
      3. maps ``[hidden state; attention context]`` through an MLP to
         vocabulary logits.

    The attention is a plain dot product, so the encoder output size must
    equal ``hidden_size``.

    Args:
        vocab_size: number of output tokens.
        embedding_dim: size of the token embeddings.
        sos_id: id of the start-of-sentence token.
        eos_id: id of the end-of-sentence token.
        hidden_size: LSTMCell hidden size (and assumed encoder output size).
        num_layers: number of stacked LSTMCell layers.
        bidirectional_encoder: stored but currently unused.
    """

    def __init__(self, vocab_size, embedding_dim, sos_id, eos_id, hidden_size,
                 num_layers, bidirectional_encoder=True):
        super(Decoder, self).__init__()
        # Hyper parameters
        # embedding + output
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.sos_id = sos_id  # Start of Sentence
        self.eos_id = eos_id  # End of Sentence
        # rnn
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional_encoder = bidirectional_encoder  # useless now
        self.encoder_hidden_size = hidden_size  # must be equal now
        # Components
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.rnn = nn.ModuleList()
        # The first cell also consumes the previous attention context.
        self.rnn += [nn.LSTMCell(self.embedding_dim +
                                 self.encoder_hidden_size, self.hidden_size)]
        for l in range(1, self.num_layers):
            self.rnn += [nn.LSTMCell(self.hidden_size, self.hidden_size)]
        self.attention = DotProductAttention()
        self.mlp = nn.Sequential(
            nn.Linear(self.encoder_hidden_size + self.hidden_size,
                      self.hidden_size),
            nn.Tanh(),
            nn.Linear(self.hidden_size, self.vocab_size))

    def zero_state(self, encoder_padded_outputs, H=None):
        """Return an ``N x H`` zero tensor with the dtype/device of the encoder outputs.

        Args:
            encoder_padded_outputs: tensor whose first dimension is the batch size N.
            H: feature size; defaults to ``self.hidden_size``.
        """
        N = encoder_padded_outputs.size(0)
        H = self.hidden_size if H is None else H
        return encoder_padded_outputs.new_zeros(N, H)

    def forward(self, padded_input, encoder_padded_outputs):
        """Teacher-forced training pass.

        Relies on the notebook-level names ``IGNORE_ID`` (padding value of the
        targets) and ``pad_list``.

        Args:
            padded_input: N x To, target token ids padded with ``IGNORE_ID``.
            encoder_padded_outputs: N x Ti x H

        Returns:
            ce_loss: scalar cross-entropy, averaged over the target positions
                that are not ``IGNORE_ID``.
        """
        # *********Get Input and Output
        # from espnet/Decoder.forward()
        # TODO: find a smarter way to do this
        ys = [y[y != IGNORE_ID] for y in padded_input]  # strip the padding
        # prepare input and output token sequences with sos/eos IDs
        eos = ys[0].new([self.eos_id])
        sos = ys[0].new([self.sos_id])
        ys_in = [torch.cat([sos, y], dim=0) for y in ys]
        ys_out = [torch.cat([y, eos], dim=0) for y in ys]
        # Decoder inputs are padded with eos; targets are padded with
        # IGNORE_ID so the padded positions are excluded from the loss.
        # pys: utt x olen
        ys_in_pad = pad_list(ys_in, self.eos_id)
        ys_out_pad = pad_list(ys_out, IGNORE_ID)
        assert ys_in_pad.size() == ys_out_pad.size()
        batch_size = ys_in_pad.size(0)
        output_length = ys_in_pad.size(1)

        # *********Init decoder rnn
        h_list = [self.zero_state(encoder_padded_outputs)]
        c_list = [self.zero_state(encoder_padded_outputs)]
        for l in range(1, self.num_layers):
            h_list.append(self.zero_state(encoder_padded_outputs))
            c_list.append(self.zero_state(encoder_padded_outputs))
        att_c = self.zero_state(encoder_padded_outputs,
                                H=encoder_padded_outputs.size(2))
        y_all = []

        # **********LAS: 1. decoder rnn 2. attention 3. concatenate and MLP
        embedded = self.embedding(ys_in_pad)
        for t in range(output_length):
            # step 1. decoder RNN: s_i = RNN(s_i−1,y_i−1,c_i−1)
            rnn_input = torch.cat((embedded[:, t, :], att_c), dim=1)
            h_list[0], c_list[0] = self.rnn[0](
                rnn_input, (h_list[0], c_list[0]))
            for l in range(1, self.num_layers):
                h_list[l], c_list[l] = self.rnn[l](
                    h_list[l-1], (h_list[l], c_list[l]))
            rnn_output = h_list[-1]  # below unsqueeze: (N x H) -> (N x 1 x H)
            # step 2. attention: c_i = AttentionContext(s_i,h)
            att_c, att_w = self.attention(rnn_output.unsqueeze(dim=1),
                                          encoder_padded_outputs)
            att_c = att_c.squeeze(dim=1)
            # step 3. concatenate s_i and c_i, and feed the MLP
            mlp_input = torch.cat((rnn_output, att_c), dim=1)
            predicted_y_t = self.mlp(mlp_input)
            y_all.append(predicted_y_t)

        y_all = torch.stack(y_all, dim=1)  # N x To x C
        # **********Cross Entropy Loss
        # F.cross_entropy = NLL(log_softmax(input), target))
        y_all = y_all.view(batch_size * output_length, self.vocab_size)
        ce_loss = F.cross_entropy(y_all, ys_out_pad.view(-1),
                                  ignore_index=IGNORE_ID,
                                  reduction='mean')

        return ce_loss

    def recognize_beam(self, encoder_outputs, char_list, args):
        """Beam search; decodes a single utterance.

        Progress is printed after every step (number of live hypotheses and
        their current text).

        NOTE(review): nothing here disables autograd; callers that only want
        to decode should wrap the call in ``torch.no_grad()``.

        Args:
            encoder_outputs: T x H
            char_list: list of characters, indexed by token id (used for the
                progress printout only).
            args: object with ``beam_size``, ``nbest`` and ``decode_max_len``
                (0 means "use T, the number of encoder frames").

        Returns:
            nbest_hyps: up to ``args.nbest`` finished hypotheses, best first.
                Each is a dict with ``yseq`` (token ids, starting with sos and
                ending with eos), ``score`` (summed log-probability) and the
                decoder state entries ``h_prev`` / ``c_prev`` / ``a_prev``.
        """
        # search params
        # topk cannot return more candidates than there are tokens, so a beam
        # wider than the vocabulary is clamped instead of crashing.
        beam = min(args.beam_size, self.vocab_size)
        nbest = args.nbest
        if args.decode_max_len == 0:
            maxlen = encoder_outputs.size(0)
        else:
            maxlen = args.decode_max_len

        # *********Init decoder rnn
        h_list = [self.zero_state(encoder_outputs.unsqueeze(0))]
        c_list = [self.zero_state(encoder_outputs.unsqueeze(0))]
        for l in range(1, self.num_layers):
            h_list.append(self.zero_state(encoder_outputs.unsqueeze(0)))
            c_list.append(self.zero_state(encoder_outputs.unsqueeze(0)))
        att_c = self.zero_state(encoder_outputs.unsqueeze(0),
                                H=encoder_outputs.unsqueeze(0).size(2))
        # prepare sos
        y = self.sos_id
        # one-element long tensor reused to look up the previous token's embedding
        vy = encoder_outputs.new_zeros(1).long()

        hyp = {'score': 0.0, 'yseq': [y], 'c_prev': c_list, 'h_prev': h_list,
               'a_prev': att_c}
        hyps = [hyp]
        ended_hyps = []

        for i in range(maxlen):
            hyps_best_kept = []
            for hyp in hyps:
                # every live hypothesis has exactly i + 1 tokens here, so
                # yseq[i] is its most recent token
                vy[0] = hyp['yseq'][i]
                embedded = self.embedding(vy)
                # step 1. decoder RNN: s_i = RNN(s_i−1,y_i−1,c_i−1)
                rnn_input = torch.cat((embedded, hyp['a_prev']), dim=1)
                h_list[0], c_list[0] = self.rnn[0](
                    rnn_input, (hyp['h_prev'][0], hyp['c_prev'][0]))
                for l in range(1, self.num_layers):
                    h_list[l], c_list[l] = self.rnn[l](
                        h_list[l-1], (hyp['h_prev'][l], hyp['c_prev'][l]))
                rnn_output = h_list[-1]
                # step 2. attention: c_i = AttentionContext(s_i,h)
                # below unsqueeze: (N x H) -> (N x 1 x H)
                att_c, att_w = self.attention(rnn_output.unsqueeze(dim=1),
                                              encoder_outputs.unsqueeze(0))
                att_c = att_c.squeeze(dim=1)
                # step 3. concatenate s_i and c_i, and feed the MLP
                mlp_input = torch.cat((rnn_output, att_c), dim=1)
                predicted_y_t = self.mlp(mlp_input)
                local_scores = F.log_softmax(predicted_y_t, dim=1)
                # topk scores
                local_best_scores, local_best_ids = torch.topk(
                    local_scores, beam, dim=1)

                for j in range(beam):
                    new_hyp = {}
                    # copy the lists: h_list / c_list are overwritten in place
                    # when the next hypothesis is expanded
                    new_hyp['h_prev'] = h_list[:]
                    new_hyp['c_prev'] = c_list[:]
                    new_hyp['a_prev'] = att_c[:]
                    new_hyp['score'] = hyp['score'] + local_best_scores[0, j]
                    new_hyp['yseq'] = [0] * (1 + len(hyp['yseq']))
                    new_hyp['yseq'][:len(hyp['yseq'])] = hyp['yseq']
                    new_hyp['yseq'][len(hyp['yseq'])] = int(
                        local_best_ids[0, j])
                    # will be (2 x beam) hyps at most
                    hyps_best_kept.append(new_hyp)

                hyps_best_kept = sorted(hyps_best_kept,
                                        key=lambda x: x['score'],
                                        reverse=True)[:beam]
            # end for hyp in hyps
            hyps = hyps_best_kept

            # add eos in the final loop so that there is always at least one
            # ended hypothesis
            if i == maxlen - 1:
                for hyp in hyps:
                    hyp['yseq'].append(self.eos_id)

            # move ended hypotheses to the final list and drop them from the
            # live ones (so the number of live hyps can fall below beam)
            remained_hyps = []
            for hyp in hyps:
                if hyp['yseq'][-1] == self.eos_id:
                    # hyp['score'] += (i + 1) * penalty
                    ended_hyps.append(hyp)
                else:
                    remained_hyps.append(hyp)

            hyps = remained_hyps
            if len(hyps) > 0:
                print('remeined hypothes: ' + str(len(hyps)))
            else:
                print('no hypothesis. Finish decoding.')
                break

            for hyp in hyps:
                print('hypo: ' + ''.join([char_list[int(x)]
                                          for x in hyp['yseq'][1:]]))
        # end for i in range(maxlen)
        nbest_hyps = sorted(ended_hyps, key=lambda x: x['score'], reverse=True)[
            :min(len(ended_hyps), nbest)]
        return nbest_hyps

In [14]:
class Seq2Seq(nn.Module):
    """Glue module that chains an encoder and a decoder.

    Training goes through ``forward`` (returns the decoder's loss); inference
    goes through ``recognize`` (returns the decoder's n-best list).
    """

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, padded_input, input_lengths, padded_target):
        """Compute the training loss for one batch.

        Args:
            padded_input: N x Ti x D
            input_lengths: lengths of the N input sequences
            padded_target: N x To

        Returns:
            Whatever the decoder returns for ``(padded_target, encoder outputs)``.
        """
        memory, _state = self.encoder(padded_input, input_lengths)
        return self.decoder(padded_target, memory)

    def recognize(self, input, input_lengths, char_list, args):
        """Beam-search decode a single utterance.

        Args:
            input: features as accepted by the encoder; dimension 0 of the
                encoder output is squeezed away before decoding, so a batch
                of exactly one utterance is expected.
            input_lengths: length information passed to the encoder
            char_list: list of characters
            args: search options forwarded to ``decoder.recognize_beam``

        Returns:
            nbest_hyps: the list returned by ``decoder.recognize_beam``.
        """
        memory, _state = self.encoder(input, input_lengths)
        # Drop the batch axis: the decoder's beam search expects T x H.
        single_utt = memory.squeeze(0)
        return self.decoder.recognize_beam(single_utt, char_list, args)


In [43]:
def parse_hypothesis(hyp, char_list):
    """Turn one beam-search hypothesis into printable strings.

    :param dict hyp: recognition hypothesis with ``'yseq'`` (token ids whose
        first element is sos) and ``'score'``
    :param list char_list: list of characters, indexed by token id
    :return: recognition text string (tokens joined, ``<space>`` -> ``' '``)
    :return: recognition token string (tokens separated by spaces)
    :return: recognition tokenid string (ids separated by spaces)
    :return: hypothesis score as a float
    """
    # Drop the leading sos; every later id (including a trailing eos) is kept.
    ids = [int(tok) for tok in hyp['yseq'][1:]]
    symbols = [char_list[i] for i in ids]
    score = float(hyp['score'])

    tokenid = " ".join(map(str, ids))
    token = " ".join(symbols)
    text = "".join(symbols).replace('<space>', ' ')
    return text, token, tokenid, score

def add_results_to_json(js, nbest_hyps, char_list):
    """Build a result record holding the N-best recognitions of one utterance.

    Prints the reference and the recognised text of the best hypothesis.

    :param dict js: groundtruth utterance dict (needs ``'utt2spk'`` and a
        non-empty ``'output'`` list whose first entry has ``'text'``)
    :param list nbest_hyps: list of hypothesis dicts, best first
    :param list char_list: list of characters
    :return: new dict with ``'utt2spk'`` and one ``'output'`` entry per
        hypothesis (ground-truth fields plus ``'rec_text'`` and ``'score'``)
    """
    new_js = {'utt2spk': js['utt2spk'], 'output': []}

    for rank, hyp in enumerate(nbest_hyps, start=1):
        rec_text, _token, _tokenid, score = parse_hypothesis(hyp, char_list)

        # Start from a shallow copy of the ground-truth entry, then attach
        # the recognition result.
        entry = {key: value for key, value in js['output'][0].items()}
        entry['rec_text'] = rec_text
        entry['score'] = score
        new_js['output'].append(entry)

        # Only the top-ranked hypothesis is echoed.
        if rank == 1:
            print('groundtruth: %s' % entry['text'])
            print('prediction : %s' % entry['rec_text'])

    return new_js

## 单步训练

In [15]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Fractional seconds are truncated by the ``%d`` formatting.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a run started at ``since``.

    ``percent`` is the fraction of the work already done (must be non-zero);
    the remaining time is extrapolated linearly from the elapsed time.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

## 训练迭代

In [16]:
def trainIters(model, epoch, optimizier, print_every=10, plot_every=10, learning_rate=0.01):
    """Train ``model`` on the notebook-level ``tr_loader`` for ``epoch`` epochs.

    Relies on the notebook-level names ``tr_dataset`` (its length is used as
    the number of iterations per epoch), ``tr_loader`` and ``timeSince``.

    Args:
        model: module called as ``model(padded_input, input_lengths,
            padded_target)`` that returns a scalar loss tensor.
        epoch: number of passes over ``tr_loader``.
        optimizier: optimizer that already holds ``model``'s parameters; it is
            the only optimizer stepped here.
        print_every: print and log the mean loss every this many iterations.
        plot_every: record the mean loss every this many iterations.
        learning_rate: unused, kept so existing calls keep working; the
            effective learning rate is the one ``optimizier`` was built with.

    Side effects:
        Overwrites ``train.log`` in the working directory with the same
        progress lines that are printed.
    """
    # Batches follow the model instead of assuming a CUDA device.
    model_device = next(model.parameters()).device
    n_iters = len(tr_dataset)
    total_iters = n_iters * epoch
    # NOTE(review): plot_losses is collected but neither returned nor plotted.
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    start = time.time()

    # The context manager closes the log even if training raises.
    with open('train.log', 'w') as log:
        for e in range(epoch):
            for i, data in enumerate(tr_loader):
                padded_input, input_lengths, padded_target = data
                padded_input = padded_input.to(model_device)
                padded_target = padded_target.to(model_device)
                # input_lengths deliberately stays where the loader put it:
                # pack_padded_sequence wants tensor lengths on the CPU.
                loss = model(padded_input, input_lengths, padded_target)
                loss_value = float(loss)
                print_loss_total += loss_value
                plot_loss_total += loss_value

                optimizier.zero_grad()
                loss.backward()
                optimizier.step()

                if (i + 1) % print_every == 0:
                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    # fraction of all iterations (over all epochs) done so far
                    progress = (e * n_iters + i + 1) / total_iters
                    txt = 'Epoch %d | Iter %d | %s (%d %d%%) %.4f' % (
                        e + 1, i + 1, timeSince(start, progress),
                        i + 1, progress * 100, print_loss_avg)
                    print(txt)
                    log.write(txt + "\n")
                    log.flush()
                # Parenthesised on purpose: `i+1 % plot_every` parses as
                # `i + (1 % plot_every)`, which never equals 0.
                if (i + 1) % plot_every == 0:
                    plot_loss_avg = plot_loss_total / plot_every
                    plot_losses.append(plot_loss_avg)
                    plot_loss_total = 0

In [17]:
ys=[i.split() for i in ['873 93 4882 995 1485 1748 3315 1685 374 3071 66 1976 3071 2081 153 894 1940 3124 4958 106 3103 3746 406 2044 2090 4815 4514 1940 123 1556 1536 2807 87 5098 602 3071 1002 4958', '48 38 47 1500 4463 581 1847 421 58 1714 3205 184 1500 4463 1017 2879 4381 2120 4381 3205 1880 1880 822 4381 2120 4381 3205 1880 2841 3279 1718', '161 1398 87 1764 428 1956 3479 2045 2547 4620 428 1174 2187 3506 1764 3479 5093 128 577 1398 3069 228 428', '4336 1349 4626 1366 1735 525 2080 127 525 113 1990 1735 4571 1826 607 2612 1133 224 1735 4571 3780 153 4858 1062 1371 4189 2800 1251 871 1293 3071 1017 4856 1251 2970 4858 2034', '60 613 1960 1672 163 4891 4536 5093 5093 96 4378 4471 4471 1940 65 3071 3331 3749', '3611 347 1691 2888 1027 1017 1206 203 1248 4359 127 3069 871 525 61 3576 341 1880 2841 3279 1718', '1243 3747 2095 1413 2028 3237 93 3456 2701 3071 521 4815 279 613 2101 4764 4246 527 3460 2028 3237 93 3526 2701', '267 3564 1977 3339 4373 4840 601 2500 421 135 596 4672 580 479 2982 1159 132 525 358 4668 3339 4373 4174 4544', '1399 71 1248 366 87 58 627 421 4179 4500 5059 3071 436 1084 3561 584 254 358 2766 262 4500 3569 4378 4218', '63 3069 113 525 4908 63 526 871 3069 359 525 113 2728 359 58', '4176 1946 4126 1755 537 4381 538 3412 1059 1049 58 127 627', '3686 1016 2638 4385 953 4786 1686 2081 1274 2374 3071 1216 1424 2972 2981 687 358 362 4209 1980 2972 2981', '4354 2879 225 225 5156 2205 871 1685 338 1159 225 268 637 894 85 153 3561 3338 64 358 1438 1717 4189 123 603 1064', '2296 913 511 1691 2155 36 47 38 35 49 30 3486 3496 94 2101 1691 2155 101 125 1880 2841 12 23', '279 1568 2000 1735 4198 3607 2353 135 4894 622 1227 5186 4480 2768 78 3884 1366 901 3182 3312 4620 1956 1826 2028 3237', '1721 3995 3898 2190 716 1380 447 522 4867 4496 445 87 2362 3414 935 4512 2120 2476 1349 4904 2390 4475 1014']]

In [18]:
# Scratch cell: the identity ordering 0..15, one index per entry of `ys`.
nonzero_sorted_idx=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

In [19]:
# Scratch cell: reorder the token lists in `ys` by `nonzero_sorted_idx`.
k=[ys[i] for i in nonzero_sorted_idx]

In [28]:
char_list

['<unk>',
 '<sos>',
 '<eos>',
 "'",
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '㶧',
 '䶮',
 '一',
 '丁',
 '七',
 '万',
 '丈',
 '三',
 '上',
 '下',
 '不',
 '与',
 '丐',
 '丑',
 '专',
 '且',
 '丕',
 '世',
 '丘',
 '丙',
 '业',
 '丛',
 '东',
 '丝',
 '丞',
 '丢',
 '两',
 '严',
 '丧',
 '个',
 '丫',
 '中',
 '丰',
 '串',
 '临',
 '丸',
 '丹',
 '为',
 '主',
 '丽',
 '举',
 '乃',
 '久',
 '么',
 '义',
 '之',
 '乌',
 '乍',
 '乎',
 '乏',
 '乐',
 '乒',
 '乓',
 '乔',
 '乖',
 '乘',
 '乙',
 '九',
 '乞',
 '也',
 '习',
 '乡',
 '书',
 '买',
 '乱',
 '乳',
 '乾',
 '了',
 '予',
 '争',
 '事',
 '二',
 '于',
 '亏',
 '云',
 '互',
 '五',
 '井',
 '亘',
 '亚',
 '些',
 '亟',
 '亡',
 '亢',
 '交',
 '亥',
 '亦',
 '产',
 '亨',
 '亩',
 '享',
 '京',
 '亭',
 '亮',
 '亲',
 '亳',
 '亵',
 '人',
 '亿',
 '什',
 '仁',
 '仄',
 '仅',
 '仆',
 '仇',
 '今',
 '介',
 '仍',
 '从'

In [29]:
# ---- Model and optimisation hyper-parameters ----
input_size = 240  # input feature size given to the encoder LSTM

hidden_size = 256  # per LSTM direction; the decoder below gets 2 * hidden_size
vocab_size = len(char_list)
embedding_dim = 512
sos_id = 1  # position of '<sos>' in char_list
eos_id = 2  # position of '<eos>' in char_list
learning_rate = 1e-3
momentum = 0  # only referenced by the commented-out optimizer argument below
l2 = 1e-5  # weight decay

# Padding value of the targets; Decoder.forward reads this notebook-level name.
IGNORE_ID=-1

encoder = Encoder(input_size, hidden_size, dropout=0.0)
# The encoder is bidirectional, so its outputs (and therefore the decoder
# hidden size, which must match for dot-product attention) are 2 * hidden_size.
decoder = Decoder(vocab_size, embedding_dim, sos_id, eos_id, hidden_size*2,
                 num_layers=1, bidirectional_encoder=True)

model = Seq2Seq(encoder, decoder)
print(model)
# Use the device selected earlier in the notebook rather than assuming CUDA.
model.to(device)

optimizier = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
#                                      momentum=momentum,
                                     weight_decay=l2)
# Train for 20 epochs, logging every 100 iterations.
trainIters(model, 20,optimizier, print_every=100)

Seq2Seq(
  (encoder): Encoder(
    (pyramidalBLSTM): pyramidalBLSTM(
      (rnn): LSTM(240, 256, batch_first=True, bidirectional=True)
    )
  )
  (decoder): Decoder(
    (embedding): Embedding(5221, 512)
    (rnn): ModuleList(
      (0): LSTMCell(1024, 512)
    )
    (attention): DotProductAttention()
    (mlp): Sequential(
      (0): Linear(in_features=1024, out_features=512, bias=True)
      (1): Tanh()
      (2): Linear(in_features=512, out_features=5221, bias=True)
    )
  )
)
Epoch 1 | Iter 100 | 0m 19s (- 1921m 14s) (100 0%) 6.9498
Epoch 1 | Iter 200 | 0m 35s (- 1735m 55s) (200 0%) 6.4405
Epoch 1 | Iter 300 | 0m 50s (- 1649m 7s) (300 0%) 5.9445
Epoch 1 | Iter 400 | 1m 5s (- 1596m 54s) (400 0%) 5.5277
Epoch 1 | Iter 500 | 1m 19s (- 1543m 55s) (500 0%) 5.2196
Epoch 1 | Iter 600 | 1m 32s (- 1496m 18s) (600 0%) 5.0425
Epoch 1 | Iter 700 | 1m 45s (- 1460m 23s) (700 0%) 4.8887
Epoch 1 | Iter 800 | 1m 57s (- 1429m 26s) (800 0%) 4.7706
Epoch 1 | Iter 900 | 2m 10s (- 1404m 31s) (900 0%) 

Epoch 1 | Iter 13900 | 22m 55s (- 939m 50s) (13900 2%) 0.6887
Epoch 1 | Iter 14000 | 23m 3s (- 938m 28s) (14000 2%) 0.6457
Epoch 1 | Iter 14100 | 23m 11s (- 936m 46s) (14100 2%) 0.8462
Epoch 1 | Iter 14200 | 23m 18s (- 935m 2s) (14200 2%) 0.6419
Epoch 1 | Iter 14300 | 23m 26s (- 933m 10s) (14300 2%) 0.6565
Epoch 1 | Iter 14400 | 23m 33s (- 931m 42s) (14400 2%) 0.7115
Epoch 1 | Iter 14500 | 23m 41s (- 930m 6s) (14500 2%) 0.6494
Epoch 1 | Iter 14600 | 23m 49s (- 928m 37s) (14600 2%) 0.6845
Epoch 1 | Iter 14700 | 23m 57s (- 927m 7s) (14700 2%) 0.7099
Epoch 1 | Iter 14800 | 24m 4s (- 925m 25s) (14800 2%) 0.6685
Epoch 1 | Iter 14900 | 24m 11s (- 923m 49s) (14900 2%) 0.7111
Epoch 1 | Iter 15000 | 24m 19s (- 922m 31s) (15000 2%) 0.6619
Epoch 1 | Iter 15100 | 24m 27s (- 921m 21s) (15100 2%) 0.7663
Epoch 1 | Iter 15200 | 24m 35s (- 920m 4s) (15200 2%) 0.7495
Epoch 1 | Iter 15300 | 24m 43s (- 918m 48s) (15300 2%) 0.6648
Epoch 1 | Iter 15400 | 24m 51s (- 917m 30s) (15400 2%) 0.6735
Epoch 1 | Iter

Epoch 1 | Iter 27200 | 37m 33s (- 768m 19s) (27200 4%) 0.4921
Epoch 1 | Iter 27300 | 37m 38s (- 767m 8s) (27300 4%) 0.9454
Epoch 1 | Iter 27400 | 37m 43s (- 765m 49s) (27400 4%) 0.5622
Epoch 1 | Iter 27500 | 37m 47s (- 764m 29s) (27500 4%) 0.5359
Epoch 1 | Iter 27600 | 37m 52s (- 763m 5s) (27600 4%) 0.4515
Epoch 1 | Iter 27700 | 37m 56s (- 761m 42s) (27700 4%) 0.5733
Epoch 1 | Iter 27800 | 38m 1s (- 760m 18s) (27800 4%) 0.5011
Epoch 1 | Iter 27900 | 38m 5s (- 758m 57s) (27900 4%) 0.5427
Epoch 1 | Iter 28000 | 38m 10s (- 757m 35s) (28000 4%) 0.5663
Epoch 1 | Iter 28100 | 38m 15s (- 756m 25s) (28100 4%) 0.9051
Epoch 1 | Iter 28200 | 38m 19s (- 755m 3s) (28200 4%) 0.5676
Epoch 1 | Iter 28300 | 38m 24s (- 753m 44s) (28300 4%) 0.4689
Epoch 1 | Iter 28400 | 38m 28s (- 752m 19s) (28400 4%) 0.5350
Epoch 1 | Iter 28500 | 38m 32s (- 750m 53s) (28500 4%) 0.5361
Epoch 1 | Iter 28600 | 38m 36s (- 749m 34s) (28600 4%) 0.7963
Epoch 1 | Iter 28700 | 38m 41s (- 748m 11s) (28700 4%) 0.6221
Epoch 1 | Ite

Epoch 2 | Iter 11800 | 57m 41s (- 763m 51s) (11800 7%) 0.5295
Epoch 2 | Iter 11900 | 57m 49s (- 763m 38s) (11900 7%) 0.4993
Epoch 2 | Iter 12000 | 57m 57s (- 763m 24s) (12000 7%) 0.5319
Epoch 2 | Iter 12100 | 58m 4s (- 763m 4s) (12100 7%) 0.5091
Epoch 2 | Iter 12200 | 58m 12s (- 762m 49s) (12200 7%) 0.6231
Epoch 2 | Iter 12300 | 58m 19s (- 762m 25s) (12300 7%) 0.4889
Epoch 2 | Iter 12400 | 58m 26s (- 761m 52s) (12400 7%) 0.5257
Epoch 2 | Iter 12500 | 58m 32s (- 761m 18s) (12500 7%) 0.5116
Epoch 2 | Iter 12600 | 58m 39s (- 760m 44s) (12600 7%) 0.5779
Epoch 2 | Iter 12700 | 58m 45s (- 760m 10s) (12700 7%) 0.5385
Epoch 2 | Iter 12800 | 58m 52s (- 759m 35s) (12800 7%) 0.5443
Epoch 2 | Iter 12900 | 58m 58s (- 759m 1s) (12900 7%) 0.5284
Epoch 2 | Iter 13000 | 59m 6s (- 758m 43s) (13000 7%) 0.5239
Epoch 2 | Iter 13100 | 59m 14s (- 758m 32s) (13100 7%) 0.6309
Epoch 2 | Iter 13200 | 59m 22s (- 758m 17s) (13200 7%) 0.5070
Epoch 2 | Iter 13300 | 59m 30s (- 758m 2s) (13300 7%) 0.5375
Epoch 2 | Ite

Epoch 2 | Iter 25100 | 71m 9s (- 693m 59s) (25100 9%) 0.7124
Epoch 2 | Iter 25200 | 71m 14s (- 693m 24s) (25200 9%) 0.4719
Epoch 2 | Iter 25300 | 71m 19s (- 692m 48s) (25300 9%) 0.5121
Epoch 2 | Iter 25400 | 71m 24s (- 692m 12s) (25400 9%) 0.3880
Epoch 2 | Iter 25500 | 71m 29s (- 691m 35s) (25500 9%) 0.4050
Epoch 2 | Iter 25600 | 71m 34s (- 690m 59s) (25600 9%) 0.4559
Epoch 2 | Iter 25700 | 71m 39s (- 690m 22s) (25700 9%) 0.3942
Epoch 2 | Iter 25800 | 71m 44s (- 689m 48s) (25800 9%) 0.5108
Epoch 2 | Iter 25900 | 71m 49s (- 689m 13s) (25900 9%) 0.4109
Epoch 2 | Iter 26000 | 71m 54s (- 688m 37s) (26000 9%) 0.4233
Epoch 2 | Iter 26100 | 71m 59s (- 688m 4s) (26100 9%) 0.4802
Epoch 2 | Iter 26200 | 72m 4s (- 687m 31s) (26200 9%) 0.5446
Epoch 2 | Iter 26300 | 72m 9s (- 687m 0s) (26300 9%) 0.7411
Epoch 2 | Iter 26400 | 72m 14s (- 686m 24s) (26400 9%) 0.4533
Epoch 2 | Iter 26500 | 72m 19s (- 685m 47s) (26500 9%) 0.3523
Epoch 2 | Iter 26600 | 72m 24s (- 685m 11s) (26600 9%) 0.5067
Epoch 2 | Ite

Epoch 3 | Iter 9500 | 91m 28s (- 695m 15s) (9500 11%) 0.4534
Epoch 3 | Iter 9600 | 91m 37s (- 695m 11s) (9600 11%) 0.5187
Epoch 3 | Iter 9700 | 91m 46s (- 695m 8s) (9700 11%) 0.4926
Epoch 3 | Iter 9800 | 91m 54s (- 695m 5s) (9800 11%) 0.4695
Epoch 3 | Iter 9900 | 92m 3s (- 695m 1s) (9900 11%) 0.4842
Epoch 3 | Iter 10000 | 92m 12s (- 694m 57s) (10000 11%) 0.4579
Epoch 3 | Iter 10100 | 92m 20s (- 694m 53s) (10100 11%) 0.4902
Epoch 3 | Iter 10200 | 92m 29s (- 694m 48s) (10200 11%) 0.4713
Epoch 3 | Iter 10300 | 92m 37s (- 694m 43s) (10300 11%) 0.4977
Epoch 3 | Iter 10400 | 92m 46s (- 694m 39s) (10400 11%) 0.5093
Epoch 3 | Iter 10500 | 92m 54s (- 694m 33s) (10500 11%) 0.4973
Epoch 3 | Iter 10600 | 93m 3s (- 694m 31s) (10600 11%) 0.4684
Epoch 3 | Iter 10700 | 93m 12s (- 694m 32s) (10700 11%) 0.4739
Epoch 3 | Iter 10800 | 93m 22s (- 694m 32s) (10800 11%) 0.4470
Epoch 3 | Iter 10900 | 93m 30s (- 694m 28s) (10900 11%) 0.4579
Epoch 3 | Iter 11000 | 93m 39s (- 694m 22s) (11000 11%) 0.4495
Epoch 3

Epoch 3 | Iter 22500 | 107m 11s (- 666m 27s) (22500 13%) 0.4799
Epoch 3 | Iter 22600 | 107m 16s (- 666m 5s) (22600 13%) 0.4530
Epoch 3 | Iter 22700 | 107m 22s (- 665m 45s) (22700 13%) 0.4974
Epoch 3 | Iter 22800 | 107m 28s (- 665m 25s) (22800 13%) 0.6582
Epoch 3 | Iter 22900 | 107m 34s (- 665m 3s) (22900 13%) 0.4159
Epoch 3 | Iter 23000 | 107m 40s (- 664m 41s) (23000 13%) 0.3719
Epoch 3 | Iter 23100 | 107m 45s (- 664m 18s) (23100 13%) 0.4380
Epoch 3 | Iter 23200 | 107m 51s (- 663m 56s) (23200 13%) 0.4394
Epoch 3 | Iter 23300 | 107m 56s (- 663m 33s) (23300 13%) 0.4357
Epoch 3 | Iter 23400 | 108m 2s (- 663m 10s) (23400 14%) 0.3395
Epoch 3 | Iter 23500 | 108m 7s (- 662m 48s) (23500 14%) 0.4054
Epoch 3 | Iter 23600 | 108m 13s (- 662m 25s) (23600 14%) 0.4427
Epoch 3 | Iter 23700 | 108m 18s (- 662m 2s) (23700 14%) 0.4482
Epoch 3 | Iter 23800 | 108m 24s (- 661m 41s) (23800 14%) 0.4815
Epoch 3 | Iter 23900 | 108m 30s (- 661m 20s) (23900 14%) 0.6981
Epoch 3 | Iter 24000 | 108m 35s (- 660m 58s) 

Epoch 4 | Iter 6600 | 125m 14s (- 651m 9s) (6600 16%) 0.4512
Epoch 4 | Iter 6700 | 125m 23s (- 651m 9s) (6700 16%) 0.4702
Epoch 4 | Iter 6800 | 125m 33s (- 651m 8s) (6800 16%) 0.4485
Epoch 4 | Iter 6900 | 125m 42s (- 651m 7s) (6900 16%) 0.4924
Epoch 4 | Iter 7000 | 125m 51s (- 651m 5s) (7000 16%) 0.4566
Epoch 4 | Iter 7100 | 126m 0s (- 651m 4s) (7100 16%) 0.4497
Epoch 4 | Iter 7200 | 126m 10s (- 651m 2s) (7200 16%) 0.4441
Epoch 4 | Iter 7300 | 126m 19s (- 650m 59s) (7300 16%) 0.4382
Epoch 4 | Iter 7400 | 126m 28s (- 650m 57s) (7400 16%) 0.4768
Epoch 4 | Iter 7500 | 126m 37s (- 650m 55s) (7500 16%) 0.5040
Epoch 4 | Iter 7600 | 126m 46s (- 650m 53s) (7600 16%) 0.4679
Epoch 4 | Iter 7700 | 126m 55s (- 650m 50s) (7700 16%) 0.4536
Epoch 4 | Iter 7800 | 127m 4s (- 650m 47s) (7800 16%) 0.4474
Epoch 4 | Iter 7900 | 127m 13s (- 650m 44s) (7900 16%) 0.4497
Epoch 4 | Iter 8000 | 127m 22s (- 650m 41s) (8000 16%) 0.4533
Epoch 4 | Iter 8100 | 127m 31s (- 650m 39s) (8100 16%) 0.4450
Epoch 4 | Iter 82

Epoch 4 | Iter 19600 | 142m 13s (- 632m 30s) (19600 18%) 0.4089
Epoch 4 | Iter 19700 | 142m 19s (- 632m 15s) (19700 18%) 0.4415
Epoch 4 | Iter 19800 | 142m 25s (- 631m 59s) (19800 18%) 0.3810
Epoch 4 | Iter 19900 | 142m 32s (- 631m 43s) (19900 18%) 0.4384
Epoch 4 | Iter 20000 | 142m 38s (- 631m 28s) (20000 18%) 0.4512
Epoch 4 | Iter 20100 | 142m 44s (- 631m 11s) (20100 18%) 0.3906
Epoch 4 | Iter 20200 | 142m 50s (- 630m 55s) (20200 18%) 0.4360
Epoch 4 | Iter 20300 | 142m 56s (- 630m 39s) (20300 18%) 0.4598
Epoch 4 | Iter 20400 | 143m 2s (- 630m 23s) (20400 18%) 0.3994
Epoch 4 | Iter 20500 | 143m 8s (- 630m 7s) (20500 18%) 0.5651
Epoch 4 | Iter 20600 | 143m 14s (- 629m 51s) (20600 18%) 0.4900
Epoch 4 | Iter 20700 | 143m 21s (- 629m 35s) (20700 18%) 0.4258
Epoch 4 | Iter 20800 | 143m 27s (- 629m 19s) (20800 18%) 0.4282
Epoch 4 | Iter 20900 | 143m 33s (- 629m 2s) (20900 18%) 0.3805
Epoch 4 | Iter 21000 | 143m 39s (- 628m 46s) (21000 18%) 0.4271
Epoch 4 | Iter 21100 | 143m 45s (- 628m 29s)

Epoch 5 | Iter 3600 | 158m 9s (- 608m 58s) (3600 20%) 0.4653
Epoch 5 | Iter 3700 | 158m 19s (- 609m 0s) (3700 20%) 0.4553
Epoch 5 | Iter 3800 | 158m 30s (- 609m 2s) (3800 20%) 0.4654
Epoch 5 | Iter 3900 | 158m 40s (- 609m 4s) (3900 20%) 0.4649
Epoch 5 | Iter 4000 | 158m 51s (- 609m 5s) (4000 20%) 0.4877
Epoch 5 | Iter 4100 | 159m 1s (- 609m 7s) (4100 20%) 0.4451
Epoch 5 | Iter 4200 | 159m 11s (- 609m 8s) (4200 20%) 0.4466
Epoch 5 | Iter 4300 | 159m 21s (- 609m 9s) (4300 20%) 0.4396
Epoch 5 | Iter 4400 | 159m 32s (- 609m 9s) (4400 20%) 0.4827
Epoch 5 | Iter 4500 | 159m 42s (- 609m 10s) (4500 20%) 0.4360
Epoch 5 | Iter 4600 | 159m 52s (- 609m 10s) (4600 20%) 0.4392
Epoch 5 | Iter 4700 | 160m 2s (- 609m 10s) (4700 20%) 0.4508
Epoch 5 | Iter 4800 | 160m 12s (- 609m 10s) (4800 20%) 0.4485
Epoch 5 | Iter 4900 | 160m 22s (- 609m 10s) (4900 20%) 0.4630
Epoch 5 | Iter 5000 | 160m 31s (- 609m 9s) (5000 20%) 0.4431
Epoch 5 | Iter 5100 | 160m 41s (- 609m 9s) (5100 20%) 0.4555
Epoch 5 | Iter 5200 |

Epoch 5 | Iter 16700 | 176m 58s (- 597m 8s) (16700 22%) 0.4457
Epoch 5 | Iter 16800 | 177m 5s (- 596m 57s) (16800 22%) 0.4176
Epoch 5 | Iter 16900 | 177m 12s (- 596m 46s) (16900 22%) 0.4261
Epoch 5 | Iter 17000 | 177m 19s (- 596m 35s) (17000 22%) 0.3764
Epoch 5 | Iter 17100 | 177m 26s (- 596m 24s) (17100 22%) 0.4331
Epoch 5 | Iter 17200 | 177m 33s (- 596m 12s) (17200 22%) 0.4158
Epoch 5 | Iter 17300 | 177m 40s (- 596m 1s) (17300 22%) 0.5281
Epoch 5 | Iter 17400 | 177m 47s (- 595m 53s) (17400 22%) 0.4677
Epoch 5 | Iter 17500 | 177m 55s (- 595m 42s) (17500 22%) 0.4437
Epoch 5 | Iter 17600 | 178m 2s (- 595m 31s) (17600 23%) 0.4139
Epoch 5 | Iter 17700 | 178m 8s (- 595m 19s) (17700 23%) 0.4018
Epoch 5 | Iter 17800 | 178m 15s (- 595m 7s) (17800 23%) 0.4258
Epoch 5 | Iter 17900 | 178m 22s (- 594m 56s) (17900 23%) 0.4610
Epoch 5 | Iter 18000 | 178m 29s (- 594m 44s) (18000 23%) 0.3660
Epoch 5 | Iter 18100 | 178m 36s (- 594m 32s) (18100 23%) 0.4081
Epoch 5 | Iter 18200 | 178m 42s (- 594m 20s) (

Epoch 6 | Iter 500 | 189m 56s (- 567m 14s) (500 25%) 0.6734
Epoch 6 | Iter 600 | 190m 10s (- 567m 24s) (600 25%) 0.6419
Epoch 6 | Iter 700 | 190m 23s (- 567m 33s) (700 25%) 0.6212
Epoch 6 | Iter 800 | 190m 37s (- 567m 42s) (800 25%) 0.5810
Epoch 6 | Iter 900 | 190m 50s (- 567m 50s) (900 25%) 0.5736
Epoch 6 | Iter 1000 | 191m 3s (- 567m 57s) (1000 25%) 0.5325
Epoch 6 | Iter 1100 | 191m 16s (- 568m 4s) (1100 25%) 0.5365
Epoch 6 | Iter 1200 | 191m 28s (- 568m 11s) (1200 25%) 0.5286
Epoch 6 | Iter 1300 | 191m 41s (- 568m 17s) (1300 25%) 0.5291
Epoch 6 | Iter 1400 | 191m 53s (- 568m 23s) (1400 25%) 0.5052
Epoch 6 | Iter 1500 | 192m 5s (- 568m 28s) (1500 25%) 0.4823
Epoch 6 | Iter 1600 | 192m 18s (- 568m 34s) (1600 25%) 0.4671
Epoch 6 | Iter 1700 | 192m 30s (- 568m 38s) (1700 25%) 0.4943
Epoch 6 | Iter 1800 | 192m 42s (- 568m 43s) (1800 25%) 0.4606
Epoch 6 | Iter 1900 | 192m 54s (- 568m 47s) (1900 25%) 0.4815
Epoch 6 | Iter 2000 | 193m 5s (- 568m 50s) (2000 25%) 0.4681
Epoch 6 | Iter 2100 | 

Epoch 6 | Iter 13700 | 211m 19s (- 561m 25s) (13700 27%) 0.3985
Epoch 6 | Iter 13800 | 211m 27s (- 561m 17s) (13800 27%) 0.3963
Epoch 6 | Iter 13900 | 211m 34s (- 561m 8s) (13900 27%) 0.4270
Epoch 6 | Iter 14000 | 211m 42s (- 561m 0s) (14000 27%) 0.3961
Epoch 6 | Iter 14100 | 211m 50s (- 560m 51s) (14100 27%) 0.5269
Epoch 6 | Iter 14200 | 211m 58s (- 560m 43s) (14200 27%) 0.3958
Epoch 6 | Iter 14300 | 212m 5s (- 560m 34s) (14300 27%) 0.3934
Epoch 6 | Iter 14400 | 212m 13s (- 560m 25s) (14400 27%) 0.4341
Epoch 6 | Iter 14500 | 212m 20s (- 560m 16s) (14500 27%) 0.3788
Epoch 6 | Iter 14600 | 212m 28s (- 560m 7s) (14600 27%) 0.4212
Epoch 6 | Iter 14700 | 212m 36s (- 559m 59s) (14700 27%) 0.4121
Epoch 6 | Iter 14800 | 212m 43s (- 559m 50s) (14800 27%) 0.4061
Epoch 6 | Iter 14900 | 212m 51s (- 559m 41s) (14900 27%) 0.4334
Epoch 6 | Iter 15000 | 212m 58s (- 559m 32s) (15000 27%) 0.4047
Epoch 6 | Iter 15100 | 213m 6s (- 559m 23s) (15100 27%) 0.4914
Epoch 6 | Iter 15200 | 213m 13s (- 559m 14s) 

Epoch 6 | Iter 26600 | 224m 32s (- 535m 8s) (26600 29%) 0.4234
Epoch 6 | Iter 26700 | 224m 36s (- 534m 53s) (26700 29%) 0.3208
Epoch 6 | Iter 26800 | 224m 41s (- 534m 38s) (26800 29%) 0.3561
Epoch 6 | Iter 26900 | 224m 46s (- 534m 22s) (26900 29%) 0.3952
Epoch 6 | Iter 27000 | 224m 50s (- 534m 7s) (27000 29%) 0.3563
Epoch 6 | Iter 27100 | 224m 55s (- 533m 51s) (27100 29%) 0.4165
Epoch 6 | Iter 27200 | 224m 59s (- 533m 36s) (27200 29%) 0.3629
Epoch 6 | Iter 27300 | 225m 4s (- 533m 21s) (27300 29%) 0.7228
Epoch 6 | Iter 27400 | 225m 9s (- 533m 6s) (27400 29%) 0.4076
Epoch 6 | Iter 27500 | 225m 13s (- 532m 50s) (27500 29%) 0.3890
Epoch 6 | Iter 27600 | 225m 18s (- 532m 34s) (27600 29%) 0.3227
Epoch 6 | Iter 27700 | 225m 22s (- 532m 18s) (27700 29%) 0.4215
Epoch 6 | Iter 27800 | 225m 26s (- 532m 2s) (27800 29%) 0.3762
Epoch 6 | Iter 27900 | 225m 31s (- 531m 46s) (27900 29%) 0.3797
Epoch 6 | Iter 28000 | 225m 35s (- 531m 30s) (28000 29%) 0.4101
Epoch 6 | Iter 28100 | 225m 40s (- 531m 15s) (

Epoch 7 | Iter 10800 | 245m 14s (- 524m 43s) (10800 31%) 0.3890
Epoch 7 | Iter 10900 | 245m 22s (- 524m 36s) (10900 31%) 0.3982
Epoch 7 | Iter 11000 | 245m 30s (- 524m 29s) (11000 31%) 0.3871
Epoch 7 | Iter 11100 | 245m 39s (- 524m 22s) (11100 31%) 0.3930
Epoch 7 | Iter 11200 | 245m 47s (- 524m 15s) (11200 31%) 0.4127
Epoch 7 | Iter 11300 | 245m 55s (- 524m 8s) (11300 31%) 0.4962
Epoch 7 | Iter 11400 | 246m 3s (- 524m 1s) (11400 31%) 0.4195
Epoch 7 | Iter 11500 | 246m 12s (- 523m 53s) (11500 31%) 0.3976
Epoch 7 | Iter 11600 | 246m 20s (- 523m 46s) (11600 31%) 0.4138
Epoch 7 | Iter 11700 | 246m 28s (- 523m 39s) (11700 32%) 0.4034
Epoch 7 | Iter 11800 | 246m 36s (- 523m 31s) (11800 32%) 0.4059
Epoch 7 | Iter 11900 | 246m 44s (- 523m 24s) (11900 32%) 0.3871
Epoch 7 | Iter 12000 | 246m 52s (- 523m 16s) (12000 32%) 0.4163
Epoch 7 | Iter 12100 | 247m 0s (- 523m 8s) (12100 32%) 0.3925
Epoch 7 | Iter 12200 | 247m 8s (- 523m 1s) (12200 32%) 0.4847
Epoch 7 | Iter 12300 | 247m 16s (- 522m 53s) (1

Epoch 7 | Iter 23700 | 260m 5s (- 503m 32s) (23700 34%) 0.3898
Epoch 7 | Iter 23800 | 260m 11s (- 503m 19s) (23800 34%) 0.4225
Epoch 7 | Iter 23900 | 260m 16s (- 503m 8s) (23900 34%) 0.6227
Epoch 7 | Iter 24000 | 260m 22s (- 502m 55s) (24000 34%) 0.4213
Epoch 7 | Iter 24100 | 260m 27s (- 502m 42s) (24100 34%) 0.3305
Epoch 7 | Iter 24200 | 260m 32s (- 502m 29s) (24200 34%) 0.3860
Epoch 7 | Iter 24300 | 260m 38s (- 502m 17s) (24300 34%) 0.3380
Epoch 7 | Iter 24400 | 260m 43s (- 502m 4s) (24400 34%) 0.4174
Epoch 7 | Iter 24500 | 260m 48s (- 501m 51s) (24500 34%) 0.3675
Epoch 7 | Iter 24600 | 260m 53s (- 501m 38s) (24600 34%) 0.3452
Epoch 7 | Iter 24700 | 260m 58s (- 501m 25s) (24700 34%) 0.3874
Epoch 7 | Iter 24800 | 261m 4s (- 501m 12s) (24800 34%) 0.3140
Epoch 7 | Iter 24900 | 261m 9s (- 500m 59s) (24900 34%) 0.3986
Epoch 7 | Iter 25000 | 261m 14s (- 500m 46s) (25000 34%) 0.4010
Epoch 7 | Iter 25100 | 261m 19s (- 500m 34s) (25100 34%) 0.6059
Epoch 7 | Iter 25200 | 261m 25s (- 500m 21s) 

Epoch 8 | Iter 7800 | 278m 41s (- 488m 16s) (7800 36%) 0.3954
Epoch 8 | Iter 7900 | 278m 50s (- 488m 11s) (7900 36%) 0.3979
Epoch 8 | Iter 8000 | 278m 59s (- 488m 5s) (8000 36%) 0.3967
Epoch 8 | Iter 8100 | 279m 8s (- 487m 59s) (8100 36%) 0.3890
Epoch 8 | Iter 8200 | 279m 17s (- 487m 53s) (8200 36%) 0.4620
Epoch 8 | Iter 8300 | 279m 26s (- 487m 47s) (8300 36%) 0.3895
Epoch 8 | Iter 8400 | 279m 35s (- 487m 41s) (8400 36%) 0.3950
Epoch 8 | Iter 8500 | 279m 44s (- 487m 35s) (8500 36%) 0.3962
Epoch 8 | Iter 8600 | 279m 53s (- 487m 29s) (8600 36%) 0.3801
Epoch 8 | Iter 8700 | 280m 2s (- 487m 23s) (8700 36%) 0.4107
Epoch 8 | Iter 8800 | 280m 10s (- 487m 16s) (8800 36%) 0.4153
Epoch 8 | Iter 8900 | 280m 19s (- 487m 10s) (8900 36%) 0.4621
Epoch 8 | Iter 9000 | 280m 28s (- 487m 3s) (9000 36%) 0.4090
Epoch 8 | Iter 9100 | 280m 36s (- 486m 57s) (9100 36%) 0.4131
Epoch 8 | Iter 9200 | 280m 45s (- 486m 50s) (9200 36%) 0.3920
Epoch 8 | Iter 9300 | 280m 54s (- 486m 43s) (9300 36%) 0.4123
Epoch 8 | It

Epoch 8 | Iter 20800 | 295m 4s (- 470m 5s) (20800 38%) 0.3879
Epoch 8 | Iter 20900 | 295m 10s (- 469m 54s) (20900 38%) 0.3511
Epoch 8 | Iter 21000 | 295m 16s (- 469m 44s) (21000 38%) 0.4100
Epoch 8 | Iter 21100 | 295m 22s (- 469m 33s) (21100 38%) 0.3656
Epoch 8 | Iter 21200 | 295m 28s (- 469m 23s) (21200 38%) 0.3954
Epoch 8 | Iter 21300 | 295m 34s (- 469m 12s) (21300 38%) 0.3190
Epoch 8 | Iter 21400 | 295m 41s (- 469m 1s) (21400 38%) 0.4164
Epoch 8 | Iter 21500 | 295m 46s (- 468m 50s) (21500 38%) 0.3381
Epoch 8 | Iter 21600 | 295m 53s (- 468m 40s) (21600 38%) 0.4960
Epoch 8 | Iter 21700 | 295m 59s (- 468m 29s) (21700 38%) 0.5032
Epoch 8 | Iter 21800 | 296m 5s (- 468m 18s) (21800 38%) 0.3445
Epoch 8 | Iter 21900 | 296m 11s (- 468m 7s) (21900 38%) 0.3991
Epoch 8 | Iter 22000 | 296m 17s (- 467m 56s) (22000 38%) 0.3950
Epoch 8 | Iter 22100 | 296m 22s (- 467m 45s) (22100 38%) 0.3059
Epoch 8 | Iter 22200 | 296m 28s (- 467m 34s) (22200 38%) 0.4113
Epoch 8 | Iter 22300 | 296m 34s (- 467m 23s) 

Epoch 9 | Iter 4800 | 311m 46s (- 451m 57s) (4800 40%) 0.4109
Epoch 9 | Iter 4900 | 311m 56s (- 451m 53s) (4900 40%) 0.4233
Epoch 9 | Iter 5000 | 312m 6s (- 451m 48s) (5000 40%) 0.4109
Epoch 9 | Iter 5100 | 312m 16s (- 451m 44s) (5100 40%) 0.4058
Epoch 9 | Iter 5200 | 312m 27s (- 451m 39s) (5200 40%) 0.4183
Epoch 9 | Iter 5300 | 312m 37s (- 451m 35s) (5300 40%) 0.4205
Epoch 9 | Iter 5400 | 312m 47s (- 451m 31s) (5400 40%) 0.3998
Epoch 9 | Iter 5500 | 312m 58s (- 451m 26s) (5500 40%) 0.3973
Epoch 9 | Iter 5600 | 313m 8s (- 451m 22s) (5600 40%) 0.4004
Epoch 9 | Iter 5700 | 313m 18s (- 451m 17s) (5700 40%) 0.3969
Epoch 9 | Iter 5800 | 313m 28s (- 451m 13s) (5800 40%) 0.4527
Epoch 9 | Iter 5900 | 313m 38s (- 451m 8s) (5900 41%) 0.4182
Epoch 9 | Iter 6000 | 313m 48s (- 451m 2s) (6000 41%) 0.4114
Epoch 9 | Iter 6100 | 313m 57s (- 450m 57s) (6100 41%) 0.4189
Epoch 9 | Iter 6200 | 314m 7s (- 450m 52s) (6200 41%) 0.4011
Epoch 9 | Iter 6300 | 314m 16s (- 450m 46s) (6300 41%) 0.4587
Epoch 9 | Ite

Epoch 9 | Iter 17900 | 329m 53s (- 436m 6s) (17900 43%) 0.4274
Epoch 9 | Iter 18000 | 329m 59s (- 435m 57s) (18000 43%) 0.3455
Epoch 9 | Iter 18100 | 330m 6s (- 435m 48s) (18100 43%) 0.3874
Epoch 9 | Iter 18200 | 330m 13s (- 435m 38s) (18200 43%) 0.3801
Epoch 9 | Iter 18300 | 330m 20s (- 435m 29s) (18300 43%) 0.4512
Epoch 9 | Iter 18400 | 330m 27s (- 435m 21s) (18400 43%) 0.4933
Epoch 9 | Iter 18500 | 330m 34s (- 435m 11s) (18500 43%) 0.3851
Epoch 9 | Iter 18600 | 330m 41s (- 435m 2s) (18600 43%) 0.3703
Epoch 9 | Iter 18700 | 330m 48s (- 434m 53s) (18700 43%) 0.4053
Epoch 9 | Iter 18800 | 330m 55s (- 434m 44s) (18800 43%) 0.3494
Epoch 9 | Iter 18900 | 331m 2s (- 434m 34s) (18900 43%) 0.3881
Epoch 9 | Iter 19000 | 331m 8s (- 434m 25s) (19000 43%) 0.3943
Epoch 9 | Iter 19100 | 331m 15s (- 434m 16s) (19100 43%) 0.3641
Epoch 9 | Iter 19200 | 331m 22s (- 434m 6s) (19200 43%) 0.3921
Epoch 9 | Iter 19300 | 331m 28s (- 433m 57s) (19300 43%) 0.3738
Epoch 9 | Iter 19400 | 331m 35s (- 433m 47s) (

Epoch 10 | Iter 1800 | 344m 15s (- 415m 32s) (1800 45%) 0.4586
Epoch 10 | Iter 1900 | 344m 26s (- 415m 29s) (1900 45%) 0.4759
Epoch 10 | Iter 2000 | 344m 38s (- 415m 26s) (2000 45%) 0.4661
Epoch 10 | Iter 2100 | 344m 50s (- 415m 23s) (2100 45%) 0.4576
Epoch 10 | Iter 2200 | 345m 2s (- 415m 20s) (2200 45%) 0.4430
Epoch 10 | Iter 2300 | 345m 14s (- 415m 17s) (2300 45%) 0.4354
Epoch 10 | Iter 2400 | 345m 25s (- 415m 14s) (2400 45%) 0.4754
Epoch 10 | Iter 2500 | 345m 37s (- 415m 11s) (2500 45%) 0.4365
Epoch 10 | Iter 2600 | 345m 48s (- 415m 7s) (2600 45%) 0.4662
Epoch 10 | Iter 2700 | 346m 0s (- 415m 4s) (2700 45%) 0.4234
Epoch 10 | Iter 2800 | 346m 11s (- 415m 0s) (2800 45%) 0.4558
Epoch 10 | Iter 2900 | 346m 23s (- 414m 57s) (2900 45%) 0.4516
Epoch 10 | Iter 3000 | 346m 34s (- 414m 53s) (3000 45%) 0.4611
Epoch 10 | Iter 3100 | 346m 45s (- 414m 49s) (3100 45%) 0.4307
Epoch 10 | Iter 3200 | 346m 56s (- 414m 45s) (3200 45%) 0.4133
Epoch 10 | Iter 3300 | 347m 7s (- 414m 41s) (3300 45%) 0.445

Epoch 10 | Iter 14800 | 364m 14s (- 402m 0s) (14800 47%) 0.3803
Epoch 10 | Iter 14900 | 364m 21s (- 401m 52s) (14900 47%) 0.4120
Epoch 10 | Iter 15000 | 364m 29s (- 401m 43s) (15000 47%) 0.3857
Epoch 10 | Iter 15100 | 364m 36s (- 401m 35s) (15100 47%) 0.4647
Epoch 10 | Iter 15200 | 364m 44s (- 401m 27s) (15200 47%) 0.4630
Epoch 10 | Iter 15300 | 364m 51s (- 401m 18s) (15300 47%) 0.3817
Epoch 10 | Iter 15400 | 364m 58s (- 401m 10s) (15400 47%) 0.3893
Epoch 10 | Iter 15500 | 365m 5s (- 401m 1s) (15500 47%) 0.3749
Epoch 10 | Iter 15600 | 365m 13s (- 400m 52s) (15600 47%) 0.4198
Epoch 10 | Iter 15700 | 365m 20s (- 400m 44s) (15700 47%) 0.3994
Epoch 10 | Iter 15800 | 365m 27s (- 400m 35s) (15800 47%) 0.3884
Epoch 10 | Iter 15900 | 365m 34s (- 400m 26s) (15900 47%) 0.4216
Epoch 10 | Iter 16000 | 365m 41s (- 400m 18s) (16000 47%) 0.4102
Epoch 10 | Iter 16100 | 365m 48s (- 400m 9s) (16100 47%) 0.3851
Epoch 10 | Iter 16200 | 365m 55s (- 400m 0s) (16200 47%) 0.4863
Epoch 10 | Iter 16300 | 366m 2

Epoch 10 | Iter 27500 | 376m 55s (- 381m 18s) (27500 49%) 0.3626
Epoch 10 | Iter 27600 | 376m 59s (- 381m 6s) (27600 49%) 0.2958
Epoch 10 | Iter 27700 | 377m 3s (- 380m 55s) (27700 49%) 0.4082
Epoch 10 | Iter 27800 | 377m 8s (- 380m 44s) (27800 49%) 0.3682
Epoch 10 | Iter 27900 | 377m 12s (- 380m 32s) (27900 49%) 0.3803
Epoch 10 | Iter 28000 | 377m 16s (- 380m 21s) (28000 49%) 0.3945
Epoch 10 | Iter 28100 | 377m 21s (- 380m 10s) (28100 49%) 0.6615
Epoch 10 | Iter 28200 | 377m 25s (- 379m 59s) (28200 49%) 0.3929
Epoch 10 | Iter 28300 | 377m 29s (- 379m 47s) (28300 49%) 0.3216
Epoch 10 | Iter 28400 | 377m 33s (- 379m 36s) (28400 49%) 0.3751
Epoch 10 | Iter 28500 | 377m 37s (- 379m 24s) (28500 49%) 0.3823
Epoch 10 | Iter 28600 | 377m 41s (- 379m 13s) (28600 49%) 0.5828
Epoch 10 | Iter 28700 | 377m 45s (- 379m 1s) (28700 49%) 0.4507
Epoch 10 | Iter 28800 | 377m 49s (- 378m 49s) (28800 49%) 0.3235
Epoch 10 | Iter 28900 | 377m 53s (- 378m 38s) (28900 49%) 0.3868
Epoch 10 | Iter 29000 | 377m 

Epoch 11 | Iter 11500 | 397m 58s (- 367m 48s) (11500 51%) 0.3829
Epoch 11 | Iter 11600 | 398m 6s (- 367m 40s) (11600 51%) 0.3976
Epoch 11 | Iter 11700 | 398m 14s (- 367m 33s) (11700 52%) 0.3803
Epoch 11 | Iter 11800 | 398m 23s (- 367m 25s) (11800 52%) 0.3836
Epoch 11 | Iter 11900 | 398m 31s (- 367m 17s) (11900 52%) 0.3559
Epoch 11 | Iter 12000 | 398m 39s (- 367m 10s) (12000 52%) 0.3879
Epoch 11 | Iter 12100 | 398m 47s (- 367m 2s) (12100 52%) 0.3643
Epoch 11 | Iter 12200 | 398m 55s (- 366m 54s) (12200 52%) 0.4675
Epoch 11 | Iter 12300 | 399m 3s (- 366m 46s) (12300 52%) 0.3622
Epoch 11 | Iter 12400 | 399m 12s (- 366m 39s) (12400 52%) 0.3962
Epoch 11 | Iter 12500 | 399m 20s (- 366m 32s) (12500 52%) 0.3781
Epoch 11 | Iter 12600 | 399m 28s (- 366m 24s) (12600 52%) 0.4046
Epoch 11 | Iter 12700 | 399m 36s (- 366m 16s) (12700 52%) 0.3841
Epoch 11 | Iter 12800 | 399m 44s (- 366m 8s) (12800 52%) 0.3854
Epoch 11 | Iter 12900 | 399m 51s (- 366m 0s) (12900 52%) 0.3849
Epoch 11 | Iter 13000 | 399m 5

Epoch 11 | Iter 24200 | 412m 12s (- 349m 5s) (24200 54%) 0.3642
Epoch 11 | Iter 24300 | 412m 18s (- 348m 55s) (24300 54%) 0.3177
Epoch 11 | Iter 24400 | 412m 23s (- 348m 45s) (24400 54%) 0.4034
Epoch 11 | Iter 24500 | 412m 28s (- 348m 35s) (24500 54%) 0.3576
Epoch 11 | Iter 24600 | 412m 33s (- 348m 25s) (24600 54%) 0.3372
Epoch 11 | Iter 24700 | 412m 38s (- 348m 15s) (24700 54%) 0.3712
Epoch 11 | Iter 24800 | 412m 44s (- 348m 5s) (24800 54%) 0.2804
Epoch 11 | Iter 24900 | 412m 49s (- 347m 55s) (24900 54%) 0.3760
Epoch 11 | Iter 25000 | 412m 54s (- 347m 45s) (25000 54%) 0.3738
Epoch 11 | Iter 25100 | 412m 59s (- 347m 35s) (25100 54%) 0.5832
Epoch 11 | Iter 25200 | 413m 5s (- 347m 25s) (25200 54%) 0.3744
Epoch 11 | Iter 25300 | 413m 10s (- 347m 15s) (25300 54%) 0.4057
Epoch 11 | Iter 25400 | 413m 15s (- 347m 5s) (25400 54%) 0.3067
Epoch 11 | Iter 25500 | 413m 20s (- 346m 54s) (25500 54%) 0.3219
Epoch 11 | Iter 25600 | 413m 25s (- 346m 44s) (25600 54%) 0.3736
Epoch 11 | Iter 25700 | 413m 

Epoch 12 | Iter 8100 | 430m 52s (- 333m 15s) (8100 56%) 0.3803
Epoch 12 | Iter 8200 | 431m 2s (- 333m 8s) (8200 56%) 0.4458
Epoch 12 | Iter 8300 | 431m 11s (- 333m 1s) (8300 56%) 0.3782
Epoch 12 | Iter 8400 | 431m 20s (- 332m 55s) (8400 56%) 0.3845
Epoch 12 | Iter 8500 | 431m 29s (- 332m 48s) (8500 56%) 0.3801
Epoch 12 | Iter 8600 | 431m 38s (- 332m 41s) (8600 56%) 0.3641
Epoch 12 | Iter 8700 | 431m 47s (- 332m 34s) (8700 56%) 0.3929
Epoch 12 | Iter 8800 | 431m 56s (- 332m 27s) (8800 56%) 0.4102
Epoch 12 | Iter 8900 | 432m 5s (- 332m 20s) (8900 56%) 0.4377
Epoch 12 | Iter 9000 | 432m 14s (- 332m 13s) (9000 56%) 0.3957
Epoch 12 | Iter 9100 | 432m 22s (- 332m 5s) (9100 56%) 0.4004
Epoch 12 | Iter 9200 | 432m 31s (- 331m 58s) (9200 56%) 0.3866
Epoch 12 | Iter 9300 | 432m 40s (- 331m 51s) (9300 56%) 0.3993
Epoch 12 | Iter 9400 | 432m 49s (- 331m 44s) (9400 56%) 0.3737
Epoch 12 | Iter 9500 | 432m 57s (- 331m 37s) (9500 56%) 0.3680
Epoch 12 | Iter 9600 | 433m 6s (- 331m 30s) (9600 56%) 0.417

Epoch 12 | Iter 20900 | 447m 2s (- 316m 4s) (20900 58%) 0.3294
Epoch 12 | Iter 21000 | 447m 8s (- 315m 55s) (21000 58%) 0.3802
Epoch 12 | Iter 21100 | 447m 14s (- 315m 46s) (21100 58%) 0.3431
Epoch 12 | Iter 21200 | 447m 20s (- 315m 37s) (21200 58%) 0.3850
Epoch 12 | Iter 21300 | 447m 26s (- 315m 28s) (21300 58%) 0.3128
Epoch 12 | Iter 21400 | 447m 32s (- 315m 19s) (21400 58%) 0.3983
Epoch 12 | Iter 21500 | 447m 38s (- 315m 10s) (21500 58%) 0.3364
Epoch 12 | Iter 21600 | 447m 44s (- 315m 1s) (21600 58%) 0.4833
Epoch 12 | Iter 21700 | 447m 50s (- 314m 52s) (21700 58%) 0.4826
Epoch 12 | Iter 21800 | 447m 56s (- 314m 42s) (21800 58%) 0.3281
Epoch 12 | Iter 21900 | 448m 2s (- 314m 33s) (21900 58%) 0.3751
Epoch 12 | Iter 22000 | 448m 8s (- 314m 24s) (22000 58%) 0.3833
Epoch 12 | Iter 22100 | 448m 14s (- 314m 15s) (22100 58%) 0.2931
Epoch 12 | Iter 22200 | 448m 20s (- 314m 5s) (22200 58%) 0.4023
Epoch 12 | Iter 22300 | 448m 25s (- 313m 56s) (22300 58%) 0.3626
Epoch 12 | Iter 22400 | 448m 31s

Epoch 13 | Iter 4700 | 463m 23s (- 298m 41s) (4700 60%) 0.4096
Epoch 13 | Iter 4800 | 463m 33s (- 298m 35s) (4800 60%) 0.4057
Epoch 13 | Iter 4900 | 463m 44s (- 298m 29s) (4900 60%) 0.4145
Epoch 13 | Iter 5000 | 463m 54s (- 298m 23s) (5000 60%) 0.3967
Epoch 13 | Iter 5100 | 464m 4s (- 298m 16s) (5100 60%) 0.3917
Epoch 13 | Iter 5200 | 464m 14s (- 298m 10s) (5200 60%) 0.4038
Epoch 13 | Iter 5300 | 464m 24s (- 298m 3s) (5300 60%) 0.4036
Epoch 13 | Iter 5400 | 464m 34s (- 297m 57s) (5400 60%) 0.3879
Epoch 13 | Iter 5500 | 464m 44s (- 297m 50s) (5500 60%) 0.4022
Epoch 13 | Iter 5600 | 464m 54s (- 297m 44s) (5600 60%) 0.3991
Epoch 13 | Iter 5700 | 465m 3s (- 297m 37s) (5700 60%) 0.3856
Epoch 13 | Iter 5800 | 465m 13s (- 297m 31s) (5800 60%) 0.5178
Epoch 13 | Iter 5900 | 465m 23s (- 297m 24s) (5900 61%) 0.4384
Epoch 13 | Iter 6000 | 465m 33s (- 297m 17s) (6000 61%) 0.4099
Epoch 13 | Iter 6100 | 465m 42s (- 297m 11s) (6100 61%) 0.4089
Epoch 13 | Iter 6200 | 465m 52s (- 297m 4s) (6200 61%) 0.3

Epoch 13 | Iter 17600 | 480m 32s (- 282m 2s) (17600 63%) 0.3805
Epoch 13 | Iter 17700 | 480m 37s (- 281m 53s) (17700 63%) 0.3582
Epoch 13 | Iter 17800 | 480m 43s (- 281m 43s) (17800 63%) 0.3904
Epoch 13 | Iter 17900 | 480m 48s (- 281m 34s) (17900 63%) 0.4242
Epoch 13 | Iter 18000 | 480m 54s (- 281m 25s) (18000 63%) 0.3369
Epoch 13 | Iter 18100 | 480m 59s (- 281m 16s) (18100 63%) 0.3653
Epoch 13 | Iter 18200 | 481m 5s (- 281m 7s) (18200 63%) 0.3792
Epoch 13 | Iter 18300 | 481m 10s (- 280m 57s) (18300 63%) 0.4342
Epoch 13 | Iter 18400 | 481m 16s (- 280m 48s) (18400 63%) 0.4771
Epoch 13 | Iter 18500 | 481m 23s (- 280m 40s) (18500 63%) 0.3837
Epoch 13 | Iter 18600 | 481m 29s (- 280m 31s) (18600 63%) 0.3746
Epoch 13 | Iter 18700 | 481m 36s (- 280m 23s) (18700 63%) 0.4180
Epoch 13 | Iter 18800 | 481m 42s (- 280m 14s) (18800 63%) 0.3458
Epoch 13 | Iter 18900 | 481m 49s (- 280m 6s) (18900 63%) 0.3842
Epoch 13 | Iter 19000 | 481m 56s (- 279m 57s) (19000 63%) 0.3963
Epoch 13 | Iter 19100 | 482m 

Epoch 14 | Iter 1300 | 494m 2s (- 263m 25s) (1300 65%) 0.4960
Epoch 14 | Iter 1400 | 494m 14s (- 263m 20s) (1400 65%) 0.4689
Epoch 14 | Iter 1500 | 494m 27s (- 263m 15s) (1500 65%) 0.4380
Epoch 14 | Iter 1600 | 494m 39s (- 263m 9s) (1600 65%) 0.4272
Epoch 14 | Iter 1700 | 494m 51s (- 263m 4s) (1700 65%) 0.4522
Epoch 14 | Iter 1800 | 495m 3s (- 262m 58s) (1800 65%) 0.4200
Epoch 14 | Iter 1900 | 495m 15s (- 262m 52s) (1900 65%) 0.4397
Epoch 14 | Iter 2000 | 495m 27s (- 262m 47s) (2000 65%) 0.4391
Epoch 14 | Iter 2100 | 495m 39s (- 262m 41s) (2100 65%) 0.4262
Epoch 14 | Iter 2200 | 495m 50s (- 262m 35s) (2200 65%) 0.4158
Epoch 14 | Iter 2300 | 496m 2s (- 262m 30s) (2300 65%) 0.4069
Epoch 14 | Iter 2400 | 496m 14s (- 262m 24s) (2400 65%) 0.4462
Epoch 14 | Iter 2500 | 496m 26s (- 262m 18s) (2500 65%) 0.4102
Epoch 14 | Iter 2600 | 496m 37s (- 262m 12s) (2600 65%) 0.4335
Epoch 14 | Iter 2700 | 496m 49s (- 262m 7s) (2700 65%) 0.4018
Epoch 14 | Iter 2800 | 497m 0s (- 262m 1s) (2800 65%) 0.4190


Epoch 14 | Iter 14300 | 514m 30s (- 248m 17s) (14300 67%) 0.3604
Epoch 14 | Iter 14400 | 514m 37s (- 248m 9s) (14400 67%) 0.4232
Epoch 14 | Iter 14500 | 514m 45s (- 248m 1s) (14500 67%) 0.3614
Epoch 14 | Iter 14600 | 514m 52s (- 247m 53s) (14600 67%) 0.3844
Epoch 14 | Iter 14700 | 515m 0s (- 247m 45s) (14700 67%) 0.3838
Epoch 14 | Iter 14800 | 515m 7s (- 247m 37s) (14800 67%) 0.3674
Epoch 14 | Iter 14900 | 515m 15s (- 247m 29s) (14900 67%) 0.3985
Epoch 14 | Iter 15000 | 515m 22s (- 247m 21s) (15000 67%) 0.3740
Epoch 14 | Iter 15100 | 515m 30s (- 247m 13s) (15100 67%) 0.4429
Epoch 14 | Iter 15200 | 515m 37s (- 247m 5s) (15200 67%) 0.4513
Epoch 14 | Iter 15300 | 515m 45s (- 246m 57s) (15300 67%) 0.3724
Epoch 14 | Iter 15400 | 515m 52s (- 246m 49s) (15400 67%) 0.3750
Epoch 14 | Iter 15500 | 516m 0s (- 246m 41s) (15500 67%) 0.3657
Epoch 14 | Iter 15600 | 516m 7s (- 246m 33s) (15600 67%) 0.3998
Epoch 14 | Iter 15700 | 516m 14s (- 246m 25s) (15700 67%) 0.3769
Epoch 14 | Iter 15800 | 516m 21s

Epoch 14 | Iter 27000 | 527m 30s (- 230m 7s) (27000 69%) 0.3292
Epoch 14 | Iter 27100 | 527m 34s (- 229m 58s) (27100 69%) 0.3826
Epoch 14 | Iter 27200 | 527m 39s (- 229m 49s) (27200 69%) 0.3383
Epoch 14 | Iter 27300 | 527m 44s (- 229m 40s) (27300 69%) 0.6620
Epoch 14 | Iter 27400 | 527m 48s (- 229m 30s) (27400 69%) 0.3685
Epoch 14 | Iter 27500 | 527m 53s (- 229m 21s) (27500 69%) 0.3536
Epoch 14 | Iter 27600 | 527m 57s (- 229m 12s) (27600 69%) 0.2864
Epoch 14 | Iter 27700 | 528m 1s (- 229m 3s) (27700 69%) 0.3984
Epoch 14 | Iter 27800 | 528m 6s (- 228m 53s) (27800 69%) 0.3648
Epoch 14 | Iter 27900 | 528m 10s (- 228m 44s) (27900 69%) 0.3635
Epoch 14 | Iter 28000 | 528m 14s (- 228m 35s) (28000 69%) 0.3899
Epoch 14 | Iter 28100 | 528m 19s (- 228m 26s) (28100 69%) 0.6584
Epoch 14 | Iter 28200 | 528m 23s (- 228m 16s) (28200 69%) 0.3940
Epoch 14 | Iter 28300 | 528m 27s (- 228m 7s) (28300 69%) 0.3243
Epoch 14 | Iter 28400 | 528m 31s (- 227m 58s) (28400 69%) 0.3835
Epoch 14 | Iter 28500 | 528m 3

Epoch 15 | Iter 11000 | 548m 14s (- 214m 25s) (11000 71%) 0.3585
Epoch 15 | Iter 11100 | 548m 22s (- 214m 18s) (11100 71%) 0.3636
Epoch 15 | Iter 11200 | 548m 30s (- 214m 10s) (11200 71%) 0.3823
Epoch 15 | Iter 11300 | 548m 38s (- 214m 2s) (11300 71%) 0.4633
Epoch 15 | Iter 11400 | 548m 47s (- 213m 54s) (11400 71%) 0.3795
Epoch 15 | Iter 11500 | 548m 55s (- 213m 47s) (11500 71%) 0.3650
Epoch 15 | Iter 11600 | 549m 3s (- 213m 39s) (11600 71%) 0.3765
Epoch 15 | Iter 11700 | 549m 11s (- 213m 31s) (11700 72%) 0.3587
Epoch 15 | Iter 11800 | 549m 19s (- 213m 23s) (11800 72%) 0.3776
Epoch 15 | Iter 11900 | 549m 27s (- 213m 16s) (11900 72%) 0.3466
Epoch 15 | Iter 12000 | 549m 35s (- 213m 8s) (12000 72%) 0.3816
Epoch 15 | Iter 12100 | 549m 43s (- 213m 0s) (12100 72%) 0.3563
Epoch 15 | Iter 12200 | 549m 51s (- 212m 52s) (12200 72%) 0.4521
Epoch 15 | Iter 12300 | 549m 59s (- 212m 44s) (12300 72%) 0.3464
Epoch 15 | Iter 12400 | 550m 6s (- 212m 37s) (12400 72%) 0.3787
Epoch 15 | Iter 12500 | 550m 1

Epoch 15 | Iter 23700 | 562m 21s (- 196m 58s) (23700 74%) 0.3549
Epoch 15 | Iter 23800 | 562m 26s (- 196m 49s) (23800 74%) 0.3934
Epoch 15 | Iter 23900 | 562m 32s (- 196m 40s) (23900 74%) 0.5813
Epoch 15 | Iter 24000 | 562m 37s (- 196m 32s) (24000 74%) 0.3842
Epoch 15 | Iter 24100 | 562m 42s (- 196m 23s) (24100 74%) 0.3087
Epoch 15 | Iter 24200 | 562m 48s (- 196m 14s) (24200 74%) 0.3618
Epoch 15 | Iter 24300 | 562m 53s (- 196m 6s) (24300 74%) 0.3120
Epoch 15 | Iter 24400 | 562m 58s (- 195m 57s) (24400 74%) 0.3928
Epoch 15 | Iter 24500 | 563m 3s (- 195m 48s) (24500 74%) 0.3459
Epoch 15 | Iter 24600 | 563m 8s (- 195m 40s) (24600 74%) 0.3279
Epoch 15 | Iter 24700 | 563m 14s (- 195m 31s) (24700 74%) 0.3646
Epoch 15 | Iter 24800 | 563m 19s (- 195m 22s) (24800 74%) 0.2633
Epoch 15 | Iter 24900 | 563m 24s (- 195m 13s) (24900 74%) 0.3789
Epoch 15 | Iter 25000 | 563m 29s (- 195m 5s) (25000 74%) 0.3762
Epoch 15 | Iter 25100 | 563m 34s (- 194m 56s) (25100 74%) 0.5800
Epoch 15 | Iter 25200 | 563m 

Epoch 16 | Iter 7600 | 580m 42s (- 180m 21s) (7600 76%) 0.4000
Epoch 16 | Iter 7700 | 580m 51s (- 180m 13s) (7700 76%) 0.3840
Epoch 16 | Iter 7800 | 581m 0s (- 180m 6s) (7800 76%) 0.3781
Epoch 16 | Iter 7900 | 581m 9s (- 179m 59s) (7900 76%) 0.3870
Epoch 16 | Iter 8000 | 581m 18s (- 179m 51s) (8000 76%) 0.3817
Epoch 16 | Iter 8100 | 581m 27s (- 179m 44s) (8100 76%) 0.4135
Epoch 16 | Iter 8200 | 581m 36s (- 179m 36s) (8200 76%) 0.4784
Epoch 16 | Iter 8300 | 581m 45s (- 179m 29s) (8300 76%) 0.3881
Epoch 16 | Iter 8400 | 581m 54s (- 179m 21s) (8400 76%) 0.3889
Epoch 16 | Iter 8500 | 582m 3s (- 179m 14s) (8500 76%) 0.3768
Epoch 16 | Iter 8600 | 582m 12s (- 179m 6s) (8600 76%) 0.3571
Epoch 16 | Iter 8700 | 582m 21s (- 178m 59s) (8700 76%) 0.3866
Epoch 16 | Iter 8800 | 582m 30s (- 178m 51s) (8800 76%) 0.3918
Epoch 16 | Iter 8900 | 582m 39s (- 178m 44s) (8900 76%) 0.4247
Epoch 16 | Iter 9000 | 582m 47s (- 178m 36s) (9000 76%) 0.3799
Epoch 16 | Iter 9100 | 582m 56s (- 178m 29s) (9100 76%) 0.38

Epoch 16 | Iter 20400 | 596m 58s (- 163m 33s) (20400 78%) 0.3457
Epoch 16 | Iter 20500 | 597m 4s (- 163m 24s) (20500 78%) 0.4948
Epoch 16 | Iter 20600 | 597m 10s (- 163m 16s) (20600 78%) 0.4352
Epoch 16 | Iter 20700 | 597m 17s (- 163m 8s) (20700 78%) 0.3762
Epoch 16 | Iter 20800 | 597m 23s (- 163m 0s) (20800 78%) 0.3743
Epoch 16 | Iter 20900 | 597m 29s (- 162m 51s) (20900 78%) 0.3371
Epoch 16 | Iter 21000 | 597m 35s (- 162m 43s) (21000 78%) 0.3828
Epoch 16 | Iter 21100 | 597m 41s (- 162m 35s) (21100 78%) 0.3396
Epoch 16 | Iter 21200 | 597m 47s (- 162m 26s) (21200 78%) 0.3740
Epoch 16 | Iter 21300 | 597m 53s (- 162m 18s) (21300 78%) 0.3097
Epoch 16 | Iter 21400 | 597m 59s (- 162m 10s) (21400 78%) 0.3921
Epoch 16 | Iter 21500 | 598m 5s (- 162m 2s) (21500 78%) 0.3287
Epoch 16 | Iter 21600 | 598m 11s (- 161m 53s) (21600 78%) 0.4793
Epoch 16 | Iter 21700 | 598m 17s (- 161m 45s) (21700 78%) 0.4731
Epoch 16 | Iter 21800 | 598m 23s (- 161m 37s) (21800 78%) 0.3301
Epoch 16 | Iter 21900 | 598m 2

Epoch 17 | Iter 4200 | 613m 1s (- 146m 25s) (4200 80%) 0.3860
Epoch 17 | Iter 4300 | 613m 11s (- 146m 18s) (4300 80%) 0.3931
Epoch 17 | Iter 4400 | 613m 22s (- 146m 11s) (4400 80%) 0.4254
Epoch 17 | Iter 4500 | 613m 32s (- 146m 3s) (4500 80%) 0.3796
Epoch 17 | Iter 4600 | 613m 42s (- 145m 56s) (4600 80%) 0.3782
Epoch 17 | Iter 4700 | 613m 53s (- 145m 49s) (4700 80%) 0.3940
Epoch 17 | Iter 4800 | 614m 3s (- 145m 42s) (4800 80%) 0.3943
Epoch 17 | Iter 4900 | 614m 13s (- 145m 34s) (4900 80%) 0.4055
Epoch 17 | Iter 5000 | 614m 23s (- 145m 27s) (5000 80%) 0.3848
Epoch 17 | Iter 5100 | 614m 33s (- 145m 20s) (5100 80%) 0.3769
Epoch 17 | Iter 5200 | 614m 43s (- 145m 13s) (5200 80%) 0.3902
Epoch 17 | Iter 5300 | 614m 53s (- 145m 5s) (5300 80%) 0.3923
Epoch 17 | Iter 5400 | 615m 3s (- 144m 58s) (5400 80%) 0.3806
Epoch 17 | Iter 5500 | 615m 13s (- 144m 51s) (5500 80%) 0.3784
Epoch 17 | Iter 5600 | 615m 23s (- 144m 43s) (5600 80%) 0.3895
Epoch 17 | Iter 5700 | 615m 32s (- 144m 36s) (5700 80%) 0.37

Epoch 17 | Iter 17100 | 631m 3s (- 129m 54s) (17100 82%) 0.4030
Epoch 17 | Iter 17200 | 631m 10s (- 129m 46s) (17200 82%) 0.3788
Epoch 17 | Iter 17300 | 631m 17s (- 129m 38s) (17300 82%) 0.4724
Epoch 17 | Iter 17400 | 631m 24s (- 129m 30s) (17400 82%) 0.4330
Epoch 17 | Iter 17500 | 631m 31s (- 129m 22s) (17500 82%) 0.4045
Epoch 17 | Iter 17600 | 631m 38s (- 129m 14s) (17600 83%) 0.3812
Epoch 17 | Iter 17700 | 631m 44s (- 129m 5s) (17700 83%) 0.3624
Epoch 17 | Iter 17800 | 631m 51s (- 128m 57s) (17800 83%) 0.3899
Epoch 17 | Iter 17900 | 631m 58s (- 128m 49s) (17900 83%) 0.4154
Epoch 17 | Iter 18000 | 632m 5s (- 128m 41s) (18000 83%) 0.3333
Epoch 17 | Iter 18100 | 632m 12s (- 128m 33s) (18100 83%) 0.3704
Epoch 17 | Iter 18200 | 632m 18s (- 128m 25s) (18200 83%) 0.3700
Epoch 17 | Iter 18300 | 632m 25s (- 128m 17s) (18300 83%) 0.4403
Epoch 17 | Iter 18400 | 632m 32s (- 128m 9s) (18400 83%) 0.4840
Epoch 17 | Iter 18500 | 632m 39s (- 128m 1s) (18500 83%) 0.3845
Epoch 17 | Iter 18600 | 632m 4

Epoch 18 | Iter 800 | 644m 5s (- 112m 26s) (800 85%) 0.5404
Epoch 18 | Iter 900 | 644m 18s (- 112m 19s) (900 85%) 0.5279
Epoch 18 | Iter 1000 | 644m 31s (- 112m 12s) (1000 85%) 0.4832
Epoch 18 | Iter 1100 | 644m 44s (- 112m 6s) (1100 85%) 0.4846
Epoch 18 | Iter 1200 | 644m 57s (- 111m 59s) (1200 85%) 0.4821
Epoch 18 | Iter 1300 | 645m 9s (- 111m 52s) (1300 85%) 0.4842
Epoch 18 | Iter 1400 | 645m 21s (- 111m 45s) (1400 85%) 0.4671
Epoch 18 | Iter 1500 | 645m 34s (- 111m 38s) (1500 85%) 0.4332
Epoch 18 | Iter 1600 | 645m 46s (- 111m 31s) (1600 85%) 0.4259
Epoch 18 | Iter 1700 | 645m 59s (- 111m 24s) (1700 85%) 0.4508
Epoch 18 | Iter 1800 | 646m 11s (- 111m 17s) (1800 85%) 0.4292
Epoch 18 | Iter 1900 | 646m 23s (- 111m 10s) (1900 85%) 0.4346
Epoch 18 | Iter 2000 | 646m 35s (- 111m 3s) (2000 85%) 0.4311
Epoch 18 | Iter 2100 | 646m 47s (- 110m 55s) (2100 85%) 0.4229
Epoch 18 | Iter 2200 | 646m 59s (- 110m 48s) (2200 85%) 0.4093
Epoch 18 | Iter 2300 | 647m 11s (- 110m 41s) (2300 85%) 0.4057


Epoch 18 | Iter 13800 | 665m 4s (- 96m 11s) (13800 87%) 0.3520
Epoch 18 | Iter 13900 | 665m 11s (- 96m 3s) (13900 87%) 0.3748
Epoch 18 | Iter 14000 | 665m 19s (- 95m 55s) (14000 87%) 0.3347
Epoch 18 | Iter 14100 | 665m 27s (- 95m 47s) (14100 87%) 0.4863
Epoch 18 | Iter 14200 | 665m 34s (- 95m 40s) (14200 87%) 0.3495
Epoch 18 | Iter 14300 | 665m 42s (- 95m 32s) (14300 87%) 0.3524
Epoch 18 | Iter 14400 | 665m 49s (- 95m 24s) (14400 87%) 0.3863
Epoch 18 | Iter 14500 | 665m 57s (- 95m 16s) (14500 87%) 0.3362
Epoch 18 | Iter 14600 | 666m 4s (- 95m 8s) (14600 87%) 0.3689
Epoch 18 | Iter 14700 | 666m 12s (- 95m 0s) (14700 87%) 0.3732
Epoch 18 | Iter 14800 | 666m 19s (- 94m 52s) (14800 87%) 0.3566
Epoch 18 | Iter 14900 | 666m 26s (- 94m 45s) (14900 87%) 0.3888
Epoch 18 | Iter 15000 | 666m 34s (- 94m 37s) (15000 87%) 0.3649
Epoch 18 | Iter 15100 | 666m 41s (- 94m 29s) (15100 87%) 0.4464
Epoch 18 | Iter 15200 | 666m 49s (- 94m 21s) (15200 87%) 0.4546
Epoch 18 | Iter 15300 | 666m 56s (- 94m 13s) 

Epoch 18 | Iter 26700 | 678m 22s (- 78m 57s) (26700 89%) 0.2997
Epoch 18 | Iter 26800 | 678m 27s (- 78m 49s) (26800 89%) 0.3379
Epoch 18 | Iter 26900 | 678m 31s (- 78m 41s) (26900 89%) 0.3663
Epoch 18 | Iter 27000 | 678m 36s (- 78m 33s) (27000 89%) 0.3341
Epoch 18 | Iter 27100 | 678m 40s (- 78m 24s) (27100 89%) 0.3811
Epoch 18 | Iter 27200 | 678m 45s (- 78m 16s) (27200 89%) 0.3297
Epoch 18 | Iter 27300 | 678m 50s (- 78m 8s) (27300 89%) 0.6522
Epoch 18 | Iter 27400 | 678m 54s (- 78m 0s) (27400 89%) 0.3629
Epoch 18 | Iter 27500 | 678m 59s (- 77m 52s) (27500 89%) 0.3519
Epoch 18 | Iter 27600 | 679m 3s (- 77m 44s) (27600 89%) 0.2881
Epoch 18 | Iter 27700 | 679m 7s (- 77m 36s) (27700 89%) 0.3926
Epoch 18 | Iter 27800 | 679m 12s (- 77m 27s) (27800 89%) 0.3406
Epoch 18 | Iter 27900 | 679m 16s (- 77m 19s) (27900 89%) 0.3529
Epoch 18 | Iter 28000 | 679m 20s (- 77m 11s) (28000 89%) 0.3846
Epoch 18 | Iter 28100 | 679m 25s (- 77m 3s) (28100 89%) 0.6528
Epoch 18 | Iter 28200 | 679m 29s (- 76m 55s) 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 20 | Iter 12600 | 738m 47s (- 21m 36s) (12600 97%) 0.3918
Epoch 20 | Iter 12700 | 738m 55s (- 21m 28s) (12700 97%) 0.3765
Epoch 20 | Iter 12800 | 739m 3s (- 21m 20s) (12800 97%) 0.3784
Epoch 20 | Iter 12900 | 739m 11s (- 21m 12s) (12900 97%) 0.3759
Epoch 20 | Iter 13000 | 739m 19s (- 21m 5s) (13000 97%) 0.3703
Epoch 20 | Iter 13100 | 739m 27s (- 20m 57s) (13100 97%) 0.4574
Epoch 20 | Iter 13200 | 739m 35s (- 20m 49s) (13200 97%) 0.3521
Epoch 20 | Iter 13300 | 739m 43s (- 20m 41s) (13300 97%) 0.3753
Epoch 20 | Iter 13400 | 739m 50s (- 20m 33s) (13400 97%) 0.3558
Epoch 20 | Iter 13500 | 739m 56s (- 20m 26s) (13500 97%) 0.3773
Epoch 20 | Iter 13600 | 740m 4s (- 20m 18s) (13600 97%) 0.3795
Epoch 20 | Iter 13700 | 740m 12s (- 20m 10s) (13700 97%) 0.3552
Epoch 20 | Iter 13800 | 740m 20s (- 20m 2s) (13800 97%) 0.3500
Epoch 20 | Iter 13900 | 740m 28s (- 19m 54s) (13900 97%) 0.3768
Epoch 20 | Iter 14000 | 740m 36s (- 19m 46s) (14000 97%) 0.3482
Epoch 20 | Iter 14100 | 740m 44s (- 19m 39s)

Epoch 20 | Iter 25600 | 752m 53s (- 4m 39s) (25600 99%) 0.3656
Epoch 20 | Iter 25700 | 752m 58s (- 4m 31s) (25700 99%) 0.3074
Epoch 20 | Iter 25800 | 753m 3s (- 4m 23s) (25800 99%) 0.4001
Epoch 20 | Iter 25900 | 753m 8s (- 4m 15s) (25900 99%) 0.3105
Epoch 20 | Iter 26000 | 753m 13s (- 4m 8s) (26000 99%) 0.3239
Epoch 20 | Iter 26100 | 753m 18s (- 4m 0s) (26100 99%) 0.3747
Epoch 20 | Iter 26200 | 753m 23s (- 3m 52s) (26200 99%) 0.4171
Epoch 20 | Iter 26300 | 753m 28s (- 3m 44s) (26300 99%) 0.5887
Epoch 20 | Iter 26400 | 753m 33s (- 3m 36s) (26400 99%) 0.3493
Epoch 20 | Iter 26500 | 753m 38s (- 3m 29s) (26500 99%) 0.2743
Epoch 20 | Iter 26600 | 753m 42s (- 3m 21s) (26600 99%) 0.3887
Epoch 20 | Iter 26700 | 753m 47s (- 3m 13s) (26700 99%) 0.3046
Epoch 20 | Iter 26800 | 753m 52s (- 3m 5s) (26800 99%) 0.3248
Epoch 20 | Iter 26900 | 753m 56s (- 2m 57s) (26900 99%) 0.3640
Epoch 20 | Iter 27000 | 754m 1s (- 2m 50s) (27000 99%) 0.3388
Epoch 20 | Iter 27100 | 754m 6s (- 2m 42s) (27100 99%) 0.3915

In [54]:
# ---------------------------------------------------------------------------
# Experiment cell: build a Seq2Seq (Encoder + Decoder) model, move it to the
# GPU and train it with Adam for 20 epochs.
# Every name assigned here is a notebook-level global; names and values are
# left untouched because other cells may read them.
# ---------------------------------------------------------------------------

# -- Encoder dimensions ------------------------------------------------------
input_size = 240    # input width handed to the encoder's first LSTM
hidden_size = 256   # encoder LSTM hidden width; the decoder receives 2x this

# -- Decoder / vocabulary ----------------------------------------------------
vocab_size = len(char_list)
embedding_dim = 512
sos_id, eos_id = 1, 2   # NOTE(review): hard-coded indices - confirm they match char_list

# -- Optimiser settings ------------------------------------------------------
learning_rate = 1e-3
momentum = 0   # not consumed in this cell: the Adam call below takes no momentum
l2 = 1e-5      # passed to Adam as weight_decay

# Not referenced in this cell.
# NOTE(review): presumably the padding label that the loss skips - confirm where it is read.
IGNORE_ID = -1

# -- Model -------------------------------------------------------------------
# NOTE(review): the printed model shows every encoder LSTM with dropout=0.2 and
# no num_layers field; nn.LSTM applies dropout only between stacked layers, so
# confirm this setting actually has an effect.
encoder1 = Encoder(input_size, hidden_size, dropout=0.2)
decoder1 = Decoder(
    vocab_size,
    embedding_dim,
    sos_id,
    eos_id,
    2 * hidden_size,   # twice the encoder width, paired with bidirectional_encoder=True
    num_layers=1,
    bidirectional_encoder=True,
)
model1 = Seq2Seq(encoder1, decoder1)

print(model1)
model1.cuda()   # unconditional: this cell requires a CUDA device

# -- Optimiser + training run ------------------------------------------------
# The (misspelled) global name `optimizier` is kept as-is in case later cells use it.
optimizier = torch.optim.Adam(
    model1.parameters(),
    lr=learning_rate,
    weight_decay=l2,
)

# 20 epochs, one progress line every 100 iterations.
trainIters(model1, 20, optimizier, print_every=100)



Seq2Seq(
  (encoder): Encoder(
    (pyramidalBLSTM): pyramidalBLSTM(
      (lstm1): LSTM(240, 256, batch_first=True, dropout=0.2, bidirectional=True)
      (lstm2): LSTM(1024, 256, batch_first=True, dropout=0.2, bidirectional=True)
      (lstm3): LSTM(1024, 256, batch_first=True, dropout=0.2, bidirectional=True)
    )
  )
  (decoder): Decoder(
    (embedding): Embedding(5221, 512)
    (rnn): ModuleList(
      (0): LSTMCell(1024, 512)
    )
    (attention): DotProductAttention()
    (mlp): Sequential(
      (0): Linear(in_features=1024, out_features=512, bias=True)
      (1): Tanh()
      (2): Linear(in_features=512, out_features=5221, bias=True)
    )
  )
)
Epoch 1 | Iter 100 | 0m 47s (- 4636m 27s) (100 0%) 6.9207
Epoch 1 | Iter 200 | 1m 27s (- 4261m 11s) (200 0%) 6.4932
Epoch 1 | Iter 300 | 2m 7s (- 4140m 3s) (300 0%) 6.0741
Epoch 1 | Iter 400 | 2m 44s (- 4009m 5s) (400 0%) 5.6419
Epoch 1 | Iter 500 | 3m 22s (- 3941m 30s) (500 0%) 5.3040
Epoch 1 | Iter 600 | 3m 58s (- 3867m 38s) (600 

Epoch 1 | Iter 13000 | 56m 57s (- 2500m 22s) (13000 2%) 0.5697
Epoch 1 | Iter 13100 | 57m 12s (- 2491m 55s) (13100 2%) 0.6842
Epoch 1 | Iter 13200 | 57m 30s (- 2485m 42s) (13200 2%) 0.5302
Epoch 1 | Iter 13300 | 57m 49s (- 2479m 47s) (13300 2%) 0.5734
Epoch 1 | Iter 13400 | 58m 6s (- 2473m 18s) (13400 2%) 0.5343
Epoch 1 | Iter 13500 | 58m 20s (- 2464m 34s) (13500 2%) 0.5627
Epoch 1 | Iter 13600 | 58m 31s (- 2453m 20s) (13600 2%) 0.5605
Epoch 1 | Iter 13700 | 58m 44s (- 2443m 58s) (13700 2%) 0.5309
Epoch 1 | Iter 13800 | 58m 59s (- 2436m 3s) (13800 2%) 0.5290
Epoch 1 | Iter 13900 | 59m 11s (- 2426m 24s) (13900 2%) 0.5566
Epoch 1 | Iter 14000 | 59m 22s (- 2416m 1s) (14000 2%) 0.5084
Epoch 1 | Iter 14100 | 59m 33s (- 2406m 24s) (14100 2%) 0.6763
Epoch 1 | Iter 14200 | 59m 44s (- 2396m 28s) (14200 2%) 0.4995
Epoch 1 | Iter 14300 | 59m 55s (- 2386m 19s) (14300 2%) 0.5105
Epoch 1 | Iter 14400 | 60m 5s (- 2375m 52s) (14400 2%) 0.5616
Epoch 1 | Iter 14500 | 60m 16s (- 2366m 6s) (14500 2%) 0.49

Epoch 1 | Iter 26100 | 90m 44s (- 1938m 45s) (26100 4%) 0.4562
Epoch 1 | Iter 26200 | 90m 58s (- 1935m 50s) (26200 4%) 0.5081
Epoch 1 | Iter 26300 | 91m 13s (- 1933m 38s) (26300 4%) 0.6758
Epoch 1 | Iter 26400 | 91m 27s (- 1930m 53s) (26400 4%) 0.4269
Epoch 1 | Iter 26500 | 91m 42s (- 1928m 33s) (26500 4%) 0.3293
Epoch 1 | Iter 26600 | 91m 57s (- 1926m 9s) (26600 4%) 0.4584
Epoch 1 | Iter 26700 | 92m 14s (- 1924m 31s) (26700 4%) 0.3599
Epoch 1 | Iter 26800 | 92m 31s (- 1922m 54s) (26800 4%) 0.3811
Epoch 1 | Iter 26900 | 92m 49s (- 1921m 35s) (26900 4%) 0.4278
Epoch 1 | Iter 27000 | 93m 6s (- 1919m 55s) (27000 4%) 0.3745
Epoch 1 | Iter 27100 | 93m 24s (- 1918m 40s) (27100 4%) 0.4376
Epoch 1 | Iter 27200 | 93m 38s (- 1915m 53s) (27200 4%) 0.3839
Epoch 1 | Iter 27300 | 93m 47s (- 1911m 39s) (27300 4%) 0.7611
Epoch 1 | Iter 27400 | 93m 55s (- 1907m 1s) (27400 4%) 0.4335
Epoch 1 | Iter 27500 | 94m 3s (- 1902m 23s) (27500 4%) 0.4121
Epoch 1 | Iter 27600 | 94m 11s (- 1897m 49s) (27600 4%) 0.3

Epoch 2 | Iter 10300 | 120m 18s (- 1658m 10s) (10300 6%) 0.4180
Epoch 2 | Iter 10400 | 120m 29s (- 1656m 15s) (10400 6%) 0.4280
Epoch 2 | Iter 10500 | 120m 41s (- 1654m 29s) (10500 6%) 0.4100
Epoch 2 | Iter 10600 | 120m 51s (- 1652m 21s) (10600 6%) 0.3992
Epoch 2 | Iter 10700 | 121m 2s (- 1650m 17s) (10700 6%) 0.3966
Epoch 2 | Iter 10800 | 121m 13s (- 1648m 27s) (10800 6%) 0.3803
Epoch 2 | Iter 10900 | 121m 25s (- 1646m 44s) (10900 6%) 0.3825
Epoch 2 | Iter 11000 | 121m 36s (- 1644m 56s) (11000 6%) 0.3767
Epoch 2 | Iter 11100 | 121m 48s (- 1643m 10s) (11100 6%) 0.3807
Epoch 2 | Iter 11200 | 121m 59s (- 1641m 14s) (11200 6%) 0.4050
Epoch 2 | Iter 11300 | 122m 10s (- 1639m 25s) (11300 6%) 0.4921
Epoch 2 | Iter 11400 | 122m 21s (- 1637m 31s) (11400 6%) 0.4034
Epoch 2 | Iter 11500 | 122m 33s (- 1635m 43s) (11500 6%) 0.3887
Epoch 2 | Iter 11600 | 122m 44s (- 1633m 54s) (11600 6%) 0.3971
Epoch 2 | Iter 11700 | 122m 55s (- 1632m 6s) (11700 7%) 0.4062
Epoch 2 | Iter 11800 | 123m 6s (- 1630m 17

Epoch 2 | Iter 23200 | 141m 52s (- 1438m 59s) (23200 8%) 0.3765
Epoch 2 | Iter 23300 | 142m 0s (- 1437m 24s) (23300 8%) 0.3657
Epoch 2 | Iter 23400 | 142m 8s (- 1435m 44s) (23400 9%) 0.3026
Epoch 2 | Iter 23500 | 142m 16s (- 1434m 2s) (23500 9%) 0.3413
Epoch 2 | Iter 23600 | 142m 24s (- 1432m 23s) (23600 9%) 0.3681
Epoch 2 | Iter 23700 | 142m 31s (- 1430m 39s) (23700 9%) 0.3622
Epoch 2 | Iter 23800 | 142m 39s (- 1428m 56s) (23800 9%) 0.4081
Epoch 2 | Iter 23900 | 142m 47s (- 1427m 22s) (23900 9%) 0.5753
Epoch 2 | Iter 24000 | 142m 55s (- 1425m 43s) (24000 9%) 0.3930
Epoch 2 | Iter 24100 | 143m 3s (- 1424m 2s) (24100 9%) 0.3138
Epoch 2 | Iter 24200 | 143m 11s (- 1422m 25s) (24200 9%) 0.3586
Epoch 2 | Iter 24300 | 143m 19s (- 1420m 54s) (24300 9%) 0.3176
Epoch 2 | Iter 24400 | 143m 28s (- 1419m 24s) (24400 9%) 0.3970
Epoch 2 | Iter 24500 | 143m 36s (- 1417m 53s) (24500 9%) 0.3591
Epoch 2 | Iter 24600 | 143m 44s (- 1416m 20s) (24600 9%) 0.3268
Epoch 2 | Iter 24700 | 143m 53s (- 1414m 50s)

Epoch 3 | Iter 7200 | 167m 24s (- 1322m 52s) (7200 11%) 0.3427
Epoch 3 | Iter 7300 | 167m 37s (- 1322m 19s) (7300 11%) 0.3509
Epoch 3 | Iter 7400 | 167m 50s (- 1321m 44s) (7400 11%) 0.3562
Epoch 3 | Iter 7500 | 168m 3s (- 1321m 9s) (7500 11%) 0.3876
Epoch 3 | Iter 7600 | 168m 16s (- 1320m 34s) (7600 11%) 0.3580
Epoch 3 | Iter 7700 | 168m 28s (- 1319m 57s) (7700 11%) 0.3746
Epoch 3 | Iter 7800 | 168m 41s (- 1319m 21s) (7800 11%) 0.3584
Epoch 3 | Iter 7900 | 168m 53s (- 1318m 42s) (7900 11%) 0.3570
Epoch 3 | Iter 8000 | 169m 6s (- 1318m 7s) (8000 11%) 0.3575
Epoch 3 | Iter 8100 | 169m 18s (- 1317m 30s) (8100 11%) 0.3530
Epoch 3 | Iter 8200 | 169m 31s (- 1316m 55s) (8200 11%) 0.4112
Epoch 3 | Iter 8300 | 169m 43s (- 1316m 17s) (8300 11%) 0.3524
Epoch 3 | Iter 8400 | 169m 56s (- 1315m 41s) (8400 11%) 0.3555
Epoch 3 | Iter 8500 | 170m 8s (- 1315m 2s) (8500 11%) 0.3406
Epoch 3 | Iter 8600 | 170m 20s (- 1314m 23s) (8600 11%) 0.3330
Epoch 3 | Iter 8700 | 170m 33s (- 1313m 46s) (8700 11%) 0.365

Epoch 3 | Iter 20000 | 190m 18s (- 1227m 4s) (20000 13%) 0.3602
Epoch 3 | Iter 20100 | 190m 27s (- 1226m 20s) (20100 13%) 0.3119
Epoch 3 | Iter 20200 | 190m 37s (- 1225m 36s) (20200 13%) 0.3383
Epoch 3 | Iter 20300 | 190m 47s (- 1224m 52s) (20300 13%) 0.3589
Epoch 3 | Iter 20400 | 190m 57s (- 1224m 7s) (20400 13%) 0.3208
Epoch 3 | Iter 20500 | 191m 7s (- 1223m 23s) (20500 13%) 0.4502
Epoch 3 | Iter 20600 | 191m 17s (- 1222m 40s) (20600 13%) 0.3910
Epoch 3 | Iter 20700 | 191m 27s (- 1221m 57s) (20700 13%) 0.3488
Epoch 3 | Iter 20800 | 191m 37s (- 1221m 10s) (20800 13%) 0.3570
Epoch 3 | Iter 20900 | 191m 45s (- 1220m 19s) (20900 13%) 0.3069
Epoch 3 | Iter 21000 | 191m 54s (- 1219m 27s) (21000 13%) 0.3523
Epoch 3 | Iter 21100 | 192m 3s (- 1218m 34s) (21100 13%) 0.3171
Epoch 3 | Iter 21200 | 192m 11s (- 1217m 40s) (21200 13%) 0.3592
Epoch 3 | Iter 21300 | 192m 20s (- 1216m 54s) (21300 13%) 0.2878
Epoch 3 | Iter 21400 | 192m 30s (- 1216m 8s) (21400 13%) 0.3759
Epoch 3 | Iter 21500 | 192m 39

Epoch 4 | Iter 3800 | 213m 1s (- 1148m 5s) (3800 15%) 0.3574
Epoch 4 | Iter 3900 | 213m 18s (- 1148m 4s) (3900 15%) 0.3536
Epoch 4 | Iter 4000 | 213m 32s (- 1147m 51s) (4000 15%) 0.3854
Epoch 4 | Iter 4100 | 213m 46s (- 1147m 39s) (4100 15%) 0.3382
Epoch 4 | Iter 4200 | 214m 1s (- 1147m 27s) (4200 15%) 0.3416
Epoch 4 | Iter 4300 | 214m 15s (- 1147m 15s) (4300 15%) 0.3455
Epoch 4 | Iter 4400 | 214m 30s (- 1147m 7s) (4400 15%) 0.3985
Epoch 4 | Iter 4500 | 214m 44s (- 1146m 50s) (4500 15%) 0.3483
Epoch 4 | Iter 4600 | 214m 58s (- 1146m 40s) (4600 15%) 0.3655
Epoch 4 | Iter 4700 | 215m 13s (- 1146m 29s) (4700 15%) 0.4314
Epoch 4 | Iter 4800 | 215m 27s (- 1146m 16s) (4800 15%) 0.3704
Epoch 4 | Iter 4900 | 215m 41s (- 1146m 4s) (4900 15%) 0.3729
Epoch 4 | Iter 5000 | 215m 56s (- 1145m 52s) (5000 15%) 0.3493
Epoch 4 | Iter 5100 | 216m 10s (- 1145m 42s) (5100 15%) 0.3432
Epoch 4 | Iter 5200 | 216m 25s (- 1145m 33s) (5200 15%) 0.3552
Epoch 4 | Iter 5300 | 216m 40s (- 1145m 20s) (5300 15%) 0.348

Epoch 4 | Iter 16700 | 239m 24s (- 1100m 59s) (16700 17%) 0.3674
Epoch 4 | Iter 16800 | 239m 35s (- 1100m 32s) (16800 17%) 0.3378
Epoch 4 | Iter 16900 | 239m 46s (- 1100m 7s) (16900 17%) 0.3330
Epoch 4 | Iter 17000 | 239m 57s (- 1099m 39s) (17000 17%) 0.2878
Epoch 4 | Iter 17100 | 240m 7s (- 1099m 7s) (17100 17%) 0.3434
Epoch 4 | Iter 17200 | 240m 16s (- 1098m 34s) (17200 17%) 0.3176
Epoch 4 | Iter 17300 | 240m 26s (- 1098m 4s) (17300 17%) 0.3957
Epoch 4 | Iter 17400 | 240m 35s (- 1097m 28s) (17400 17%) 0.3601
Epoch 4 | Iter 17500 | 240m 45s (- 1096m 57s) (17500 17%) 0.3586
Epoch 4 | Iter 17600 | 240m 55s (- 1096m 27s) (17600 18%) 0.3344
Epoch 4 | Iter 17700 | 241m 6s (- 1095m 59s) (17700 18%) 0.3198
Epoch 4 | Iter 17800 | 241m 16s (- 1095m 28s) (17800 18%) 0.3378
Epoch 4 | Iter 17900 | 241m 26s (- 1094m 56s) (17900 18%) 0.3749
Epoch 4 | Iter 18000 | 241m 35s (- 1094m 24s) (18000 18%) 0.2879
Epoch 4 | Iter 18100 | 241m 45s (- 1093m 52s) (18100 18%) 0.3280
Epoch 4 | Iter 18200 | 241m 55

Epoch 5 | Iter 300 | 257m 45s (- 1027m 42s) (300 20%) 0.6521
Epoch 5 | Iter 400 | 258m 3s (- 1027m 50s) (400 20%) 0.6076
Epoch 5 | Iter 500 | 258m 22s (- 1028m 0s) (500 20%) 0.5991
Epoch 5 | Iter 600 | 258m 41s (- 1028m 9s) (600 20%) 0.5545
Epoch 5 | Iter 700 | 258m 58s (- 1028m 11s) (700 20%) 0.5196
Epoch 5 | Iter 800 | 259m 16s (- 1028m 17s) (800 20%) 0.4784
Epoch 5 | Iter 900 | 259m 34s (- 1028m 23s) (900 20%) 0.4659
Epoch 5 | Iter 1000 | 259m 52s (- 1028m 28s) (1000 20%) 0.4273
Epoch 5 | Iter 1100 | 260m 10s (- 1028m 32s) (1100 20%) 0.4328
Epoch 5 | Iter 1200 | 260m 28s (- 1028m 37s) (1200 20%) 0.4197
Epoch 5 | Iter 1300 | 260m 45s (- 1028m 40s) (1300 20%) 0.4201
Epoch 5 | Iter 1400 | 261m 2s (- 1028m 42s) (1400 20%) 0.3997
Epoch 5 | Iter 1500 | 261m 19s (- 1028m 42s) (1500 20%) 0.3815
Epoch 5 | Iter 1600 | 261m 35s (- 1028m 39s) (1600 20%) 0.3653
Epoch 5 | Iter 1700 | 261m 52s (- 1028m 41s) (1700 20%) 0.3856
Epoch 5 | Iter 1800 | 262m 9s (- 1028m 45s) (1800 20%) 0.3571
Epoch 5 | I

Epoch 5 | Iter 13300 | 287m 1s (- 1001m 20s) (13300 22%) 0.3308
Epoch 5 | Iter 13400 | 287m 12s (- 1000m 59s) (13400 22%) 0.3130
Epoch 5 | Iter 13500 | 287m 23s (- 1000m 37s) (13500 22%) 0.3368
Epoch 5 | Iter 13600 | 287m 34s (- 1000m 16s) (13600 22%) 0.3375
Epoch 5 | Iter 13700 | 287m 45s (- 999m 54s) (13700 22%) 0.3166
Epoch 5 | Iter 13800 | 287m 55s (- 999m 32s) (13800 22%) 0.3070
Epoch 5 | Iter 13900 | 288m 6s (- 999m 10s) (13900 22%) 0.3210
Epoch 5 | Iter 14000 | 288m 17s (- 998m 49s) (14000 22%) 0.2952
Epoch 5 | Iter 14100 | 288m 27s (- 998m 26s) (14100 22%) 0.4216
Epoch 5 | Iter 14200 | 288m 38s (- 998m 5s) (14200 22%) 0.3004
Epoch 5 | Iter 14300 | 288m 49s (- 997m 42s) (14300 22%) 0.3105
Epoch 5 | Iter 14400 | 289m 0s (- 997m 20s) (14400 22%) 0.3414
Epoch 5 | Iter 14500 | 289m 10s (- 996m 57s) (14500 22%) 0.2903
Epoch 5 | Iter 14600 | 289m 21s (- 996m 35s) (14600 22%) 0.3195
Epoch 5 | Iter 14700 | 289m 31s (- 996m 12s) (14700 22%) 0.3242
Epoch 5 | Iter 14800 | 289m 41s (- 995m 

Epoch 5 | Iter 26200 | 306m 29s (- 945m 5s) (26200 24%) 0.3828
Epoch 5 | Iter 26300 | 306m 37s (- 944m 36s) (26300 24%) 0.5135
Epoch 5 | Iter 26400 | 306m 44s (- 944m 6s) (26400 24%) 0.3103
Epoch 5 | Iter 26500 | 306m 51s (- 943m 35s) (26500 24%) 0.2449
Epoch 5 | Iter 26600 | 306m 58s (- 943m 4s) (26600 24%) 0.3574
Epoch 5 | Iter 26700 | 307m 5s (- 942m 33s) (26700 24%) 0.2629
Epoch 5 | Iter 26800 | 307m 12s (- 942m 2s) (26800 24%) 0.2835
Epoch 5 | Iter 26900 | 307m 18s (- 941m 30s) (26900 24%) 0.3317
Epoch 5 | Iter 27000 | 307m 25s (- 940m 59s) (27000 24%) 0.2929
Epoch 5 | Iter 27100 | 307m 32s (- 940m 27s) (27100 24%) 0.3404
Epoch 5 | Iter 27200 | 307m 38s (- 939m 55s) (27200 24%) 0.2878
Epoch 5 | Iter 27300 | 307m 46s (- 939m 26s) (27300 24%) 0.5984
Epoch 5 | Iter 27400 | 307m 53s (- 938m 55s) (27400 24%) 0.3347
Epoch 5 | Iter 27500 | 307m 59s (- 938m 23s) (27500 24%) 0.3159
Epoch 5 | Iter 27600 | 308m 6s (- 937m 52s) (27600 24%) 0.2584
Epoch 5 | Iter 27700 | 308m 12s (- 937m 20s) (

Epoch 6 | Iter 10400 | 334m 28s (- 914m 24s) (10400 26%) 0.3403
Epoch 6 | Iter 10500 | 334m 39s (- 914m 8s) (10500 26%) 0.3284
Epoch 6 | Iter 10600 | 334m 51s (- 913m 52s) (10600 26%) 0.3050
Epoch 6 | Iter 10700 | 335m 3s (- 913m 36s) (10700 26%) 0.3085
Epoch 6 | Iter 10800 | 335m 14s (- 913m 19s) (10800 26%) 0.2999
Epoch 6 | Iter 10900 | 335m 26s (- 913m 3s) (10900 26%) 0.3057
Epoch 6 | Iter 11000 | 335m 37s (- 912m 45s) (11000 26%) 0.2966
Epoch 6 | Iter 11100 | 335m 48s (- 912m 29s) (11100 26%) 0.2997
Epoch 6 | Iter 11200 | 335m 59s (- 912m 11s) (11200 26%) 0.3272
Epoch 6 | Iter 11300 | 336m 11s (- 911m 55s) (11300 26%) 0.4050
Epoch 6 | Iter 11400 | 336m 22s (- 911m 37s) (11400 26%) 0.3258
Epoch 6 | Iter 11500 | 336m 33s (- 911m 21s) (11500 26%) 0.3075
Epoch 6 | Iter 11600 | 336m 44s (- 911m 3s) (11600 26%) 0.3220
Epoch 6 | Iter 11700 | 336m 56s (- 910m 47s) (11700 27%) 0.3071
Epoch 6 | Iter 11800 | 337m 7s (- 910m 30s) (11800 27%) 0.3091
Epoch 6 | Iter 11900 | 337m 19s (- 910m 14s) 

Epoch 6 | Iter 23300 | 355m 47s (- 871m 26s) (23300 28%) 0.3196
Epoch 6 | Iter 23400 | 355m 55s (- 871m 2s) (23400 29%) 0.2483
Epoch 6 | Iter 23500 | 356m 3s (- 870m 39s) (23500 29%) 0.2893
Epoch 6 | Iter 23600 | 356m 11s (- 870m 15s) (23600 29%) 0.3149
Epoch 6 | Iter 23700 | 356m 19s (- 869m 50s) (23700 29%) 0.3047
Epoch 6 | Iter 23800 | 356m 27s (- 869m 26s) (23800 29%) 0.3427
Epoch 6 | Iter 23900 | 356m 35s (- 869m 2s) (23900 29%) 0.5045
Epoch 6 | Iter 24000 | 356m 42s (- 868m 37s) (24000 29%) 0.3333
Epoch 6 | Iter 24100 | 356m 50s (- 868m 12s) (24100 29%) 0.2644
Epoch 6 | Iter 24200 | 356m 57s (- 867m 47s) (24200 29%) 0.3071
Epoch 6 | Iter 24300 | 357m 5s (- 867m 22s) (24300 29%) 0.2591
Epoch 6 | Iter 24400 | 357m 12s (- 866m 57s) (24400 29%) 0.3456
Epoch 6 | Iter 24500 | 357m 20s (- 866m 32s) (24500 29%) 0.3109
Epoch 6 | Iter 24600 | 357m 27s (- 866m 7s) (24600 29%) 0.2803
Epoch 6 | Iter 24700 | 357m 34s (- 865m 41s) (24700 29%) 0.3156
Epoch 6 | Iter 24800 | 357m 42s (- 865m 17s) 

Epoch 7 | Iter 7400 | 380m 19s (- 836m 1s) (7400 31%) 0.3119
Epoch 7 | Iter 7500 | 380m 31s (- 835m 46s) (7500 31%) 0.3546
Epoch 7 | Iter 7600 | 380m 41s (- 835m 30s) (7600 31%) 0.3214
Epoch 7 | Iter 7700 | 380m 52s (- 835m 14s) (7700 31%) 0.3156
Epoch 7 | Iter 7800 | 381m 3s (- 834m 59s) (7800 31%) 0.3096
Epoch 7 | Iter 7900 | 381m 14s (- 834m 43s) (7900 31%) 0.3223
Epoch 7 | Iter 8000 | 381m 25s (- 834m 26s) (8000 31%) 1.0139
Epoch 7 | Iter 8100 | 381m 36s (- 834m 10s) (8100 31%) 0.6378
Epoch 7 | Iter 8200 | 381m 47s (- 833m 54s) (8200 31%) 0.5784
Epoch 7 | Iter 8300 | 381m 58s (- 833m 38s) (8300 31%) 0.4500
Epoch 7 | Iter 8400 | 382m 8s (- 833m 22s) (8400 31%) 0.4264
Epoch 7 | Iter 8500 | 382m 19s (- 833m 6s) (8500 31%) 0.4079
Epoch 7 | Iter 8600 | 382m 31s (- 832m 52s) (8600 31%) 0.3825
Epoch 7 | Iter 8700 | 382m 41s (- 832m 35s) (8700 31%) 0.3892
Epoch 7 | Iter 8800 | 382m 52s (- 832m 19s) (8800 31%) 0.3928
Epoch 7 | Iter 8900 | 383m 4s (- 832m 4s) (8900 31%) 0.4082
Epoch 7 | Iter

Epoch 7 | Iter 20400 | 402m 23s (- 798m 57s) (20400 33%) 0.2865
Epoch 7 | Iter 20500 | 402m 32s (- 798m 38s) (20500 33%) 0.4078
Epoch 7 | Iter 20600 | 402m 41s (- 798m 20s) (20600 33%) 0.3571
Epoch 7 | Iter 20700 | 402m 50s (- 798m 1s) (20700 33%) 0.3091
Epoch 7 | Iter 20800 | 402m 59s (- 797m 41s) (20800 33%) 0.3306
Epoch 7 | Iter 20900 | 403m 8s (- 797m 22s) (20900 33%) 0.2786
Epoch 7 | Iter 21000 | 403m 16s (- 797m 3s) (21000 33%) 0.3191
Epoch 7 | Iter 21100 | 403m 25s (- 796m 43s) (21100 33%) 0.2895
Epoch 7 | Iter 21200 | 403m 33s (- 796m 23s) (21200 33%) 0.3318
Epoch 7 | Iter 21300 | 403m 42s (- 796m 4s) (21300 33%) 0.2556
Epoch 7 | Iter 21400 | 403m 51s (- 795m 44s) (21400 33%) 0.3328
Epoch 7 | Iter 21500 | 403m 59s (- 795m 24s) (21500 33%) 0.2673
Epoch 7 | Iter 21600 | 404m 8s (- 795m 4s) (21600 33%) 0.3981
Epoch 7 | Iter 21700 | 404m 17s (- 794m 45s) (21700 33%) 0.3857
Epoch 7 | Iter 21800 | 404m 25s (- 794m 25s) (21800 33%) 0.2697
Epoch 7 | Iter 21900 | 404m 34s (- 794m 5s) (2

Epoch 8 | Iter 4400 | 425m 34s (- 764m 42s) (4400 35%) 0.3544
Epoch 8 | Iter 4500 | 425m 48s (- 764m 33s) (4500 35%) 0.3138
Epoch 8 | Iter 4600 | 426m 3s (- 764m 26s) (4600 35%) 0.3095
Epoch 8 | Iter 4700 | 426m 18s (- 764m 19s) (4700 35%) 0.3316
Epoch 8 | Iter 4800 | 426m 32s (- 764m 10s) (4800 35%) 0.3526
Epoch 8 | Iter 4900 | 426m 46s (- 764m 1s) (4900 35%) 0.3431
Epoch 8 | Iter 5000 | 427m 0s (- 763m 52s) (5000 35%) 0.3238
Epoch 8 | Iter 5100 | 427m 15s (- 763m 45s) (5100 35%) 0.3166
Epoch 8 | Iter 5200 | 427m 29s (- 763m 36s) (5200 35%) 0.3253
Epoch 8 | Iter 5300 | 427m 43s (- 763m 27s) (5300 35%) 0.3185
Epoch 8 | Iter 5400 | 427m 58s (- 763m 19s) (5400 35%) 0.3169
Epoch 8 | Iter 5500 | 428m 12s (- 763m 11s) (5500 35%) 0.3623
Epoch 8 | Iter 5600 | 428m 26s (- 763m 1s) (5600 35%) 0.3334
Epoch 8 | Iter 5700 | 428m 40s (- 762m 52s) (5700 35%) 0.3150
Epoch 8 | Iter 5800 | 428m 55s (- 762m 44s) (5800 35%) 0.3620
Epoch 8 | Iter 5900 | 429m 9s (- 762m 35s) (5900 36%) 0.3219
Epoch 8 | Ite

Epoch 8 | Iter 17500 | 449m 41s (- 733m 46s) (17500 37%) 0.3162
Epoch 8 | Iter 17600 | 449m 50s (- 733m 29s) (17600 38%) 0.3057
Epoch 8 | Iter 17700 | 449m 59s (- 733m 12s) (17700 38%) 0.2963
Epoch 8 | Iter 17800 | 450m 8s (- 732m 54s) (17800 38%) 0.3116
Epoch 8 | Iter 17900 | 450m 17s (- 732m 36s) (17900 38%) 0.3562
Epoch 8 | Iter 18000 | 450m 26s (- 732m 19s) (18000 38%) 0.2738
Epoch 8 | Iter 18100 | 450m 34s (- 732m 1s) (18100 38%) 0.3030
Epoch 8 | Iter 18200 | 450m 43s (- 731m 43s) (18200 38%) 0.3121
Epoch 8 | Iter 18300 | 450m 52s (- 731m 26s) (18300 38%) 0.3634
Epoch 8 | Iter 18400 | 451m 1s (- 731m 9s) (18400 38%) 0.3894
Epoch 8 | Iter 18500 | 451m 10s (- 730m 51s) (18500 38%) 0.3386
Epoch 8 | Iter 18600 | 451m 19s (- 730m 34s) (18600 38%) 0.3029
Epoch 8 | Iter 18700 | 451m 28s (- 730m 17s) (18700 38%) 0.3119
Epoch 8 | Iter 18800 | 451m 37s (- 729m 59s) (18800 38%) 0.2762
Epoch 8 | Iter 18900 | 451m 46s (- 729m 42s) (18900 38%) 0.3121
Epoch 8 | Iter 19000 | 451m 54s (- 729m 24s)

Epoch 9 | Iter 1400 | 468m 52s (- 696m 19s) (1400 40%) 0.4690
Epoch 9 | Iter 1500 | 469m 8s (- 696m 13s) (1500 40%) 0.4318
Epoch 9 | Iter 1600 | 469m 23s (- 696m 6s) (1600 40%) 0.4083
Epoch 9 | Iter 1700 | 469m 38s (- 695m 59s) (1700 40%) 0.4275
Epoch 9 | Iter 1800 | 469m 54s (- 695m 52s) (1800 40%) 0.3950
Epoch 9 | Iter 1900 | 470m 9s (- 695m 44s) (1900 40%) 0.4357
Epoch 9 | Iter 2000 | 470m 24s (- 695m 37s) (2000 40%) 0.4223
Epoch 9 | Iter 2100 | 470m 39s (- 695m 29s) (2100 40%) 0.4014
Epoch 9 | Iter 2200 | 470m 54s (- 695m 22s) (2200 40%) 0.3852
Epoch 9 | Iter 2300 | 471m 9s (- 695m 14s) (2300 40%) 0.3854
Epoch 9 | Iter 2400 | 471m 23s (- 695m 6s) (2400 40%) 0.4019
Epoch 9 | Iter 2500 | 471m 38s (- 694m 58s) (2500 40%) 0.3692
Epoch 9 | Iter 2600 | 471m 53s (- 694m 50s) (2600 40%) 0.3730
Epoch 9 | Iter 2700 | 472m 7s (- 694m 41s) (2700 40%) 0.3480
Epoch 9 | Iter 2800 | 472m 21s (- 694m 33s) (2800 40%) 0.3620
Epoch 9 | Iter 2900 | 472m 36s (- 694m 24s) (2900 40%) 0.3475
Epoch 9 | Iter

Epoch 9 | Iter 14600 | 495m 18s (- 670m 6s) (14600 42%) 0.3125
Epoch 9 | Iter 14700 | 495m 29s (- 669m 51s) (14700 42%) 0.3036
Epoch 9 | Iter 14800 | 495m 38s (- 669m 36s) (14800 42%) 0.2988
Epoch 9 | Iter 14900 | 495m 48s (- 669m 21s) (14900 42%) 0.3318
Epoch 9 | Iter 15000 | 495m 58s (- 669m 7s) (15000 42%) 0.2972
Epoch 9 | Iter 15100 | 496m 8s (- 668m 52s) (15100 42%) 0.3636
Epoch 9 | Iter 15200 | 496m 18s (- 668m 37s) (15200 42%) 0.3570
Epoch 9 | Iter 15300 | 496m 28s (- 668m 22s) (15300 42%) 0.3052
Epoch 9 | Iter 15400 | 496m 38s (- 668m 7s) (15400 42%) 0.3070
Epoch 9 | Iter 15500 | 496m 48s (- 667m 53s) (15500 42%) 0.2946
Epoch 9 | Iter 15600 | 496m 57s (- 667m 38s) (15600 42%) 0.3265
Epoch 9 | Iter 15700 | 497m 7s (- 667m 23s) (15700 42%) 0.3153
Epoch 9 | Iter 15800 | 497m 17s (- 667m 8s) (15800 42%) 0.3222
Epoch 9 | Iter 15900 | 497m 27s (- 666m 53s) (15900 42%) 0.3390
Epoch 9 | Iter 16000 | 497m 36s (- 666m 38s) (16000 42%) 0.3283
Epoch 9 | Iter 16100 | 497m 46s (- 666m 23s) (

Epoch 9 | Iter 27500 | 513m 32s (- 635m 2s) (27500 44%) 0.2936
Epoch 9 | Iter 27600 | 513m 38s (- 634m 43s) (27600 44%) 0.2415
Epoch 9 | Iter 27700 | 513m 44s (- 634m 24s) (27700 44%) 0.3371
Epoch 9 | Iter 27800 | 513m 50s (- 634m 5s) (27800 44%) 0.2935
Epoch 9 | Iter 27900 | 513m 56s (- 633m 46s) (27900 44%) 0.2899
Epoch 9 | Iter 28000 | 514m 2s (- 633m 27s) (28000 44%) 0.3110
Epoch 9 | Iter 28100 | 514m 8s (- 633m 9s) (28100 44%) 0.5554
Epoch 9 | Iter 28200 | 514m 14s (- 632m 50s) (28200 44%) 0.3286
Epoch 9 | Iter 28300 | 514m 20s (- 632m 30s) (28300 44%) 0.2627
Epoch 9 | Iter 28400 | 514m 26s (- 632m 11s) (28400 44%) 0.3093
Epoch 9 | Iter 28500 | 514m 31s (- 631m 52s) (28500 44%) 0.3117
Epoch 9 | Iter 28600 | 514m 37s (- 631m 33s) (28600 44%) 0.4635
Epoch 9 | Iter 28700 | 514m 43s (- 631m 13s) (28700 44%) 0.3692
Epoch 9 | Iter 28800 | 514m 48s (- 630m 54s) (28800 44%) 0.2694
Epoch 9 | Iter 28900 | 514m 54s (- 630m 34s) (28900 44%) 0.3088
Epoch 9 | Iter 29000 | 514m 59s (- 630m 14s) 

Epoch 10 | Iter 11500 | 540m 28s (- 610m 12s) (11500 46%) 0.3084
Epoch 10 | Iter 11600 | 540m 39s (- 609m 59s) (11600 46%) 0.3394
Epoch 10 | Iter 11700 | 540m 49s (- 609m 45s) (11700 47%) 0.3046
Epoch 10 | Iter 11800 | 541m 0s (- 609m 32s) (11800 47%) 0.3107
Epoch 10 | Iter 11900 | 541m 10s (- 609m 19s) (11900 47%) 0.2846
Epoch 10 | Iter 12000 | 541m 21s (- 609m 6s) (12000 47%) 0.3105
Epoch 10 | Iter 12100 | 541m 31s (- 608m 52s) (12100 47%) 0.2907
Epoch 10 | Iter 12200 | 541m 42s (- 608m 39s) (12200 47%) 0.3808
Epoch 10 | Iter 12300 | 541m 52s (- 608m 25s) (12300 47%) 0.2857
Epoch 10 | Iter 12400 | 542m 2s (- 608m 12s) (12400 47%) 0.4421
Epoch 10 | Iter 12500 | 542m 13s (- 607m 58s) (12500 47%) 0.3656
Epoch 10 | Iter 12600 | 542m 23s (- 607m 45s) (12600 47%) 0.3636
Epoch 10 | Iter 12700 | 542m 33s (- 607m 31s) (12700 47%) 0.3264
Epoch 10 | Iter 12800 | 542m 44s (- 607m 18s) (12800 47%) 0.3372
Epoch 10 | Iter 12900 | 542m 54s (- 607m 4s) (12900 47%) 0.3511
Epoch 10 | Iter 13000 | 543m 

Epoch 10 | Iter 24200 | 560m 33s (- 580m 2s) (24200 49%) 0.3097
Epoch 10 | Iter 24300 | 560m 40s (- 579m 46s) (24300 49%) 0.2534
Epoch 10 | Iter 24400 | 560m 48s (- 579m 30s) (24400 49%) 0.3296
Epoch 10 | Iter 24500 | 560m 56s (- 579m 14s) (24500 49%) 0.2932
Epoch 10 | Iter 24600 | 561m 4s (- 578m 59s) (24600 49%) 0.2618
Epoch 10 | Iter 24700 | 561m 11s (- 578m 43s) (24700 49%) 0.3109
Epoch 10 | Iter 24800 | 561m 19s (- 578m 27s) (24800 49%) 0.2186
Epoch 10 | Iter 24900 | 561m 26s (- 578m 10s) (24900 49%) 0.3119
Epoch 10 | Iter 25000 | 561m 34s (- 577m 55s) (25000 49%) 0.3050
Epoch 10 | Iter 25100 | 561m 42s (- 577m 39s) (25100 49%) 0.4602
Epoch 10 | Iter 25200 | 561m 50s (- 577m 23s) (25200 49%) 0.3006
Epoch 10 | Iter 25300 | 561m 57s (- 577m 7s) (25300 49%) 0.3347
Epoch 10 | Iter 25400 | 562m 5s (- 576m 51s) (25400 49%) 0.2415
Epoch 10 | Iter 25500 | 562m 12s (- 576m 35s) (25500 49%) 0.2454
Epoch 10 | Iter 25600 | 562m 20s (- 576m 19s) (25600 49%) 0.2936
Epoch 10 | Iter 25700 | 562m 

Epoch 11 | Iter 8100 | 585m 29s (- 553m 52s) (8100 51%) 0.3080
Epoch 11 | Iter 8200 | 585m 41s (- 553m 40s) (8200 51%) 0.3615
Epoch 11 | Iter 8300 | 585m 53s (- 553m 29s) (8300 51%) 0.3077
Epoch 11 | Iter 8400 | 586m 4s (- 553m 17s) (8400 51%) 0.3058
Epoch 11 | Iter 8500 | 586m 16s (- 553m 5s) (8500 51%) 0.2937
Epoch 11 | Iter 8600 | 586m 27s (- 552m 53s) (8600 51%) 0.2903
Epoch 11 | Iter 8700 | 586m 39s (- 552m 41s) (8700 51%) 0.3119
Epoch 11 | Iter 8800 | 586m 50s (- 552m 29s) (8800 51%) 0.3110
Epoch 11 | Iter 8900 | 587m 1s (- 552m 17s) (8900 51%) 0.3357
Epoch 11 | Iter 9000 | 587m 13s (- 552m 5s) (9000 51%) 0.3127
Epoch 11 | Iter 9100 | 587m 24s (- 551m 53s) (9100 51%) 0.3121
Epoch 11 | Iter 9200 | 587m 35s (- 551m 40s) (9200 51%) 0.2936
Epoch 11 | Iter 9300 | 587m 46s (- 551m 28s) (9300 51%) 0.3106
Epoch 11 | Iter 9400 | 587m 57s (- 551m 16s) (9400 51%) 0.3036
Epoch 11 | Iter 9500 | 588m 9s (- 551m 4s) (9500 51%) 0.3006
Epoch 11 | Iter 9600 | 588m 20s (- 550m 52s) (9600 51%) 0.333

Epoch 11 | Iter 20900 | 606m 56s (- 525m 50s) (20900 53%) 0.2608
Epoch 11 | Iter 21000 | 607m 5s (- 525m 36s) (21000 53%) 0.3072
Epoch 11 | Iter 21100 | 607m 14s (- 525m 22s) (21100 53%) 0.2772
Epoch 11 | Iter 21200 | 607m 23s (- 525m 7s) (21200 53%) 0.3217
Epoch 11 | Iter 21300 | 607m 32s (- 524m 53s) (21300 53%) 0.2467
Epoch 11 | Iter 21400 | 607m 41s (- 524m 40s) (21400 53%) 0.3184
Epoch 11 | Iter 21500 | 607m 50s (- 524m 25s) (21500 53%) 0.2636
Epoch 11 | Iter 21600 | 607m 59s (- 524m 11s) (21600 53%) 0.3877
Epoch 11 | Iter 21700 | 608m 7s (- 523m 57s) (21700 53%) 0.3810
Epoch 11 | Iter 21800 | 608m 16s (- 523m 43s) (21800 53%) 0.2603
Epoch 11 | Iter 21900 | 608m 25s (- 523m 29s) (21900 53%) 0.2902
Epoch 11 | Iter 22000 | 608m 34s (- 523m 15s) (22000 53%) 0.3101
Epoch 11 | Iter 22100 | 608m 42s (- 523m 0s) (22100 53%) 0.2338
Epoch 11 | Iter 22200 | 608m 50s (- 522m 46s) (22200 53%) 0.3171
Epoch 11 | Iter 22300 | 608m 58s (- 522m 31s) (22300 53%) 0.2770
Epoch 11 | Iter 22400 | 609m 

Epoch 12 | Iter 5400 | 630m 47s (- 497m 8s) (5400 55%) 0.3235
Epoch 12 | Iter 5500 | 631m 0s (- 496m 57s) (5500 55%) 0.3097
Epoch 12 | Iter 5600 | 631m 13s (- 496m 46s) (5600 55%) 0.3153
Epoch 12 | Iter 5700 | 631m 26s (- 496m 36s) (5700 55%) 0.3045
Epoch 12 | Iter 5800 | 631m 39s (- 496m 25s) (5800 55%) 0.3530
Epoch 12 | Iter 5900 | 631m 51s (- 496m 14s) (5900 56%) 0.3152
Epoch 12 | Iter 6000 | 632m 4s (- 496m 3s) (6000 56%) 0.3053
Epoch 12 | Iter 6100 | 632m 16s (- 495m 53s) (6100 56%) 0.3092
Epoch 12 | Iter 6200 | 632m 29s (- 495m 42s) (6200 56%) 0.3013
Epoch 12 | Iter 6300 | 632m 41s (- 495m 31s) (6300 56%) 0.3567
Epoch 12 | Iter 6400 | 632m 54s (- 495m 20s) (6400 56%) 0.2984
Epoch 12 | Iter 6500 | 633m 6s (- 495m 9s) (6500 56%) 0.3047
Epoch 12 | Iter 6600 | 633m 19s (- 494m 58s) (6600 56%) 0.3087
Epoch 12 | Iter 6700 | 633m 31s (- 494m 47s) (6700 56%) 0.3266
Epoch 12 | Iter 6800 | 633m 43s (- 494m 36s) (6800 56%) 0.3115
Epoch 12 | Iter 6900 | 633m 55s (- 494m 25s) (6900 56%) 0.337

Epoch 12 | Iter 18300 | 653m 39s (- 470m 43s) (18300 58%) 0.3544
Epoch 12 | Iter 18400 | 653m 48s (- 470m 29s) (18400 58%) 0.3809
Epoch 12 | Iter 18500 | 653m 57s (- 470m 16s) (18500 58%) 0.3049
Epoch 12 | Iter 18600 | 654m 6s (- 470m 2s) (18600 58%) 0.2885
Epoch 12 | Iter 18700 | 654m 15s (- 469m 49s) (18700 58%) 0.3007
Epoch 12 | Iter 18800 | 654m 23s (- 469m 35s) (18800 58%) 0.2612
Epoch 12 | Iter 18900 | 654m 32s (- 469m 22s) (18900 58%) 0.2974
Epoch 12 | Iter 19000 | 654m 41s (- 469m 9s) (19000 58%) 0.3158
Epoch 12 | Iter 19100 | 654m 50s (- 468m 55s) (19100 58%) 0.2779
Epoch 12 | Iter 19200 | 654m 59s (- 468m 42s) (19200 58%) 0.3114
Epoch 12 | Iter 19300 | 655m 8s (- 468m 28s) (19300 58%) 0.3017
Epoch 12 | Iter 19400 | 655m 17s (- 468m 15s) (19400 58%) 0.3603
Epoch 12 | Iter 19500 | 655m 26s (- 468m 1s) (19500 58%) 0.3940
Epoch 12 | Iter 19600 | 655m 34s (- 467m 48s) (19600 58%) 0.2825
Epoch 12 | Iter 19700 | 655m 43s (- 467m 34s) (19700 58%) 0.3026
Epoch 12 | Iter 19800 | 655m 5

Epoch 13 | Iter 2000 | 672m 52s (- 442m 12s) (2000 60%) 0.5322
Epoch 13 | Iter 2100 | 673m 7s (- 442m 3s) (2100 60%) 0.4740
Epoch 13 | Iter 2200 | 673m 22s (- 441m 54s) (2200 60%) 0.4490
Epoch 13 | Iter 2300 | 673m 37s (- 441m 45s) (2300 60%) 0.4380
Epoch 13 | Iter 2400 | 673m 52s (- 441m 36s) (2400 60%) 0.4605
Epoch 13 | Iter 2500 | 674m 7s (- 441m 27s) (2500 60%) 0.4191
Epoch 13 | Iter 2600 | 674m 22s (- 441m 18s) (2600 60%) 0.4357
Epoch 13 | Iter 2700 | 674m 38s (- 441m 9s) (2700 60%) 0.4038
Epoch 13 | Iter 2800 | 674m 53s (- 441m 0s) (2800 60%) 0.4356
Epoch 13 | Iter 2900 | 675m 7s (- 440m 50s) (2900 60%) 0.3995
Epoch 13 | Iter 3000 | 675m 22s (- 440m 41s) (3000 60%) 0.4177
Epoch 13 | Iter 3100 | 675m 36s (- 440m 31s) (3100 60%) 0.3909
Epoch 13 | Iter 3200 | 675m 52s (- 440m 22s) (3200 60%) 0.3774
Epoch 13 | Iter 3300 | 676m 6s (- 440m 13s) (3300 60%) 0.4215
Epoch 13 | Iter 3400 | 676m 21s (- 440m 3s) (3400 60%) 0.3908
Epoch 13 | Iter 3500 | 676m 35s (- 439m 54s) (3500 60%) 0.3792


Epoch 13 | Iter 15000 | 698m 34s (- 417m 53s) (15000 62%) 0.2939
Epoch 13 | Iter 15100 | 698m 44s (- 417m 41s) (15100 62%) 0.3599
Epoch 13 | Iter 15200 | 698m 54s (- 417m 29s) (15200 62%) 0.3515
Epoch 13 | Iter 15300 | 699m 3s (- 417m 16s) (15300 62%) 0.3017
Epoch 13 | Iter 15400 | 699m 13s (- 417m 4s) (15400 62%) 0.3127
Epoch 13 | Iter 15500 | 699m 23s (- 416m 51s) (15500 62%) 0.2932
Epoch 13 | Iter 15600 | 699m 33s (- 416m 39s) (15600 62%) 0.3263
Epoch 13 | Iter 15700 | 699m 42s (- 416m 26s) (15700 62%) 0.3103
Epoch 13 | Iter 15800 | 699m 52s (- 416m 14s) (15800 62%) 0.2907
Epoch 13 | Iter 15900 | 700m 2s (- 416m 1s) (15900 62%) 0.3318
Epoch 13 | Iter 16000 | 700m 12s (- 415m 49s) (16000 62%) 0.3179
Epoch 13 | Iter 16100 | 700m 22s (- 415m 36s) (16100 62%) 0.2970
Epoch 13 | Iter 16200 | 700m 31s (- 415m 24s) (16200 62%) 0.3825
Epoch 13 | Iter 16300 | 700m 41s (- 415m 11s) (16300 62%) 0.3112
Epoch 13 | Iter 16400 | 700m 51s (- 414m 59s) (16400 62%) 0.3220
Epoch 13 | Iter 16500 | 701m 

Epoch 13 | Iter 27700 | 715m 48s (- 389m 46s) (27700 64%) 0.3351
Epoch 13 | Iter 27800 | 715m 54s (- 389m 32s) (27800 64%) 0.2879
Epoch 13 | Iter 27900 | 716m 0s (- 389m 17s) (27900 64%) 0.2966
Epoch 13 | Iter 28000 | 716m 6s (- 389m 3s) (28000 64%) 0.3181
Epoch 13 | Iter 28100 | 716m 13s (- 388m 49s) (28100 64%) 0.5453
Epoch 13 | Iter 28200 | 716m 19s (- 388m 35s) (28200 64%) 0.3046
Epoch 13 | Iter 28300 | 716m 24s (- 388m 20s) (28300 64%) 0.2506
Epoch 13 | Iter 28400 | 716m 30s (- 388m 6s) (28400 64%) 0.3066
Epoch 13 | Iter 28500 | 716m 36s (- 387m 51s) (28500 64%) 0.3065
Epoch 13 | Iter 28600 | 716m 42s (- 387m 37s) (28600 64%) 0.4634
Epoch 13 | Iter 28700 | 716m 47s (- 387m 23s) (28700 64%) 0.3570
Epoch 13 | Iter 28800 | 716m 53s (- 387m 8s) (28800 64%) 0.2684
Epoch 13 | Iter 28900 | 716m 58s (- 386m 54s) (28900 64%) 0.3174
Epoch 13 | Iter 29000 | 717m 4s (- 386m 39s) (29000 64%) 0.4288
Epoch 13 | Iter 29100 | 717m 9s (- 386m 24s) (29100 64%) 0.3026
Epoch 14 | Iter 100 | 717m 40s (

Epoch 14 | Iter 11700 | 742m 35s (- 365m 40s) (11700 67%) 0.3111
Epoch 14 | Iter 11800 | 742m 45s (- 365m 29s) (11800 67%) 0.3123
Epoch 14 | Iter 11900 | 742m 56s (- 365m 17s) (11900 67%) 0.3155
Epoch 14 | Iter 12000 | 743m 6s (- 365m 5s) (12000 67%) 0.3230
Epoch 14 | Iter 12100 | 743m 16s (- 364m 53s) (12100 67%) 0.2949
Epoch 14 | Iter 12200 | 743m 27s (- 364m 41s) (12200 67%) 0.3787
Epoch 14 | Iter 12300 | 743m 37s (- 364m 29s) (12300 67%) 0.2757
Epoch 14 | Iter 12400 | 743m 47s (- 364m 17s) (12400 67%) 0.3124
Epoch 14 | Iter 12500 | 743m 57s (- 364m 5s) (12500 67%) 0.2930
Epoch 14 | Iter 12600 | 744m 7s (- 363m 53s) (12600 67%) 0.3083
Epoch 14 | Iter 12700 | 744m 18s (- 363m 41s) (12700 67%) 0.3007
Epoch 14 | Iter 12800 | 744m 28s (- 363m 29s) (12800 67%) 0.3083
Epoch 14 | Iter 12900 | 744m 38s (- 363m 17s) (12900 67%) 0.2961
Epoch 14 | Iter 13000 | 744m 48s (- 363m 5s) (13000 67%) 0.2948
Epoch 14 | Iter 13100 | 744m 58s (- 362m 53s) (13100 67%) 0.3898
Epoch 14 | Iter 13200 | 745m 8

Epoch 14 | Iter 24400 | 762m 22s (- 339m 38s) (24400 69%) 0.3159
Epoch 14 | Iter 24500 | 762m 30s (- 339m 25s) (24500 69%) 0.2822
Epoch 14 | Iter 24600 | 762m 38s (- 339m 13s) (24600 69%) 0.2485
Epoch 14 | Iter 24700 | 762m 46s (- 339m 0s) (24700 69%) 0.2950
Epoch 14 | Iter 24800 | 762m 54s (- 338m 47s) (24800 69%) 0.2111
Epoch 14 | Iter 24900 | 763m 3s (- 338m 34s) (24900 69%) 0.2976
Epoch 14 | Iter 25000 | 763m 11s (- 338m 22s) (25000 69%) 0.3007
Epoch 14 | Iter 25100 | 763m 19s (- 338m 9s) (25100 69%) 0.4450
Epoch 14 | Iter 25200 | 763m 28s (- 337m 56s) (25200 69%) 0.2939
Epoch 14 | Iter 25300 | 763m 36s (- 337m 44s) (25300 69%) 0.3263
Epoch 14 | Iter 25400 | 763m 44s (- 337m 31s) (25400 69%) 0.2344
Epoch 14 | Iter 25500 | 763m 52s (- 337m 18s) (25500 69%) 0.2437
Epoch 14 | Iter 25600 | 764m 0s (- 337m 5s) (25600 69%) 0.2851
Epoch 14 | Iter 25700 | 764m 8s (- 336m 53s) (25700 69%) 0.2423
Epoch 14 | Iter 25800 | 764m 16s (- 336m 40s) (25800 69%) 0.3253
Epoch 14 | Iter 25900 | 764m 23

Epoch 15 | Iter 8300 | 786m 59s (- 314m 54s) (8300 71%) 0.2956
Epoch 15 | Iter 8400 | 787m 11s (- 314m 42s) (8400 71%) 0.2950
Epoch 15 | Iter 8500 | 787m 22s (- 314m 31s) (8500 71%) 0.2928
Epoch 15 | Iter 8600 | 787m 33s (- 314m 20s) (8600 71%) 0.2845
Epoch 15 | Iter 8700 | 787m 45s (- 314m 8s) (8700 71%) 0.3044
Epoch 15 | Iter 8800 | 787m 56s (- 313m 57s) (8800 71%) 0.3117
Epoch 15 | Iter 8900 | 788m 8s (- 313m 46s) (8900 71%) 0.3310
Epoch 15 | Iter 9000 | 788m 19s (- 313m 34s) (9000 71%) 0.3105
Epoch 15 | Iter 9100 | 788m 30s (- 313m 23s) (9100 71%) 0.3070
Epoch 15 | Iter 9200 | 788m 41s (- 313m 12s) (9200 71%) 0.2946
Epoch 15 | Iter 9300 | 788m 53s (- 313m 1s) (9300 71%) 0.3081
Epoch 15 | Iter 9400 | 789m 4s (- 312m 49s) (9400 71%) 0.2901
Epoch 15 | Iter 9500 | 789m 15s (- 312m 38s) (9500 71%) 0.2840
Epoch 15 | Iter 9600 | 789m 26s (- 312m 26s) (9600 71%) 0.3276
Epoch 15 | Iter 9700 | 789m 37s (- 312m 15s) (9700 71%) 0.3112
Epoch 15 | Iter 9800 | 789m 48s (- 312m 3s) (9800 71%) 0.29

Epoch 15 | Iter 21100 | 808m 16s (- 289m 42s) (21100 73%) 0.2672
Epoch 15 | Iter 21200 | 808m 24s (- 289m 29s) (21200 73%) 0.3052
Epoch 15 | Iter 21300 | 808m 32s (- 289m 17s) (21300 73%) 0.2364
Epoch 15 | Iter 21400 | 808m 40s (- 289m 5s) (21400 73%) 0.3106
Epoch 15 | Iter 21500 | 808m 48s (- 288m 52s) (21500 73%) 0.2582
Epoch 15 | Iter 21600 | 808m 56s (- 288m 40s) (21600 73%) 0.3849
Epoch 15 | Iter 21700 | 809m 5s (- 288m 27s) (21700 73%) 0.3779
Epoch 15 | Iter 21800 | 809m 13s (- 288m 15s) (21800 73%) 0.2557
Epoch 15 | Iter 21900 | 809m 21s (- 288m 3s) (21900 73%) 0.2927
Epoch 15 | Iter 22000 | 809m 29s (- 287m 50s) (22000 73%) 0.3004
Epoch 15 | Iter 22100 | 809m 37s (- 287m 38s) (22100 73%) 0.2280
Epoch 15 | Iter 22200 | 809m 45s (- 287m 25s) (22200 73%) 0.3126
Epoch 15 | Iter 22300 | 809m 53s (- 287m 13s) (22300 73%) 0.2797
Epoch 15 | Iter 22400 | 810m 0s (- 287m 0s) (22400 73%) 0.2582
Epoch 15 | Iter 22500 | 810m 8s (- 286m 48s) (22500 73%) 0.3210
Epoch 15 | Iter 22600 | 810m 16

Epoch 16 | Iter 4900 | 830m 19s (- 264m 31s) (4900 75%) 0.3163
Epoch 16 | Iter 5000 | 830m 32s (- 264m 20s) (5000 75%) 0.3014
Epoch 16 | Iter 5100 | 830m 45s (- 264m 9s) (5100 75%) 0.3005
Epoch 16 | Iter 5200 | 830m 58s (- 263m 59s) (5200 75%) 0.3239
Epoch 16 | Iter 5300 | 831m 11s (- 263m 48s) (5300 75%) 0.3225
Epoch 16 | Iter 5400 | 831m 24s (- 263m 37s) (5400 75%) 0.3094
Epoch 16 | Iter 5500 | 831m 37s (- 263m 27s) (5500 75%) 0.3014
Epoch 16 | Iter 5600 | 831m 50s (- 263m 16s) (5600 75%) 0.3031
Epoch 16 | Iter 5700 | 832m 3s (- 263m 5s) (5700 75%) 0.3067
Epoch 16 | Iter 5800 | 832m 16s (- 262m 54s) (5800 75%) 0.3550
Epoch 16 | Iter 5900 | 832m 28s (- 262m 44s) (5900 76%) 0.3072
Epoch 16 | Iter 6000 | 832m 41s (- 262m 33s) (6000 76%) 0.3004
Epoch 16 | Iter 6100 | 832m 54s (- 262m 22s) (6100 76%) 0.3062
Epoch 16 | Iter 6200 | 833m 6s (- 262m 11s) (6200 76%) 0.2965
Epoch 16 | Iter 6300 | 833m 19s (- 262m 0s) (6300 76%) 0.3757
Epoch 16 | Iter 6400 | 833m 31s (- 261m 49s) (6400 76%) 0.33

Epoch 16 | Iter 17800 | 853m 50s (- 240m 8s) (17800 78%) 0.3008
Epoch 16 | Iter 17900 | 854m 0s (- 239m 56s) (17900 78%) 0.3338
Epoch 16 | Iter 18000 | 854m 10s (- 239m 44s) (18000 78%) 0.2612
Epoch 16 | Iter 18100 | 854m 19s (- 239m 33s) (18100 78%) 0.2906
Epoch 16 | Iter 18200 | 854m 29s (- 239m 21s) (18200 78%) 0.3015
Epoch 16 | Iter 18300 | 854m 39s (- 239m 9s) (18300 78%) 0.3549
Epoch 16 | Iter 18400 | 854m 49s (- 238m 58s) (18400 78%) 0.3846
Epoch 16 | Iter 18500 | 854m 59s (- 238m 46s) (18500 78%) 0.2933
Epoch 16 | Iter 18600 | 855m 8s (- 238m 34s) (18600 78%) 0.2801
Epoch 16 | Iter 18700 | 855m 18s (- 238m 23s) (18700 78%) 0.2995
Epoch 16 | Iter 18800 | 855m 27s (- 238m 11s) (18800 78%) 0.2633
Epoch 16 | Iter 18900 | 855m 37s (- 237m 59s) (18900 78%) 0.2982
Epoch 16 | Iter 19000 | 855m 47s (- 237m 48s) (19000 78%) 0.3100
Epoch 16 | Iter 19100 | 855m 56s (- 237m 36s) (19100 78%) 0.2851
Epoch 16 | Iter 19200 | 856m 6s (- 237m 24s) (19200 78%) 0.3032
Epoch 16 | Iter 19300 | 856m 1

Epoch 17 | Iter 1500 | 873m 56s (- 214m 59s) (1500 80%) 0.3701
Epoch 17 | Iter 1600 | 874m 11s (- 214m 49s) (1600 80%) 0.3511
Epoch 17 | Iter 1700 | 874m 27s (- 214m 39s) (1700 80%) 0.3776
Epoch 17 | Iter 1800 | 874m 43s (- 214m 29s) (1800 80%) 0.3457
Epoch 17 | Iter 1900 | 875m 0s (- 214m 19s) (1900 80%) 0.3560
Epoch 17 | Iter 2000 | 875m 16s (- 214m 9s) (2000 80%) 0.3553
Epoch 17 | Iter 2100 | 875m 31s (- 213m 58s) (2100 80%) 0.3550
Epoch 17 | Iter 2200 | 875m 48s (- 213m 49s) (2200 80%) 0.4519
Epoch 17 | Iter 2300 | 876m 3s (- 213m 38s) (2300 80%) 0.4040
Epoch 17 | Iter 2400 | 876m 19s (- 213m 28s) (2400 80%) 0.4101
Epoch 17 | Iter 2500 | 876m 35s (- 213m 18s) (2500 80%) 0.3679
Epoch 17 | Iter 2600 | 876m 51s (- 213m 8s) (2600 80%) 0.3908
Epoch 17 | Iter 2700 | 877m 7s (- 212m 58s) (2700 80%) 0.3477
Epoch 17 | Iter 2800 | 877m 23s (- 212m 48s) (2800 80%) 0.3582
Epoch 17 | Iter 2900 | 877m 39s (- 212m 38s) (2900 80%) 0.3374
Epoch 17 | Iter 3000 | 877m 55s (- 212m 28s) (3000 80%) 0.35

Epoch 17 | Iter 14500 | 901m 0s (- 191m 20s) (14500 82%) 0.2683
Epoch 17 | Iter 14600 | 901m 10s (- 191m 8s) (14600 82%) 0.2930
Epoch 17 | Iter 14700 | 901m 20s (- 190m 57s) (14700 82%) 0.2951
Epoch 17 | Iter 14800 | 901m 30s (- 190m 45s) (14800 82%) 0.2932
Epoch 17 | Iter 14900 | 901m 40s (- 190m 34s) (14900 82%) 0.3229
Epoch 17 | Iter 15000 | 901m 49s (- 190m 22s) (15000 82%) 0.2877
Epoch 17 | Iter 15100 | 901m 59s (- 190m 11s) (15100 82%) 0.3483
Epoch 17 | Iter 15200 | 902m 9s (- 189m 59s) (15200 82%) 0.3466
Epoch 17 | Iter 15300 | 902m 19s (- 189m 47s) (15300 82%) 0.2960
Epoch 17 | Iter 15400 | 902m 29s (- 189m 36s) (15400 82%) 0.3053
Epoch 17 | Iter 15500 | 902m 39s (- 189m 24s) (15500 82%) 0.2897
Epoch 17 | Iter 15600 | 902m 49s (- 189m 13s) (15600 82%) 0.3195
Epoch 17 | Iter 15700 | 902m 59s (- 189m 2s) (15700 82%) 0.2953
Epoch 17 | Iter 15800 | 903m 9s (- 188m 50s) (15800 82%) 0.2824
Epoch 17 | Iter 15900 | 903m 18s (- 188m 38s) (15900 82%) 0.3205
Epoch 17 | Iter 16000 | 903m 2

Epoch 17 | Iter 27200 | 919m 27s (- 166m 36s) (27200 84%) 0.2605
Epoch 17 | Iter 27300 | 919m 34s (- 166m 24s) (27300 84%) 0.5366
Epoch 17 | Iter 27400 | 919m 40s (- 166m 12s) (27400 84%) 0.3109
Epoch 17 | Iter 27500 | 919m 46s (- 166m 0s) (27500 84%) 0.2792
Epoch 17 | Iter 27600 | 919m 52s (- 165m 48s) (27600 84%) 0.2349
Epoch 17 | Iter 27700 | 919m 58s (- 165m 36s) (27700 84%) 0.3356
Epoch 17 | Iter 27800 | 920m 4s (- 165m 24s) (27800 84%) 0.3120
Epoch 17 | Iter 27900 | 920m 10s (- 165m 11s) (27900 84%) 0.2956
Epoch 17 | Iter 28000 | 920m 16s (- 164m 59s) (28000 84%) 0.3066
Epoch 17 | Iter 28100 | 920m 22s (- 164m 47s) (28100 84%) 0.5405
Epoch 17 | Iter 28200 | 920m 28s (- 164m 35s) (28200 84%) 0.2997
Epoch 17 | Iter 28300 | 920m 34s (- 164m 23s) (28300 84%) 0.2431
Epoch 17 | Iter 28400 | 920m 40s (- 164m 11s) (28400 84%) 0.2977
Epoch 17 | Iter 28500 | 920m 45s (- 163m 59s) (28500 84%) 0.3005
Epoch 17 | Iter 28600 | 920m 51s (- 163m 47s) (28600 84%) 0.4484
Epoch 17 | Iter 28700 | 920

Epoch 18 | Iter 11100 | 946m 22s (- 142m 38s) (11100 86%) 0.2843
Epoch 18 | Iter 11200 | 946m 32s (- 142m 27s) (11200 86%) 0.3095
Epoch 18 | Iter 11300 | 946m 43s (- 142m 16s) (11300 86%) 0.3771
Epoch 18 | Iter 11400 | 946m 54s (- 142m 4s) (11400 86%) 0.3032
Epoch 18 | Iter 11500 | 947m 4s (- 141m 53s) (11500 86%) 0.2884
Epoch 18 | Iter 11600 | 947m 15s (- 141m 42s) (11600 86%) 0.2973
Epoch 18 | Iter 11700 | 947m 26s (- 141m 31s) (11700 87%) 0.2934
Epoch 18 | Iter 11800 | 947m 36s (- 141m 19s) (11800 87%) 0.2931
Epoch 18 | Iter 11900 | 947m 47s (- 141m 8s) (11900 87%) 0.2649
Epoch 18 | Iter 12000 | 947m 57s (- 140m 57s) (12000 87%) 0.3074
Epoch 18 | Iter 12100 | 948m 8s (- 140m 45s) (12100 87%) 0.2791
Epoch 18 | Iter 12200 | 948m 18s (- 140m 34s) (12200 87%) 0.3693
Epoch 18 | Iter 12300 | 948m 29s (- 140m 23s) (12300 87%) 0.2693
Epoch 18 | Iter 12400 | 948m 39s (- 140m 11s) (12400 87%) 0.2977
Epoch 18 | Iter 12500 | 948m 49s (- 140m 0s) (12500 87%) 0.2861
Epoch 18 | Iter 12600 | 949m 0

Epoch 18 | Iter 23800 | 966m 47s (- 118m 33s) (23800 89%) 0.3136
Epoch 18 | Iter 23900 | 966m 55s (- 118m 21s) (23900 89%) 0.4662
Epoch 18 | Iter 24000 | 967m 3s (- 118m 9s) (24000 89%) 0.3092
Epoch 18 | Iter 24100 | 967m 11s (- 117m 58s) (24100 89%) 0.2456
Epoch 18 | Iter 24200 | 967m 20s (- 117m 46s) (24200 89%) 0.2844
Epoch 18 | Iter 24300 | 967m 27s (- 117m 35s) (24300 89%) 0.2477
Epoch 18 | Iter 24400 | 967m 35s (- 117m 23s) (24400 89%) 0.3243
Epoch 18 | Iter 24500 | 967m 43s (- 117m 12s) (24500 89%) 0.2837
Epoch 18 | Iter 24600 | 967m 51s (- 117m 0s) (24600 89%) 0.2407
Epoch 18 | Iter 24700 | 967m 58s (- 116m 49s) (24700 89%) 0.2890
Epoch 18 | Iter 24800 | 968m 6s (- 116m 37s) (24800 89%) 0.2085
Epoch 18 | Iter 24900 | 968m 14s (- 116m 25s) (24900 89%) 0.2978
Epoch 18 | Iter 25000 | 968m 22s (- 116m 14s) (25000 89%) 0.2931
Epoch 18 | Iter 25100 | 968m 30s (- 116m 2s) (25100 89%) 0.4450
Epoch 18 | Iter 25200 | 968m 37s (- 115m 51s) (25200 89%) 0.2909
Epoch 18 | Iter 25300 | 968m 4

Epoch 19 | Iter 7700 | 992m 2s (- 94m 18s) (7700 91%) 0.2938
Epoch 19 | Iter 7800 | 992m 14s (- 94m 7s) (7800 91%) 0.2891
Epoch 19 | Iter 7900 | 992m 26s (- 93m 56s) (7900 91%) 0.3014
Epoch 19 | Iter 8000 | 992m 37s (- 93m 44s) (8000 91%) 0.2940
Epoch 19 | Iter 8100 | 992m 49s (- 93m 33s) (8100 91%) 0.2954
Epoch 19 | Iter 8200 | 993m 1s (- 93m 22s) (8200 91%) 0.3572
Epoch 19 | Iter 8300 | 993m 13s (- 93m 11s) (8300 91%) 0.2990
Epoch 19 | Iter 8400 | 993m 24s (- 93m 0s) (8400 91%) 0.2939
Epoch 19 | Iter 8500 | 993m 36s (- 92m 49s) (8500 91%) 0.2836
Epoch 19 | Iter 8600 | 993m 47s (- 92m 38s) (8600 91%) 0.2768
Epoch 19 | Iter 8700 | 993m 59s (- 92m 27s) (8700 91%) 0.3008
Epoch 19 | Iter 8800 | 994m 10s (- 92m 15s) (8800 91%) 0.3002
Epoch 19 | Iter 8900 | 994m 22s (- 92m 4s) (8900 91%) 0.3276
Epoch 19 | Iter 9000 | 994m 33s (- 91m 53s) (9000 91%) 0.3036
Epoch 19 | Iter 9100 | 994m 44s (- 91m 42s) (9100 91%) 0.2992
Epoch 19 | Iter 9200 | 994m 56s (- 91m 31s) (9200 91%) 0.2941
Epoch 19 | It

Epoch 19 | Iter 20600 | 1014m 4s (- 70m 9s) (20600 93%) 0.3386
Epoch 19 | Iter 20700 | 1014m 13s (- 69m 58s) (20700 93%) 0.2905
Epoch 19 | Iter 20800 | 1014m 21s (- 69m 47s) (20800 93%) 0.2872
Epoch 19 | Iter 20900 | 1014m 29s (- 69m 35s) (20900 93%) 0.2563
Epoch 19 | Iter 21000 | 1014m 37s (- 69m 24s) (21000 93%) 0.2945
Epoch 19 | Iter 21100 | 1014m 45s (- 69m 12s) (21100 93%) 0.2668
Epoch 19 | Iter 21200 | 1014m 54s (- 69m 1s) (21200 93%) 0.3057
Epoch 19 | Iter 21300 | 1015m 2s (- 68m 50s) (21300 93%) 0.2394
Epoch 19 | Iter 21400 | 1015m 10s (- 68m 38s) (21400 93%) 0.3088
Epoch 19 | Iter 21500 | 1015m 18s (- 68m 27s) (21500 93%) 0.2559
Epoch 19 | Iter 21600 | 1015m 26s (- 68m 16s) (21600 93%) 0.3784
Epoch 19 | Iter 21700 | 1015m 34s (- 68m 4s) (21700 93%) 0.3677
Epoch 19 | Iter 21800 | 1015m 42s (- 67m 53s) (21800 93%) 0.2491
Epoch 19 | Iter 21900 | 1015m 50s (- 67m 42s) (21900 93%) 0.2845
Epoch 19 | Iter 22000 | 1015m 58s (- 67m 30s) (22000 93%) 0.2944
Epoch 19 | Iter 22100 | 1016m 

Epoch 20 | Iter 4400 | 1036m 9s (- 45m 56s) (4400 95%) 0.3497
Epoch 20 | Iter 4500 | 1036m 24s (- 45m 45s) (4500 95%) 0.3027
Epoch 20 | Iter 4600 | 1036m 39s (- 45m 35s) (4600 95%) 0.3137
Epoch 20 | Iter 4700 | 1036m 52s (- 45m 24s) (4700 95%) 0.3204
Epoch 20 | Iter 4800 | 1037m 7s (- 45m 13s) (4800 95%) 0.3117
Epoch 20 | Iter 4900 | 1037m 22s (- 45m 2s) (4900 95%) 0.3164
Epoch 20 | Iter 5000 | 1037m 37s (- 44m 51s) (5000 95%) 0.3032
Epoch 20 | Iter 5100 | 1037m 52s (- 44m 40s) (5100 95%) 0.2983
Epoch 20 | Iter 5200 | 1038m 6s (- 44m 29s) (5200 95%) 0.3099
Epoch 20 | Iter 5300 | 1038m 21s (- 44m 18s) (5300 95%) 0.3091
Epoch 20 | Iter 5400 | 1038m 35s (- 44m 7s) (5400 95%) 0.3003
Epoch 20 | Iter 5500 | 1038m 50s (- 43m 56s) (5500 95%) 0.2903
Epoch 20 | Iter 5600 | 1039m 4s (- 43m 45s) (5600 95%) 0.2968
Epoch 20 | Iter 5700 | 1039m 18s (- 43m 34s) (5700 95%) 0.2869
Epoch 20 | Iter 5800 | 1039m 33s (- 43m 23s) (5800 95%) 0.3377
Epoch 20 | Iter 5900 | 1039m 47s (- 43m 12s) (5900 96%) 0.297

Epoch 20 | Iter 17300 | 1062m 1s (- 22m 4s) (17300 97%) 0.3567
Epoch 20 | Iter 17400 | 1062m 11s (- 21m 53s) (17400 97%) 0.3197
Epoch 20 | Iter 17500 | 1062m 22s (- 21m 42s) (17500 97%) 0.2988
Epoch 20 | Iter 17600 | 1062m 32s (- 21m 31s) (17600 98%) 0.2805
Epoch 20 | Iter 17700 | 1062m 42s (- 21m 19s) (17700 98%) 0.2711
Epoch 20 | Iter 17800 | 1062m 52s (- 21m 8s) (17800 98%) 0.2915
Epoch 20 | Iter 17900 | 1063m 2s (- 20m 57s) (17900 98%) 0.3331
Epoch 20 | Iter 18000 | 1063m 12s (- 20m 46s) (18000 98%) 0.2550
Epoch 20 | Iter 18100 | 1063m 22s (- 20m 35s) (18100 98%) 0.2832
Epoch 20 | Iter 18200 | 1063m 32s (- 20m 24s) (18200 98%) 0.2954
Epoch 20 | Iter 18300 | 1063m 42s (- 20m 12s) (18300 98%) 0.3408
Epoch 20 | Iter 18400 | 1063m 51s (- 20m 1s) (18400 98%) 0.3737
Epoch 20 | Iter 18500 | 1064m 1s (- 19m 50s) (18500 98%) 0.2856
Epoch 20 | Iter 18600 | 1064m 11s (- 19m 39s) (18600 98%) 0.2728
Epoch 20 | Iter 18700 | 1064m 22s (- 19m 28s) (18700 98%) 0.2907
Epoch 20 | Iter 18800 | 1064m 3

In [30]:
class Args(object):
    """Plain container for the decoding options handed to ``model.recognize``.

    The three constructor arguments are stored unchanged as attributes of the
    same name:

    * ``beam_size`` -- presumably the beam width of the search (TODO confirm
      against ``model.recognize``).
    * ``nbest`` -- presumably how many hypotheses to return (TODO confirm).
    * ``decode_max_len`` -- presumably the maximum number of decoding steps
      (TODO confirm).
    """

    def __init__(self, beam_size, nbest, decode_max_len):
        # Store every option verbatim; no validation or conversion is done.
        self.beam_size, self.nbest, self.decode_max_len = (
            beam_size,
            nbest,
            decode_max_len,
        )

In [42]:
# Decode a single test utterance with beam search and print its reference text.
batch_idx = 200  # index into te_dataset
utt_idx = 20     # index of the utterance inside te_dataset[batch_idx]
sample = te_dataset[batch_idx][utt_idx][1]
# Beam size 30, 1-best; the decode length limit is the reference transcript
# length + 1 (the extra step is presumably for the end-of-sentence token).
args = Args(30, 1, sample["output"][0]["shape"][0] + 1)

input_tensor = torch.tensor(kaldi_io.read_mat(sample["input"][0]["feat"]))
# Trim to a frame count that is a multiple of 4 and at most MAX_LENGTH * 4.
num_frames = min(input_tensor.shape[0] // 4 * 4, MAX_LENGTH * 4)
input_tensor = input_tensor[:num_frames, :]

# Build the length tensor once and reuse it for the print and the decode call.
input_length = torch.tensor([input_tensor.shape[0]])
print(input_length)

input_tensor = input_tensor.unsqueeze(0)  # add a leading batch dimension of size 1
print("input_tensor:", input_tensor.shape)
# Inference only: skip autograd bookkeeping so the beam search does not hold
# on to a computation graph.
with torch.no_grad():
    nbest_hyps = model.recognize(input_tensor.to(device), input_length, char_list, args)
print("正解：", sample["output"][0]["text"])

tensor([452])
input_tensor: torch.Size([1, 452, 240])
remeined hypothes: 29
hypo: 雨
hypo: 依
hypo: 狙
hypo: 已
hypo: 以
hypo: 遇
hypo: 一
hypo: 与
hypo: 于
hypo: 玉
hypo: 虞
hypo: E
hypo: 预
hypo: 伊
hypo: 御
hypo: 羽
hypo: 余
hypo: 渝
hypo: 许
hypo: 怡
hypo: 徐
hypo: 尤
hypo: 渔
hypo: G
hypo: 瑜
hypo: 乙
hypo: 芋
hypo: U
hypo: 育
remeined hypothes: 30
hypo: 狙击
hypo: 雨季
hypo: 以及
hypo: 已经
hypo: 依据
hypo: 预计
hypo: 虞姬
hypo: 遇到
hypo: 依滴
hypo: 伊迪
hypo: 玉季
hypo: ED
hypo: 雨滴
hypo: 一滴
hypo: 遇记
hypo: 一起
hypo: 与急
hypo: EG
hypo: 雨洁
hypo: 遇及
hypo: 雨起
hypo: 一直
hypo: 遇起
hypo: 于急
hypo: 与几
hypo: 羽洁
hypo: 尤其
hypo: 一击
hypo: 依及
hypo: 已起
remeined hypothes: 22
hypo: 已经一
hypo: 以及一
hypo: 预计一
hypo: 依据一
hypo: 雨季香
hypo: 伊迪香
hypo: 依据虾
hypo: EDC
hypo: 一起一
hypo: 依据香
hypo: 狙击虾
hypo: 狙击一
hypo: 与几十
hypo: 雨季轩
hypo: 依滴虾
hypo: 玉季香
hypo: 玉季虾
hypo: 已经销
hypo: 一滴一
hypo: 虞姬香
hypo: 雨季加
hypo: 依据肖
remeined hypothes: 29
hypo: 雨季香伴
hypo: 伊迪香伴
hypo: 以及一一
hypo: 已经一一
hypo: 依据香伴
hypo: 已经一十
hypo: 以及一些
hypo: 依据虾伴
hypo: 依据肖传
hypo: 已经一些
hypo: 以及一十
hypo: 预计一一
hypo

In [None]:
# Decode every utterance of the test set.
#   * result.txt receives one "utt / reference / prediction" record per utterance.
#   * print.txt (append mode) receives everything printed while decoding, because
#     sys.stdout is redirected to it for the duration of the loop.
# NOTE(review): `args` is reused from the single-utterance cell, where
# decode_max_len was derived from that one sample's reference length. Confirm
# this limit is intended for every test utterance.
with open("result.txt", "w", encoding="utf-8") as f, \
        open("print.txt", "a", encoding="utf-8") as log:
    out = sys.stdout
    sys.stdout = log
    try:
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for i in range(len(te_dataset)):
                batch = te_dataset[i]  # fetch once instead of re-indexing per utterance
                for j in range(len(batch)):
                    utt, sample = batch[j][0], batch[j][1]
                    input_tensor = torch.tensor(kaldi_io.read_mat(sample["input"][0]["feat"]))
                    # Trim to a multiple of 4 frames, capped at MAX_LENGTH * 4.
                    num_frames = min(input_tensor.shape[0] // 4 * 4, MAX_LENGTH * 4)
                    input_tensor = input_tensor[:num_frames, :]

                    input_length = torch.tensor([input_tensor.shape[0]])
                    print(input_length)

                    input_tensor = input_tensor.unsqueeze(0)
                    print("input_tensor:", input_tensor.shape)
                    nbest_hyps = model.recognize(input_tensor.to(device), input_length, char_list, args)
                    print("正解：", sample["output"][0]["text"])
                    res = add_results_to_json(sample, nbest_hyps, char_list)
                    f.write(utt+":\n"+"正解:"+res["output"][0]["text"]+"\n"+"预测:"+res["output"][0]["rec_text"]+"\n\n")
                    # Flush both files after each utterance so partial results
                    # survive an interrupted run.
                    f.flush()
                    sys.stdout.flush()
    finally:
        # Always give the notebook its stdout back, even if decoding raised;
        # the enclosing `with` then closes both files.
        sys.stdout = out

In [41]:
# Size of the training dataset as reported by len(). Presumably this counts
# minibatches rather than utterances, since the test dataset is indexed as
# te_dataset[batch][utterance] elsewhere in this notebook -- TODO confirm.
len(tr_dataset)

29187

21

In [None]:
# Print each element of data_list[1]["corpus"] as a plain Python scalar via .item().
# NOTE(review): `data_list` is not defined in the cells visible in this part of
# the notebook -- confirm it still exists on a fresh kernel, or drop this
# scratch cell.
for ch in data_list[1]["corpus"]:
    print(ch.item())

In [None]:
# Toy integer tensor of shape (1, 4, 3): one batch entry holding four rows of
# three values each.
d = torch.tensor(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
            [10, 11, 12],
        ]
    ]
)

In [None]:
# Look up entry 1 of `lang.index2word`.
# NOTE(review): `lang` is not defined in the cells visible in this part of the
# notebook -- confirm it still exists on a fresh kernel, or drop this scratch cell.
lang.index2word[1]

In [None]:
# Reinterpret `d` with size (4, 1, 3); view() returns a tensor with the
# requested size and leaves `d` itself unchanged.
d.view(4, 1, 3)

In [None]:
# First entry of `d` along dimension 0; for the (1, 4, 3) toy tensor built
# above this is its only 4x3 slice.
d[0]