In [None]:
import random
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable


class Encoder(nn.Module):
    """Bidirectional LSTM encoder whose two directions are merged by summation."""

    def __init__(self, vocab_size, embedding_size, hidden_size, n_layers=1, dropout=0.5):
        """
        Bidirectional LSTM encoder; aggregate the outputs of two directions by summation
        :param int vocab_size: size of the source language vocabulary
        :param int embedding_size: size of the word embeddings
        :param int hidden_size: size of hidden states
        :param int n_layers: number of stacked LSTM layers
        :param float dropout: dropout rate applied between stacked LSTM layers
        """
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.n_layers = n_layers
        self.dropout = dropout  # the configured rate, kept for introspection

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        # nn.LSTM only applies dropout between stacked layers; with a single layer a
        # non-zero rate has no effect and just emits a UserWarning, so pass 0 there
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers, dropout=lstm_dropout, bidirectional=True)

    def forward(self, input_sentence, hidden=None):
        """
        forward propagation
        :param torch.Tensor input_sentence: source sentence token ids; shape of (T, B)
        :param tuple hidden: optional initial states (h, c); zeros are used when None
        :return: summed outputs of shape (T, B, H) and the final (h, c) as returned by the LSTM
        """
        embedded = self.embedding(input_sentence)  # (T, B, E)
        outputs, hidden = self.lstm(embedded, hidden)  # outputs: (T, B, 2H)
        # the last dimension holds the forward half followed by the backward half; sum them
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]  # (T, B, H)
        return outputs, hidden


class Attention(nn.Module):
    """Alignment model: a two-layer MLP scores each encoder step against the decoder state."""

    def __init__(self, hidden_size):
        """
        build the two fully connected layers of the alignment model
        :param int hidden_size: the hidden state size shared by encoder and decoder
        """
        super().__init__()
        self.hidden_size = hidden_size
        # input is an encoder output concatenated with the decoder state, hence 2 * H
        self.attn1 = nn.Linear(2 * hidden_size, hidden_size)
        # collapses each position to a single scalar energy
        self.attn2 = nn.Linear(hidden_size, 1)

    def forward(self, encoder_outputs, hidden):
        """
        compute normalised attention weights over the source positions
        :param torch.Tensor encoder_outputs: encoder output sequence, shape of (T, B, H)
        :param torch.Tensor hidden: previous decoder hidden h, shape of (1, B, H)
        :return: attention weights, shape of (B, 1, T); each row sums to one over T
        """
        steps = encoder_outputs.size(0)
        keys = encoder_outputs.transpose(0, 1)  # (B, T, H)
        # copy the single decoder state once per source position
        query = hidden.repeat(steps, 1, 1).transpose(0, 1)  # (B, T, H)
        paired = torch.cat([keys, query], dim=2)  # (B, T, 2H)
        scores = self.attn2(F.relu(self.attn1(paired)))  # (B, T, 1)
        weights = F.softmax(scores.squeeze(2), dim=1)  # (B, T)
        return weights.unsqueeze(1)  # (B, 1, T)


class Decoder(nn.Module):
    """LSTM decoder that produces one target token distribution per call, using attention."""

    def __init__(self, vocab_size, embedding_size, hidden_size, n_layers=1, dropout=0.5):
        """
        :param int vocab_size: size of the target language vocabulary
        :param int embedding_size: size of the word embeddings
        :param int hidden_size: size of hidden states
        :param int n_layers: number of stacked LSTM layers
        :param float dropout: dropout rate applied between stacked LSTM layers
        """
        super(Decoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.n_layers = n_layers
        self.dropout = dropout  # the configured rate, kept for introspection

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        # nn.LSTM only applies dropout between stacked layers; with a single layer a
        # non-zero rate has no effect and just emits a UserWarning, so pass 0 there
        lstm_dropout = dropout if n_layers > 1 else 0.0
        # the LSTM consumes the word embedding concatenated with the attention context
        self.lstm = nn.LSTM(embedding_size + hidden_size, hidden_size, n_layers, dropout=lstm_dropout)
        self.attention = Attention(hidden_size)  # encoder and decoder will use same hidden size for simplicity
        # the output layer sees the LSTM output concatenated with the context, hence 2 * H
        self.out = nn.Linear(2*hidden_size, vocab_size)

    def forward(self, input_word, hidden, encoder_outputs):
        """
        decode a single time step
        :param torch.Tensor input_word: the input word tensor, shape of (B, )
        :param tuple hidden: previous hidden states (h, c)
        :param torch.Tensor encoder_outputs: output from encoder, shape of (T, B, H)
        :return: log likelihood of output words, shape of (B, N); updated hidden states
        """
        # get input word embeddings
        embedded = self.embedding(input_word)  # (B, E)
        embedded = embedded.unsqueeze(0)  # (1, B, E)
        # calculate context: attention is queried with the top layer's hidden h only
        attention = self.attention(encoder_outputs, hidden[0][-1:])  # (B, 1, T)
        context = attention.bmm(encoder_outputs.transpose(0, 1))  # (B, 1, H)
        context = context.transpose(0, 1)  # (1, B, H)
        # concatenate LSTM input
        lstm_input = torch.cat([embedded, context], dim=2)  # (1, B, E+H)
        output, hidden = self.lstm(lstm_input, hidden)
        # concatenate final layer input
        context = context.squeeze(0)  # (B, H)
        output = output.squeeze(0)  # (B, H)
        final_input = torch.cat([output, context], dim=1)  # (B, 2H)
        # final output layer
        output = self.out(final_input)  # (B, N)
        output = F.log_softmax(output, dim=1)
        return output, hidden


class Seq2Seq(nn.Module):
    """Encoder-decoder model with attention that decodes the target one token at a time."""

    def __init__(self, encoder, decoder):
        """
        the final Seq2Seq model with attention
        :param Encoder encoder: the encoder module
        :param Decoder decoder: the decoder module
        """
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def _init_decoder_hidden(self, hidden):
        """Slice the encoder's final (h, c) down to the number of decoder layers."""
        # NOTE(review): a bidirectional encoder returns n_layers * 2 state slices; keeping
        # only the first `decoder.n_layers` of them matches the original behaviour, but for
        # n_layers > 1 it may not pick one slice per layer -- confirm this is intended
        n_layers = self.decoder.n_layers
        return hidden[0][:n_layers], hidden[1][:n_layers]

    def forward(self, source, target, teacher_forcing=0.5):
        """
        forward propagation
        :param torch.Tensor source: the source sentences, shape of (T', B)
        :param torch.Tensor target: the target sentences, shape of (T, B)
        :param float teacher_forcing: probability of feeding the gold token at each step
        :return: log likelihoods of shape (T, B, N); row 0 (the <SOS> position) stays zero
        """
        target_len = target.size(0)
        batch_size = source.size(1)
        vocab_size = self.decoder.vocab_size
        # allocate on the input's device so each step's result is not copied back to the CPU
        outputs = torch.zeros(target_len, batch_size, vocab_size, device=source.device)  # (T, B, N)

        # encoding source sentences
        encoder_outputs, hidden = self.encoder(source)
        # initialize decoder as the final hidden states of encoder
        hidden = self._init_decoder_hidden(hidden)

        # decoding word by word, starting from the first target row (<SOS>)
        word = target[0]  # shape of (B,)
        for t in range(1, target_len):
            log_probs, hidden = self.decoder(word, hidden, encoder_outputs)
            outputs[t] = log_probs
            top1 = log_probs.max(1)[1]  # the word index with the largest log likelihood
            # teacher forcing: one draw per step decides for the whole batch
            is_teacher = random.random() < teacher_forcing
            word = target[t] if is_teacher else top1
        return outputs

    def translate(self, source, max_length, sos_index=2, eos_index=3):
        """
        greedily translate one sentence
        :param torch.Tensor source: the source sentence, shape of (T', 1)
        :param int max_length: max translation length
        :param int sos_index: vocabulary index of the start token fed at the first step
        :param int eos_index: vocabulary index of the end token that stops decoding
        :return: list of predicted word indices (includes the end token when it is produced)
        """
        was_training = self.training
        # inference must be deterministic: disable dropout, then restore the caller's mode
        self.eval()
        outputs = []
        try:
            with torch.no_grad():  # no gradients are needed for decoding
                encoder_outputs, hidden = self.encoder(source)
                hidden = self._init_decoder_hidden(hidden)
                # build the start token on the same device as the input
                word = torch.tensor([sos_index], dtype=torch.long, device=source.device)  # shape of (1,)
                for _ in range(max_length):
                    log_probs, hidden = self.decoder(word, hidden, encoder_outputs)
                    word = log_probs.max(1)[1]  # the word index with the largest log likelihood
                    token = word.item()
                    outputs.append(token)
                    if token == eos_index:
                        break
        finally:
            self.train(was_training)
        return outputs


if __name__ == "__main__":
    # smoke test: a tiny, randomly initialised model (vocabulary 20, embedding 10, hidden 5)
    test_encoder = Encoder(20, 10, 5)
    test_decoder = Decoder(20, 10, 5)
    model = Seq2Seq(test_encoder, test_decoder)

    # random token ids: source of length 20 and target of length 10, batch of 8
    src = torch.randint(high=20, size=(20, 8))
    trg = torch.randint(high=20, size=(10, 8))

    # training-style forward pass; prints the shape of the (T, B, N) output
    res = model(src, trg)
    print(res.shape)

    # greedy decoding of a single random sentence, at most 10 tokens
    test_sen = torch.randint(high=20, size=(20, 1))
    res = model.translate(test_sen, 10)
    print(res)

torch.Size([10, 8, 20])
[18, 18, 6, 18, 6, 18, 18, 6, 18, 6]


  "num_layers={}".format(dropout, num_layers))


In [None]:
from collections import Counter


class Vocabulary(object):
    """a vocabulary for NLP corpus"""

    def __init__(self, max_vocab_size=50000, min_count=None, start_end_tokens=False, name="Vocabulary1"):
        """
        create an empty vocabulary holding only the reserved <UNK> and <PAD> tokens
        :param int max_vocab_size: upper bound on the vocabulary size, reserved tokens included;
            a falsy value disables the limit
        :param int min_count: drop words seen fewer times than this; a falsy value disables it
        :param bool start_end_tokens: whether <SOS> and <EOS> are reserved when building
        :param str name: a label for this vocabulary
        """
        self.name = name
        self.max_vocab_size = max_vocab_size
        self.min_count = min_count
        self.start_end_tokens = start_end_tokens  # whether include start and end tokens
        self.vocabulary_size = 2
        self.word2idx = {"<UNK>": 0, "<PAD>": 1}
        self.idx2word = ["<UNK>", "<PAD>"]
        self.idx2count = [0, 0]

    def buildVocabulary(self, data):
        """
        method to build a vocabulary; any previously built content is replaced
        :param list data: a list of strings, with each string being a sentence
        :return: None
        """
        # always restart from the reserved tokens: appending to the existing lists would
        # duplicate every entry (and shrink the word budget) if this method is called twice
        reserved = ["<UNK>", "<PAD>"]
        if self.start_end_tokens:
            reserved += ['<SOS>', '<EOS>']

        # count whitespace-separated words
        counter = Counter(word for sentence in data for word in sentence.split())
        # keep only the most frequent words that fit beside the reserved tokens
        if self.max_vocab_size:
            counter = dict(counter.most_common(self.max_vocab_size - len(reserved)))
        # filter words with low frequency
        if self.min_count:
            counter = {word: freq for word, freq in counter.items() if freq >= self.min_count}

        # generate attributes; words are indexed in sorted order after the reserved tokens
        self.idx2word = reserved + sorted(counter)
        self.idx2count = [counter.get(word, 0) for word in self.idx2word]
        self.word2idx = {word: idx for idx, word in enumerate(self.idx2word)}
        self.vocabulary_size = len(self.idx2word)

    def indexer(self, word):
        """
        return word index
        :param str word: the word to query
        :return: word index, or the index of <UNK> when the word is not in the vocabulary
        """
        return self.word2idx.get(word, self.word2idx['<UNK>'])

In [None]:
import jieba
import re
import random
import torch
import logging


# Lookup table used by normalizeString: each full-width (Chinese) punctuation mark is
# replaced by the ASCII character it maps to, before the regex clean-up runs.
ch2en = {
    '！': '!',
    '？': '?',
    '。': '.',
    '（': '(',
    '）': ')',
    '，': ',',
    '：': ':',
    '；': ';',
    '｀': ','  # NOTE(review): a full-width grave accent mapped to a comma -- confirm this is intended
}


def normalizeString(s):
    """
    lower-case a sentence and reduce it to letters, Chinese characters and . ! ?
    :param str s: input string
    :return: normalised string; every run of other characters (digits, remaining
        punctuation, whitespace) is collapsed into a single space
    """
    text = s.lower().strip()
    # translate Chinese punctuation to its ASCII counterpart, character by character
    text = "".join(ch2en.get(char, char) for char in text)
    # put a space in front of sentence-ending punctuation so it becomes its own token
    text = re.sub(r"([.!?])", r" \1", text)
    # keep alphabets, the three punctuations and Chinese characters
    text = re.sub(r"[^a-zA-Z.!?\u4e00-\u9fff]+", r" ", text)
    return text


def preprocess(data_path):
    """
    read a tab-separated parallel corpus and return normalised sentence pairs
    :param str data_path: input data path; one pair per line, columns separated by a tab
    :return: a list of [first, second] sentence pairs; the second sentence is segmented
        by jieba and its words are joined with single spaces
    """
    # read data and split into pairs; the context manager closes the file handle
    with open(data_path, encoding="utf-8") as f:
        lines = f.read().strip().split('\n')
    # only the first two tab-separated columns of each line are used
    pairs = [[normalizeString(s) for s in line.split('\t')[:2]] for line in lines]

    # segment the Chinese side; word segmentation is done here by jieba
    for pair in pairs:
        pair[1] = " ".join(jieba.lcut(pair[1]))

    return pairs

def Chpreprocess(data_path):
    """
    read a tab-separated Chinese-Chinese corpus and return normalised sentence pairs
    :param str data_path: input data path; one pair per line, columns separated by a tab
    :return: a list of [first, second] sentence pairs; both sentences are segmented
        by jieba and their words are joined with single spaces
    """
    # read data and split into pairs; the context manager closes the file handle
    with open(data_path, encoding="utf-8") as f:
        lines = f.read().strip().split('\n')
    # only the first two tab-separated columns of each line are used
    pairs = [[normalizeString(s) for s in line.split('\t')[:2]] for line in lines]

    # segment both sides; word segmentation is done here by jieba
    for pair in pairs:
        pair[1] = " ".join(jieba.lcut(pair[1]))
        pair[0] = " ".join(jieba.lcut(pair[0]))

    return pairs


def getVocabulary(pairs, input_lang, output_lang, max_vocab_size, reverse=False, start_end_tokens=True):
    """
    build one vocabulary per side of the sentence pairs
    :param list pairs: language sentence pairs
    :param str input_lang: input language name
    :param str output_lang: output language name
    :param int max_vocab_size: max vocabulary size
    :param bool reverse: whether to swap the input and output sentences of every pair
    :param bool start_end_tokens: whether to use start and end tokens
    :return: the input vocabulary and the output vocabulary
    """
    # optionally swap the two sides; the caller's list is left untouched
    ordered = [list(reversed(p)) for p in pairs] if reverse else pairs

    # initialise vocabularies
    input_vocab = Vocabulary(name=input_lang, start_end_tokens=start_end_tokens, max_vocab_size=max_vocab_size)
    output_vocab = Vocabulary(name=output_lang, start_end_tokens=start_end_tokens, max_vocab_size=max_vocab_size)

    # first column feeds the input vocabulary, second column the output vocabulary
    input_vocab.buildVocabulary([pair[0] for pair in ordered])
    output_vocab.buildVocabulary([pair[1] for pair in ordered])

    return input_vocab, output_vocab

def ChgetVocabulary(pairs, lang, max_vocab_size, reverse=False, start_end_tokens=True):
    """
    build a single vocabulary shared by both sides of the sentence pairs
    :param list pairs: sentence pairs
    :param str lang: language name given to the vocabulary
    :param int max_vocab_size: max vocabulary size
    :param bool reverse: whether to swap the two sentences of every pair first; this only
        changes the order in which sentences are handed to the vocabulary builder
    :param bool start_end_tokens: whether to use start and end tokens
    :return: one vocabulary covering both columns
    """
    # optionally swap the two sides; the caller's list is left untouched
    ordered = [list(reversed(p)) for p in pairs] if reverse else pairs

    # initialise the shared vocabulary
    vocab = Vocabulary(name=lang, start_end_tokens=start_end_tokens, max_vocab_size=max_vocab_size)

    # interleave both columns pair by pair and count them together
    sentences = [sentence for pair in ordered for sentence in (pair[0], pair[1])]
    vocab.buildVocabulary(sentences)

    return vocab


def sentence2index(vocabulary, sentence):
    """
    encode a sentence as word indices wrapped in start and end markers
    :param Vocabulary vocabulary: the vocabulary of the language
    :param str sentence: input sentence, words separated by whitespace
    :return: list of index: <SOS>, one index per word, then <EOS>
    """
    lookup = vocabulary.indexer
    return [lookup("<SOS>"), *(lookup(word) for word in sentence.split()), lookup("<EOS>")]


def train_test_split(data, test_split=0.3, seed=66):
    """
    shuffle the data deterministically and split it into train and test sets
    :param list data: text data list; it is not modified
    :param float test_split: proportion of the data that goes to the test set
    :param int seed: seed of the shuffle; the default reproduces the historical split
    :return: train data and test data
    """
    # shuffle a copy with a private generator: the caller's list keeps its order and the
    # global `random` state is not reset as a side effect of splitting
    shuffled = list(data)
    random.Random(seed).shuffle(shuffled)
    size = len(shuffled)
    split = size - int(size * test_split)
    return shuffled[:split], shuffled[split:]


def translation(sentence, model, max_length, src_vocab, trg_vocab):
    """
    translate one source sentence
    :param str sentence: source sentence, words separated by whitespace
    :param model: the Seq2Seq model; note that it is moved to the CPU in place
    :param int max_length: max sentence length
    :param src_vocab: source language vocabulary
    :param trg_vocab: target language vocabulary
    :return: one target sentence, its words joined without separators
    """
    sentence = normalizeString(sentence)
    index = sentence2index(src_vocab, sentence)
    source = torch.LongTensor(index).unsqueeze(1)  # (T, 1), built on the CPU
    # decode on the CPU so the model matches the CPU source tensor built above
    model = model.cpu()
    target = model.translate(source, max_length)
    target = [trg_vocab.idx2word[idx] for idx in target]
    # drop the end marker; guard against an empty result (e.g. max_length == 0)
    if target and target[-1] == "<EOS>":
        target = target[:-1]
    return "".join(target)


def create_logger(filename):
    """
    create a logger that writes bare messages to the screen and to a file
    :param str filename: log file name include path
    :return: logger
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # level
    # logging.getLogger returns the same object on every call, so handlers attached by an
    # earlier call (e.g. re-running a notebook cell) must be dropped first; otherwise every
    # message is emitted once per call
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()
    formatter = logging.Formatter('%(message)s')  # formatting: message text only
    handlers = (
        logging.StreamHandler(),  # output on screen
        logging.FileHandler(filename, encoding="utf-8"),  # output into file; UTF-8 for Chinese text
    )
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

# module-level logger used by train() and evaluate(); messages go to the screen and to training.log
logger = create_logger("training.log")

'''

'\nif __name__ == "__main__":\n    data = preprocess(Config.data_path)\n    English, Chinese = getVocabulary(data, "English", "Chinese", 10000)\n    index = sentence2index(Chinese, data[2046][1])\n    print(data[2046][1])\n    print(index)\n    print([Chinese.idx2word[idx] for idx in index])'

In [None]:
if __name__ == "__main__":
    # quick check of the preprocessing pipeline; Config is defined outside this section
    data = Chpreprocess(Config.data_path)
    # one vocabulary shared by both columns, capped at 10000 entries
    Chinese = ChgetVocabulary(data, "Chinese", 10000)
    # encode the second sentence of one sample pair and show the round trip
    index = sentence2index(Chinese, data[2046][1])
    print(data[2046][1])
    print(data[2046][0])
    print(index)
    print([Chinese.idx2word[idx] for idx in index])

郑州 在 哪个 省
郑州 是 哪个 省 的
[2, 9136, 2576, 2347, 6969, 3]
['<SOS>', '郑州', '在', '哪个', '省', '<EOS>']


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class TranslationData(Dataset):
    """Dataset that turns sentence pairs into index tensors on demand."""

    def __init__(self, data, src_vocab, trg_vocab):
        """
        Dataset for translation
        :param list data: processed data, a list of [source, target] sentence pairs
        :param src_vocab: source language vocabulary
        :param trg_vocab: target language vocabulary
        """
        super().__init__()
        self.data = data
        self.src_vocab = src_vocab
        self.trg_vocab = trg_vocab

    def __len__(self):
        """number of sentence pairs"""
        return len(self.data)

    def __getitem__(self, idx):
        """
        encode one pair
        :param int idx: position of the pair in the data
        :return: source and target as 1-D LongTensors of word indices
        """
        pair = self.data[idx]
        src_index = sentence2index(self.src_vocab, pair[0])
        trg_index = sentence2index(self.trg_vocab, pair[1])
        return torch.LongTensor(src_index), torch.LongTensor(trg_index)


def collate_fn(batch):
    """
    collate function that pads every mini-batch to its own longest sentence
    :param batch: the list of (source, target) samples from Dataloader
    :return: padded source and padded target, each of shape (T, B)
    """
    pad_index = 1  # the <PAD> index in vocabulary

    def padding(sentence_list):
        """right-pad each sentence with <PAD> up to the longest one"""
        longest = max(sentence.size(0) for sentence in sentence_list)
        # start from a (B, T) grid full of padding and copy each sentence into its row
        padded = torch.full((len(sentence_list), longest), pad_index, dtype=torch.long)
        for row, sentence in enumerate(sentence_list):
            padded[row, :sentence.size(0)] = sentence
        return padded.transpose(0, 1)  # shape of (T, B)

    sources = [sample[0] for sample in batch]
    targets = [sample[1] for sample in batch]
    return padding(sources), padding(targets)


'if __name__ == "__main__":\n    data = Chpreprocess(Config.data_path)\n    src_vocab = ChgetVocabulary(data,"Chinese",10000)\n    trg_vocab = src_vocab\n    dataset = TranslationData(data, src_vocab, trg_vocab)\n    source, target = dataset[66]\n    print(source.size())\n    print(target.size())\n    loader = DataLoader(dataset, batch_size=4, collate_fn=collate_fn)\n    for i, batch in enumerate(loader):\n        source, target = batch\n        print(source.size())\n        print(target.size())\n        break'

In [None]:
import torch
from torch.nn.utils import clip_grad_norm_
from datetime import datetime


def train(model, dataloader, optimizer, criterion, vocab_size, grad_clip, teacher_forcing):
    """
    training over one epoch
    :param model: the Seq2Seq model
    :param dataloader: training dataloader
    :param optimizer: training optimiser
    :param criterion: loss function
    :param vocab_size: target vocabulary size
    :param grad_clip: max gradient norm
    :param teacher_forcing: teacher forcing ratio for training
    :return: list of average losses, one per completed window of 100 mini-batches
    """
    window = 100  # number of mini-batches per reported average
    model.to(Config.device)
    model.train()
    batch_losses = []
    running_loss = 0
    n_batches = len(dataloader)
    for i, (source, target) in enumerate(dataloader):
        source = source.to(Config.device)
        target = target.to(Config.device)
        target_len = target.size(0)
        optimizer.zero_grad()
        output = model(source, target, teacher_forcing).to(Config.device)  # forward propagation
        # calculate NLL loss, ignore first token <SOS>
        loss = criterion(output[1:].reshape(-1, vocab_size), target[1:].reshape(-1))
        loss.backward()  # backward propagation
        clip_grad_norm_(model.parameters(), grad_clip)  # clip gradients
        optimizer.step()  # update parameters
        # NOTE(review): if `criterion` already averages over tokens, dividing by the target
        # length scales the value a second time; kept so logged numbers stay comparable
        running_loss += loss.item() / target_len

        # report after every full window; testing (i + 1) keeps each window at exactly
        # `window` batches (testing i would put 101 batches into the first one)
        if (i + 1) % window == 0:
            average = running_loss / window
            batch_losses.append(average)
            logger.info("%s | Finished %.1f%% | Mini-batch %d | Avg Loss: %5.2f" %
                        (datetime.now().strftime('%H:%M:%S'), (i+1) / n_batches * 100, i+1, average))
            running_loss = 0
    return batch_losses


def evaluate(model, dataloader, criterion, vocab_size):
    """
    evaluation over one epoch
    :param model: the Seq2Seq model
    :param dataloader: evaluation dataloader
    :param criterion: loss function
    :param vocab_size: target vocabulary size
    :return: average evaluation loss over all mini-batches
    """
    window = 100  # number of mini-batches per reported average
    # mirror train(): the model may have been left on another device (translation()
    # moves it to the CPU), while the batches below are sent to Config.device
    model.to(Config.device)
    model.eval()
    eval_loss = 0
    running_loss = 0
    n_batches = len(dataloader)
    with torch.no_grad():
        for i, (source, target) in enumerate(dataloader):
            source = source.to(Config.device)
            target = target.to(Config.device)
            target_len = target.size(0)
            # teacher forcing is disabled: the model is fed its own predictions
            output = model(source, target, teacher_forcing=0.0).to(Config.device)
            # calculate NLL loss, ignore first token <SOS>
            loss = criterion(output[1:].reshape(-1, vocab_size), target[1:].reshape(-1))
            # NOTE(review): if `criterion` already averages over tokens, dividing by the
            # target length scales the value a second time; kept to match train()
            step_loss = loss.item() / target_len
            eval_loss += step_loss
            running_loss += step_loss

            # report after every full window; testing (i + 1) keeps each window at exactly
            # `window` batches (testing i would put 101 batches into the first one)
            if (i + 1) % window == 0:
                average = running_loss / window
                logger.info("%s | Finished %.1f%% | Mini-batch %d | Avg Loss: %5.2f" %
                            (datetime.now().strftime('%H:%M:%S'), (i + 1) / n_batches * 100, i + 1, average))
                running_loss = 0
    return eval_loss / n_batches

In [None]:
import torch
from torch.utils.data import DataLoader
from torch.nn import NLLLoss
from torch import optim

from datetime import datetime
import pickle

In [None]:
import torch
from torch.utils.data import DataLoader
from torch.nn import NLLLoss
from torch import optim

from datetime import datetime
import pickle


'''# import and process data
data = preprocess(Config.data_path)
# data = data[:5000]  # test a small set
en_vocab, cn_vocab = getVocabulary(data, "English", "Chinese", Config.max_vocab_size)
train_data, eval_data = train_test_split(data, test_split=0.3)'''

# import and process data: both columns are Chinese, so one shared vocabulary is built
data = Chpreprocess("/content/drive/MyDrive/all.txt")
#data = data[:5000]  # test a small set
cn_vocab = ChgetVocabulary(data, "Chinese", Config.max_vocab_size)
train_data, eval_data = train_test_split(data, test_split=0.3)

# only the third entry of Config.parameter_list is trained here ([2:3] selects one item)
for parameter in Config.parameter_list[2:3]:
    print(parameter)
    # unpack the hyper-parameters of this run by position
    embedding_size = parameter[0]
    hidden_size = parameter[1]
    dropout = parameter[2]
    teacher_forcing = parameter[3]
    batch_size = parameter[4]
    learning_rate = parameter[5]
    print("目前参数情况为:",embedding_size,',',hidden_size,',',dropout,',',teacher_forcing,',',batch_size,',',learning_rate)
    # get dataloader; source and target share the same vocabulary
    train_dataset = TranslationData(train_data, cn_vocab, cn_vocab)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_fn)

    eval_dataset = TranslationData(eval_data, cn_vocab, cn_vocab)
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size, collate_fn=collate_fn)

    # prepare training
    encoder = Encoder(cn_vocab.vocabulary_size, embedding_size, hidden_size, Config.n_layers, dropout)
    decoder = Decoder(cn_vocab.vocabulary_size, embedding_size, hidden_size, Config.n_layers, dropout)
    model2 = Seq2Seq(encoder, decoder)
    model2 = model2.to(Config.device)

    optimizer = optim.Adam(model2.parameters(), lr=learning_rate)
    criterion = NLLLoss(ignore_index=1)  # ignore padding index

    # training epochs
    training_losses = []
    best_eval_loss = 100000  # sentinel larger than any expected loss
    for epoch in range(Config.epochs):
        logger.info(f"\n{datetime.now().strftime('%H:%M:%S')} | Training epoch {epoch+1} ...")
        logger.info("-" * 30)
        training_losses += train(model2, train_loader, optimizer, criterion, cn_vocab.vocabulary_size,
                  Config.grad_clip, teacher_forcing)
        logger.info(f"\n{datetime.now().strftime('%H:%M:%S')} | Evaluating epoch {epoch + 1} ...")
        logger.info("-" * 30)
        eval_loss = evaluate(model2, eval_loader, criterion, cn_vocab.vocabulary_size)
        print('eval_loss',eval_loss)
        # checkpoint the weights and the loss history only when the evaluation loss improves
        if eval_loss < best_eval_loss:
            best_eval_loss = eval_loss
            logger.info(f"\n{datetime.now().strftime('%H:%M:%S')} | Saving model2 ...")
            torch.save(model2.state_dict(), "seq2seq.bin")
            with open('training_losses.bin', 'wb') as f:
                pickle.dump(training_losses, f)
        # the latest evaluation loss is written every epoch, overwriting the previous file
        with open('eval_loss.bin', 'wb') as f:
            pickle.dump(eval_loss, f)
        # sample translations after each epoch; translation() moves the model to the CPU
        # and the next call to train() moves it back to Config.device
        sentence = "明天 要 出来 玩 吗"
        logger.info("\n[In] " + sentence)
        logger.info("[Out] " + translation(sentence, model2, Config.max_sentence_length, cn_vocab, cn_vocab))

        sentence = "脸大 适合 什么 发型"
        logger.info("\n[In] " + sentence)
        logger.info("[Out] " + translation(sentence, model2, Config.max_sentence_length, cn_vocab, cn_vocab))

        sentence = "如何 学 好 英语"
        logger.info("\n[In] " + sentence)
        logger.info("[Out] " + translation(sentence, model2, Config.max_sentence_length, cn_vocab, cn_vocab))


14:42:17 | Training epoch 1 ...

14:42:17 | Training epoch 1 ...

14:42:17 | Training epoch 1 ...

14:42:17 | Training epoch 1 ...
------------------------------
------------------------------
------------------------------
------------------------------


[300, 256, 0.5, 0.5, 32, 0.001]
目前参数情况为: 300 , 256 , 0.5 , 0.5 , 32 , 0.001


14:42:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.50
14:42:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.50
14:42:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.50
14:42:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.50
14:42:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.44
14:42:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.44
14:42:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.44
14:42:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.44
14:42:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.39
14:42:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.39
14:42:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.39
14:42:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.39
14:42:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.37
14:42:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.37
14:42:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.37
14:42:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.37
14:43:05 | Finished 52.8

eval_loss 0.282463613214085



[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型
[Out] 适合适合适合什么发型
[Out] 适合适合适合什么发型
[Out] 适合适合适合什么发型
[Out] 适合适合适合什么发型

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语
[Out] 如何英语英语
[Out] 如何英语英语
[Out] 如何英语英语
[Out] 如何英语英语

14:43:57 | Training epoch 2 ...

14:43:57 | Training epoch 2 ...

14:43:57 | Training epoch 2 ...

14:43:57 | Training epoch 2 ...
------------------------------
------------------------------
------------------------------
------------------------------
14:44:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.26
14:44:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.26
14:44:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.26
14:44:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.26
14:44:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.25
14:44:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.25
14:44:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.25
14:44:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.25


eval_loss 0.24761443086948481



[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语
[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语

14:45:36 | Training epoch 3 ...

14:45:36 | Training epoch 3 ...

14:45:36 | Training epoch 3 ...

14:45:36 | Training epoch 3 ...
------------------------------
------------------------------
------------------------------
------------------------------
14:45:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.18
14:45:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.18
14:45:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.18
14:45:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.18
14:45:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.18
14:45:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.18
14:45:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.18
14:45:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.18
14:46:05 | Finished 31.8% | Mini-batch 301 | Avg

eval_loss 0.2417379962743903



[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型

[In] 脸大 适合 什么 发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型
[Out] 脸大适合什么发型

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语

[In] 如何 学 好 英语
[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语

14:47:17 | Training epoch 4 ...

14:47:17 | Training epoch 4 ...

14:47:17 | Training epoch 4 ...

14:47:17 | Training epoch 4 ...
------------------------------
------------------------------
------------------------------
------------------------------
14:47:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.15
14:47:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.15
14:47:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.15
14:47:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.15
14:47:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.15
14:47:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.15
14:47:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.15
14:47:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.15
14:47:45 | Finished 31.8% | Mi

eval_loss 0.24451459009220564


14:49:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.13
14:49:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.13
14:49:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.13
14:49:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.13
14:49:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.13
14:49:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.13
14:49:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.13
14:49:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.13
14:49:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.13
14:49:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.13
14:49:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.13
14:49:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.13
14:49:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.12
14:49:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.12
14:49:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.12
14:49:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.12
14:49:43 | Finished 52.8

eval_loss 0.24934410824089887


14:50:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.12
14:50:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.12
14:50:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.12
14:50:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.12
14:50:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.11
14:50:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.11
14:50:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.11
14:50:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.11
14:51:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.11
14:51:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.11
14:51:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.11
14:51:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.11
14:51:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.11
14:51:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.11
14:51:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.11
14:51:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.11
14:51:21 | Finished 52.8

eval_loss 0.2554868944681214


14:52:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.10
14:52:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.10
14:52:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.10
14:52:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.10
14:52:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.10
14:52:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.10
14:52:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.10
14:52:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.10
14:52:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.10
14:52:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.10
14:52:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.10
14:52:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.10
14:52:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.10
14:52:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.10
14:52:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.10
14:52:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.10
14:53:00 | Finished 52.8

eval_loss 0.2608012283280472


14:54:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.09
14:54:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.09
14:54:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.09
14:54:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.09
14:54:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.09
14:54:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.09
14:54:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.09
14:54:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.09
14:54:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.09
14:54:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.09
14:54:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.09
14:54:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.09
14:54:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.09
14:54:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.09
14:54:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.09
14:54:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.09
14:54:38 | Finished 52.8

eval_loss 0.26670312200290075


14:55:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:55:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:55:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:55:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:55:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.08
14:55:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.08
14:55:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.08
14:55:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.08
14:55:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:55:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:55:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:55:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:56:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.08
14:56:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.08
14:56:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.08
14:56:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.08
14:56:17 | Finished 52.8

eval_loss 0.2752947344399334


14:57:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:57:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:57:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:57:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.08
14:57:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:57:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:57:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:57:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:57:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:57:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:57:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:57:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.08
14:57:46 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:57:46 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:57:46 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:57:46 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:57:56 | Finished 52.8

eval_loss 0.2818172905786448


14:58:57 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.07
14:58:57 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.07
14:58:57 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.07
14:58:57 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.07
14:59:06 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:59:06 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:59:06 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:59:06 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.07
14:59:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.07
14:59:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.07
14:59:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.07
14:59:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.07
14:59:25 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:59:25 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:59:25 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:59:25 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.07
14:59:34 | Finished 52.8

eval_loss 0.28989362710661337


15:00:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:00:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:00:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:00:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:00:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:00:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:00:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:00:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:00:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:00:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:00:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:00:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:01:03 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:01:03 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:01:03 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:01:03 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:01:13 | Finished 52.8

eval_loss 0.29653387624048


15:02:14 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:02:14 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:02:14 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:02:14 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.06
15:02:23 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:02:23 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:02:23 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:02:23 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.06
15:02:32 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:02:32 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:02:32 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:02:32 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:02:42 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:02:42 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:02:42 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:02:42 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.06
15:02:51 | Finished 52.8

eval_loss 0.3037632741992905


15:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.06
15:04:20 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:04:20 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:04:20 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:04:20 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:04:30 | Finished 52.8

eval_loss 0.31162101381408286


15:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:05:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:05:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:05:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:05:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:05:59 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:05:59 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:05:59 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:05:59 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:06:08 | Finished 52.8

eval_loss 0.3175878113614777


15:07:10 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:07:10 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:07:10 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:07:10 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.05
15:07:19 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:07:19 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:07:19 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:07:19 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.05
15:07:28 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:07:28 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:07:28 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:07:28 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.05
15:07:38 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:07:38 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:07:38 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:07:38 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.05
15:07:47 | Finished 52.8

eval_loss 0.3235203488010319


15:08:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:08:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:08:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:08:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:08:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:08:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:08:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:08:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:09:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:09:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:09:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:09:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:09:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:09:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:09:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:09:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:09:25 | Finished 52.8

eval_loss 0.329827299798034


15:10:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:10:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:10:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:10:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:10:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:10:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:10:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:10:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:10:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:10:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:10:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:10:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:10:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:10:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:10:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:10:55 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:11:04 | Finished 52.8

eval_loss 0.3354255581109422


------------------------------
15:12:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:12:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:12:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:12:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:12:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:12:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:12:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:12:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:12:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:12:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:12:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:12:24 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:12:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:12:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:12:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:12:33 | Finished 42.3% | Mini-batch 401 | Avg Loss:

eval_loss 0.3417241589417926


15:13:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:13:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:13:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:13:44 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.04
15:13:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:13:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:13:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:13:53 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.04
15:14:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:14:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:14:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:14:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.04
15:14:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:14:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:14:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:14:12 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.04
15:14:21 | Finished 52.8

eval_loss 0.3479354497714545


------------------------------
15:15:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:15:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:15:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:15:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:15:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:15:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:15:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:15:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:15:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:15:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:15:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:15:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:15:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:15:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:15:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:15:51 | Finished 42.3% | Mini-batch 401 | Avg Loss:

eval_loss 0.35229321532802305


15:17:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:17:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:17:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:17:01 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:17:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:17:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:17:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:17:10 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:17:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:17:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:17:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:17:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:17:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:17:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:17:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:17:29 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:17:39 | Finished 52.8

eval_loss 0.3571597925870573


------------------------------
------------------------------
15:18:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:18:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:18:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:18:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:18:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:18:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:18:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:18:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:18:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:18:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:18:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:18:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:19:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:19:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:19:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:19:08 | Finished 42.

eval_loss 0.359777137452513


15:20:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:20:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:20:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:20:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:20:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:20:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:20:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:20:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:20:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:20:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:20:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:20:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:20:47 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:20:47 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:20:47 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:20:47 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:20:57 | Finished 52.8

eval_loss 0.3649253156834694


15:21:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:21:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:21:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:21:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:22:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:22:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:22:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:22:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:22:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:22:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:22:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:22:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:22:26 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:22:26 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:22:26 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:22:26 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:22:35 | Finished 52.8

eval_loss 0.37056722536207776



15:23:27 | Training epoch 26 ...

15:23:27 | Training epoch 26 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:23:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:23:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:23:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:23:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:23:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:23:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:23:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:23:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:23:55 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:23:55 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:23:55 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:23:55 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:24:05 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:24:05 | Fini

eval_loss 0.3743565960972326


15:25:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:25:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:25:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:25:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:25:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:25:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:25:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:25:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:25:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:25:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:25:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:25:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:25:43 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:25:43 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:25:43 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:25:43 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.03
15:25:52 | Finished 52.8

eval_loss 0.3809255405370522



15:26:44 | Training epoch 28 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:26:53 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:26:53 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:26:53 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:26:53 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:27:02 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:27:02 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:27:02 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:27:02 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.03
15:27:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:27:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:27:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:27:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:27:21 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:27:21 | Finished 42.3% | Mini-batch 401 | Avg 

eval_loss 0.38421982770202556



15:28:22 | Training epoch 29 ...

15:28:22 | Training epoch 29 ...

15:28:22 | Training epoch 29 ...

15:28:22 | Training epoch 29 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:28:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:28:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:28:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:28:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.03
15:28:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:28:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:28:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:28:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:28:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:28:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:28:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:28:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.03
15:28:59

eval_loss 0.3879069569994816


15:30:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:30:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:30:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:30:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:30:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:30:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:30:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:30:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:30:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:30:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:30:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:30:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:30:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:30:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:30:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:30:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:30:46 | Finished 52.8

eval_loss 0.3922984451981216


15:31:47 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:31:47 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:31:47 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:31:47 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:31:56 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:31:56 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:31:56 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:31:56 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:32:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:32:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:32:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:32:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:32:15 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:32:15 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:32:15 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:32:15 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:32:24 | Finished 52.8

eval_loss 0.3952600943921998



15:33:15 | Training epoch 32 ...

15:33:15 | Training epoch 32 ...

15:33:15 | Training epoch 32 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:33:25 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:33:25 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:33:25 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:33:25 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:33:34 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:33:34 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:33:34 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:33:34 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:33:43 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:33:43 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:33:43 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:33:43 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:33:52 | Finished 42.3% | Mini-batch 401

eval_loss 0.4000191829074489


15:35:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:35:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:35:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:35:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:35:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:35:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:35:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:35:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:35:21 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:35:21 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:35:21 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:35:21 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:35:30 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:35:30 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:35:30 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:35:30 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:35:39 | Finished 52.8

eval_loss 0.4045852600272972


15:36:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:36:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:36:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:36:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:36:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:36:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:36:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:36:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:36:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:36:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:36:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:36:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:37:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:37:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:37:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:37:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:37:17 | Finished 52.8

eval_loss 0.40382592530029604



15:38:08 | Training epoch 35 ...

15:38:08 | Training epoch 35 ...

15:38:08 | Training epoch 35 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:38:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:38:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:38:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:38:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:38:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:38:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:38:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:38:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:38:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:38:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:38:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:38:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:38:45 | Finished 42.3% | Mini-batch 401

eval_loss 0.41071266580167043



15:39:46 | Training epoch 36 ...

15:39:46 | Training epoch 36 ...

15:39:46 | Training epoch 36 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:39:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:39:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:39:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:39:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:40:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:40:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:40:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:40:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:40:14 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:40:14 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:40:14 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:40:14 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:40:23 | Finished 42.3% | Mini-batch 401

eval_loss 0.411723372640354


------------------------------
------------------------------
------------------------------
15:41:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:41:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:41:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:41:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:41:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:41:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:41:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:41:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:41:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:41:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:41:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:41:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:42:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:42:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:42:01 | Finished 42.3% | Mini-batch 401 | Avg Loss

eval_loss 0.41812666064197646


15:43:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:43:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:43:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:43:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:43:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:43:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:43:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:43:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:43:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:43:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:43:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:43:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:43:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:43:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:43:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:43:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:43:48 | Finished 52.8

eval_loss 0.41628533031981546


15:44:49 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:44:49 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:44:49 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:44:49 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:44:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:44:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:44:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:44:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:45:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:45:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:45:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:45:07 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:45:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:45:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:45:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:45:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:45:25 | Finished 52.8

eval_loss 0.4207046364787408



15:46:17 | Training epoch 40 ...

15:46:17 | Training epoch 40 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:46:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:46:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:46:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:46:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:46:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:46:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:46:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:46:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:46:44 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:46:44 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:46:44 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:46:44 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:46:54 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:46:54 | Fini

eval_loss 0.42661710575322015


15:48:03 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:48:03 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:48:03 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:48:03 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:48:12 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:48:12 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:48:12 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:48:12 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:48:22 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:48:22 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:48:22 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:48:22 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:48:31 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:48:31 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:48:31 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:48:31 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:48:40 | Finished 52.8

eval_loss 0.4278010569284546


15:49:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:49:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:49:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:49:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:49:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:49:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:49:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:49:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:49:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:49:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:49:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:49:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:50:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:50:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:50:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:50:08 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:50:17 | Finished 52.8

eval_loss 0.42938956394407324



15:51:08 | Training epoch 43 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:51:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:51:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:51:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:51:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:51:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:51:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:51:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:51:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:51:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:51:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:51:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:51:36 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:51:45 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:51:45 | Finished 42.3% | Mini-batch 401 | Avg 

eval_loss 0.4331392376740454


15:52:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:52:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:52:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:52:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:53:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:53:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:53:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:53:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:53:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:53:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:53:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:53:12 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:53:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:53:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:53:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:53:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:53:31 | Finished 52.8

eval_loss 0.4361041313887417


15:54:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:54:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:54:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:54:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:54:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
15:54:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
15:54:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
15:54:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
15:54:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:54:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:54:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:54:49 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:54:58 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:54:58 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:54:58 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:54:58 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:55:07 | Finished 52.8

eval_loss 0.4382291122220696


15:56:08 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:56:08 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:56:08 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:56:08 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:56:17 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:56:17 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:56:17 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:56:17 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:56:26 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:56:26 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:56:26 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:56:26 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:56:35 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:56:35 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:56:35 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:56:35 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.02
15:56:45 | Finished 52.8

eval_loss 0.44068250946911086


15:57:45 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:57:45 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:57:45 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:57:45 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:57:54 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:57:54 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:57:54 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:57:54 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:58:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:58:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:58:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:58:03 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:58:13 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
15:58:13 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
15:58:13 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
15:58:13 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
15:58:22 | Finished 52.8

eval_loss 0.4436930971792287



15:59:13 | Training epoch 48 ...

15:59:13 | Training epoch 48 ...

15:59:13 | Training epoch 48 ...

15:59:13 | Training epoch 48 ...
------------------------------
------------------------------
------------------------------
------------------------------
15:59:22 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:59:22 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:59:22 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:59:22 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
15:59:31 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:59:31 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:59:31 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:59:31 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.02
15:59:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:59:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:59:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:59:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
15:59:50

eval_loss 0.44363179957987553



16:00:50 | Training epoch 49 ...

16:00:50 | Training epoch 49 ...

16:00:50 | Training epoch 49 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:01:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:01:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:01:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:01:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:01:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:01:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:01:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:01:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:01:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:01:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:01:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:01:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:01:27 | Finished 42.3% | Mini-batch 401

eval_loss 0.44833144111679063


[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语

16:02:28 | Training epoch 50 ...

16:02:28 | Training epoch 50 ...

16:02:28 | Training epoch 50 ...

16:02:28 | Training epoch 50 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:02:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:02:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:02:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:02:37 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:02:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:02:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:02:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:02:46 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:02:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:02:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:02:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:02:56 | Fi

eval_loss 0.4507916436593839


[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语

16:04:06 | Training epoch 51 ...

16:04:06 | Training epoch 51 ...

16:04:06 | Training epoch 51 ...

16:04:06 | Training epoch 51 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:04:16 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:04:16 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:04:16 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:04:16 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:04:25 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:04:25 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:04:25 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:04:25 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:04:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:04:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:04:34 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:04:34 | Fi

eval_loss 0.4512048678841948


16:05:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:05:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:05:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:05:55 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:06:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:06:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:06:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:06:04 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:06:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:06:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:06:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:06:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:06:23 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:06:23 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:06:23 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:06:23 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:06:32 | Finished 52.8

eval_loss 0.45514659374420025


16:07:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:07:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:07:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:07:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:07:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:07:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:07:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:07:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:07:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:07:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:07:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:07:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:08:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:08:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:08:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:08:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:08:11 | Finished 52.8

eval_loss 0.4597754267753188


[Out] 如何学好学
[Out] 如何学好学
[Out] 如何学好学

16:09:03 | Training epoch 54 ...

16:09:03 | Training epoch 54 ...

16:09:03 | Training epoch 54 ...

16:09:03 | Training epoch 54 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:09:12 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:09:12 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:09:12 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:09:12 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:09:21 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:09:21 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:09:21 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:09:21 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:09:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:09:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:09:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:09:31 | Finished 31.8% | Mini-

eval_loss 0.45969943477519615



16:10:41 | Training epoch 55 ...

16:10:41 | Training epoch 55 ...

16:10:41 | Training epoch 55 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:10:51 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:10:51 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:10:51 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:10:51 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:11:00 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:11:00 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:11:00 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:11:00 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:11:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:11:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:11:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:11:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:11:19 | Finished 42.3% | Mini-batch 401

eval_loss 0.46061916253778373


[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语
[Out] 如何学好的英语

16:12:20 | Training epoch 56 ...

16:12:20 | Training epoch 56 ...

16:12:20 | Training epoch 56 ...

16:12:20 | Training epoch 56 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:12:30 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:12:30 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:12:30 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:12:30 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:12:39 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:12:39 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:12:39 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:12:39 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:12:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:12:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:12:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:12:48 | Fi

eval_loss 0.4634156750634624


16:14:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:14:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:14:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:14:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.02
16:14:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:14:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:14:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:14:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:14:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:14:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:14:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:14:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:14:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:14:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:14:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:14:37 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:14:46 | Finished 52.8

eval_loss 0.46599045730624883


[Out] 如何学好学
[Out] 如何学好学

16:15:38 | Training epoch 58 ...

16:15:38 | Training epoch 58 ...

16:15:38 | Training epoch 58 ...

16:15:38 | Training epoch 58 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:15:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:15:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:15:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:15:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:15:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:15:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:15:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:15:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:16:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:16:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:16:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:16:06 | Finished 31.8% | Mini-batch 301 | 

eval_loss 0.46994024079366514


16:17:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:17:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:17:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:17:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:17:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:17:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:17:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:17:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:17:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:17:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:17:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:17:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:17:54 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:17:54 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:17:54 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:17:54 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:18:03 | Finished 52.8

eval_loss 0.46566994426169467


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

16:18:55 | Training epoch 60 ...

16:18:55 | Training epoch 60 ...

16:18:55 | Training epoch 60 ...

16:18:55 | Training epoch 60 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:19:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:19:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:19:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:19:05 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:19:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:19:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:19:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:19:14 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:19:23 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:19:23 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:19:23 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.02
16:19:23 | Finished 31.8%

eval_loss 0.47010032226075443



16:20:34 | Training epoch 61 ...

16:20:34 | Training epoch 61 ...

16:20:34 | Training epoch 61 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:20:43 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:20:43 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:20:43 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:20:43 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:20:52 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:20:52 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:20:52 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:20:52 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:21:02 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:21:02 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:21:02 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:21:02 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:21:11 | Finished 42.3% | Mini-batch 401

eval_loss 0.4699839666708558


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

16:22:13 | Training epoch 62 ...

16:22:13 | Training epoch 62 ...

16:22:13 | Training epoch 62 ...

16:22:13 | Training epoch 62 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:22:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:22:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:22:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:22:23 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:22:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:22:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:22:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:22:32 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:22:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:22:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:22:41 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:22:41 | Finished 31.8%

eval_loss 0.47594443516924434


[Out] 如何学好
[Out] 如何学好

16:23:52 | Training epoch 63 ...

16:23:52 | Training epoch 63 ...

16:23:52 | Training epoch 63 ...

16:23:52 | Training epoch 63 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:24:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:24:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:24:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:24:02 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:24:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:24:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:24:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:24:11 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:24:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:24:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:24:20 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:24:20 | Finished 31.8% | Mini-batch 301 | Av

eval_loss 0.47733180186248364


16:25:41 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:25:41 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:25:41 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:25:41 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:25:50 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:25:50 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:25:50 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:25:50 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:26:00 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:26:00 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:26:00 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:26:00 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:26:09 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:26:09 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:26:09 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:26:09 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:26:18 | Finished 52.8

eval_loss 0.4799200875365669


[Out] 如何学好

16:27:10 | Training epoch 65 ...

16:27:10 | Training epoch 65 ...

16:27:10 | Training epoch 65 ...

16:27:10 | Training epoch 65 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:27:20 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:27:20 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:27:20 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:27:20 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:27:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:27:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:27:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:27:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:27:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:27:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:27:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:27:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.

eval_loss 0.4807242125027172


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

16:28:50 | Training epoch 66 ...

16:28:50 | Training epoch 66 ...

16:28:50 | Training epoch 66 ...

16:28:50 | Training epoch 66 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:29:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:29:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:29:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:29:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:29:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:29:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:29:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:29:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:29:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:29:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:29:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:29:19 | Finished 31.8%

eval_loss 0.4833310898784878


[Out] 如何学好
[Out] 如何学好

16:30:30 | Training epoch 67 ...

16:30:30 | Training epoch 67 ...

16:30:30 | Training epoch 67 ...

16:30:30 | Training epoch 67 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:30:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:30:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:30:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:30:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:30:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:30:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:30:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:30:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:30:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:30:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:30:58 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:30:58 | Finished 31.8% | Mini-batch 301 | Av

eval_loss 0.4818259299219774


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

16:32:11 | Training epoch 68 ...

16:32:11 | Training epoch 68 ...

16:32:11 | Training epoch 68 ...

16:32:11 | Training epoch 68 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:32:21 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:32:21 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:32:21 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:32:21 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:32:30 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:32:30 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:32:30 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:32:30 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:32:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:32:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:32:39 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:32:39 | Finished 31.8%

eval_loss 0.48340920371889645



16:33:50 | Training epoch 69 ...

16:33:50 | Training epoch 69 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:34:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:34:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:34:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:34:00 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:34:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:34:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:34:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:34:09 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:34:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:34:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:34:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:34:19 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:34:28 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:34:28 | Fini

eval_loss 0.4883472682768974



16:35:30 | Training epoch 70 ...

16:35:30 | Training epoch 70 ...

16:35:30 | Training epoch 70 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:35:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:35:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:35:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:35:40 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:35:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:35:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:35:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:35:49 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:35:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:35:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:35:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:35:59 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:36:08 | Finished 42.3% | Mini-batch 401

eval_loss 0.4883376961242581


16:37:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:37:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:37:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:37:19 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:37:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:37:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:37:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:37:29 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:37:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:37:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:37:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:37:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:37:48 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:37:48 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:37:48 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:37:48 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:37:57 | Finished 52.8

eval_loss 0.49113019158006055


16:38:59 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:38:59 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:38:59 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:38:59 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:39:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:39:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:39:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:39:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:39:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:39:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:39:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:39:18 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:39:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:39:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:39:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:39:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:39:37 | Finished 52.8

eval_loss 0.4907280302313369


16:40:39 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:40:39 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:40:39 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:40:39 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:40:48 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:40:48 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:40:48 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:40:48 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:40:57 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:40:57 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:40:57 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:40:57 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:41:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:41:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:41:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:41:07 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:41:16 | Finished 52.8

eval_loss 0.49633533403326724


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

16:42:09 | Training epoch 74 ...

16:42:09 | Training epoch 74 ...

16:42:09 | Training epoch 74 ...

16:42:09 | Training epoch 74 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:42:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:42:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:42:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:42:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:42:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:42:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:42:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:42:27 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:42:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:42:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:42:37 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:42:37 | Finished 31.8%

eval_loss 0.49436775313787595



16:43:48 | Training epoch 75 ...

16:43:48 | Training epoch 75 ...

16:43:48 | Training epoch 75 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:43:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:43:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:43:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:43:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:44:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:44:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:44:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:44:07 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:44:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:44:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:44:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:44:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:44:26 | Finished 42.3% | Mini-batch 401

eval_loss 0.4963452616141267


[Out] 如何学好还是英语
[Out] 如何学好还是英语
[Out] 如何学好还是英语
[Out] 如何学好还是英语

16:45:28 | Training epoch 76 ...

16:45:28 | Training epoch 76 ...

16:45:28 | Training epoch 76 ...

16:45:28 | Training epoch 76 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:45:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:45:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:45:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:45:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:45:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:45:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:45:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:45:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:45:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:45:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:45:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:45:56 

eval_loss 0.4999672606847712


[Out] 如何学好还是英语
[Out] 如何学好还是英语
[Out] 如何学好还是英语
[Out] 如何学好还是英语

16:47:08 | Training epoch 77 ...

16:47:08 | Training epoch 77 ...

16:47:08 | Training epoch 77 ...

16:47:08 | Training epoch 77 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:47:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:47:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:47:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:47:18 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:47:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:47:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:47:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:47:28 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:47:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:47:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:47:38 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:47:38 

eval_loss 0.5022171057456293


16:48:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:48:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:48:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:48:58 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:49:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:49:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:49:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:49:08 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:49:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:49:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:49:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:49:17 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:49:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:49:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:49:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:49:27 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:49:36 | Finished 52.8

eval_loss 0.5029209077442753


[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语

16:50:28 | Training epoch 79 ...

16:50:28 | Training epoch 79 ...

16:50:28 | Training epoch 79 ...

16:50:28 | Training epoch 79 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:50:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:50:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:50:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:50:38 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:50:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:50:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:50:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:50:47 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:50:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:50:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:50:56 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:50:56 | Finished 31.8% | Mini-

eval_loss 0.504873673023367


16:52:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:52:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:52:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:52:17 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:52:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:52:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:52:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:52:26 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:52:35 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:52:35 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:52:35 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:52:35 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:52:45 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:52:45 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:52:45 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:52:45 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:52:54 | Finished 52.8

eval_loss 0.5032556058386977


[Out] 如何学好
[Out] 如何学好

16:53:46 | Training epoch 81 ...

16:53:46 | Training epoch 81 ...

16:53:46 | Training epoch 81 ...

16:53:46 | Training epoch 81 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:53:56 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:53:56 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:53:56 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:53:56 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:54:05 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:54:05 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:54:05 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:54:05 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:54:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:54:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:54:15 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:54:15 | Finished 31.8% | Mini-batch 301 | Av

eval_loss 0.5063433301170248


------------------------------
------------------------------
------------------------------
16:55:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:55:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:55:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:55:35 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:55:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:55:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:55:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:55:44 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:55:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:55:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:55:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:55:54 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:56:04 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:56:04 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:56:04 | Finished 42.3% | Mini-batch 401 | Avg Loss

eval_loss 0.505511587162691


[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语
[Out] 如何学英语

16:57:05 | Training epoch 83 ...

16:57:05 | Training epoch 83 ...

16:57:05 | Training epoch 83 ...

16:57:05 | Training epoch 83 ...
------------------------------
------------------------------
------------------------------
------------------------------
16:57:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:57:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:57:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:57:15 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:57:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:57:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:57:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:57:24 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:57:33 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:57:33 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:57:33 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:57:33 | Finished 3

eval_loss 0.5104620133674036


16:58:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:58:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:58:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:58:54 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
16:59:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:59:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:59:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:59:03 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
16:59:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:59:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:59:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:59:13 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
16:59:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:59:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:59:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:59:22 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
16:59:31 | Finished 52.8

eval_loss 0.507626928663593


17:00:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:00:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:00:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:00:33 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:00:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:00:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:00:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:00:42 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:00:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:00:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:00:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:00:52 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:01:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:01:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:01:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:01:01 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:01:11 | Finished 52.8

eval_loss 0.509518949153064


17:02:13 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:02:13 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:02:13 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:02:13 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:02:22 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:02:22 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:02:22 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:02:22 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:02:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:02:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:02:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:02:31 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:02:41 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:02:41 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:02:41 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:02:41 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:02:50 | Finished 52.8

eval_loss 0.5131893155548484


[Out] 如何学英语
[Out] 如何学英语

17:03:42 | Training epoch 87 ...

17:03:42 | Training epoch 87 ...

17:03:42 | Training epoch 87 ...

17:03:42 | Training epoch 87 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:03:52 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:04:01 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:04:11 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:04:11 | Finished 31.8% | Mini-batch 301 | 

eval_loss 0.510231828844121


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

17:05:22 | Training epoch 88 ...

17:05:22 | Training epoch 88 ...

17:05:22 | Training epoch 88 ...

17:05:22 | Training epoch 88 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:05:31 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:05:40 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:05:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:05:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:05:50 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:05:50 | Finished 31.8%

eval_loss 0.5143095169964095


17:07:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:07:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:07:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:07:11 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:07:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:07:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:07:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:07:20 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:07:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:07:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:07:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:07:30 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:07:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:07:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:07:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:07:39 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:07:48 | Finished 52.8

eval_loss 0.51643075787492



17:08:40 | Training epoch 90 ...

17:08:40 | Training epoch 90 ...

17:08:40 | Training epoch 90 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:08:50 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:08:50 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:08:50 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:08:50 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:08:59 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:08:59 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:08:59 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:08:59 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:09:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:09:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:09:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:09:09 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:09:18 | Finished 42.3% | Mini-batch 401

eval_loss 0.5125911357137624


[Out] 如何学好
[Out] 如何学好
[Out] 如何学好
[Out] 如何学好

17:10:19 | Training epoch 91 ...

17:10:19 | Training epoch 91 ...

17:10:19 | Training epoch 91 ...

17:10:19 | Training epoch 91 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:10:29 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:10:29 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:10:29 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:10:29 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:10:38 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:10:38 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:10:38 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:10:38 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:10:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:10:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:10:48 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:10:48 | Finished 31.8%

eval_loss 0.5149025338153241



17:11:59 | Training epoch 92 ...

17:11:59 | Training epoch 92 ...

17:11:59 | Training epoch 92 ...

17:11:59 | Training epoch 92 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:12:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:12:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:12:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:12:09 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:12:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:12:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:12:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:12:18 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:12:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:12:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:12:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:12:27 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:12:36

eval_loss 0.5169190648966971


17:13:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:13:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:13:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:13:48 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:13:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:13:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:13:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:13:57 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:14:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:14:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:14:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:14:06 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:14:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:14:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:14:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:14:16 | Finished 42.3% | Mini-batch 401 | Avg Loss:  0.01
17:14:25 | Finished 52.8

eval_loss 0.5162457936861659


[Out] 怎么学好
[Out] 怎么学好
[Out] 怎么学好
[Out] 怎么学好

17:15:17 | Training epoch 94 ...

17:15:17 | Training epoch 94 ...

17:15:17 | Training epoch 94 ...

17:15:17 | Training epoch 94 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:15:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:15:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:15:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:15:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:15:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:15:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:15:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:15:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:15:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:15:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:15:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:15:45 | Finished 31.8%

eval_loss 0.5193068530661329


[Out] 怎么学好英语

17:16:56 | Training epoch 95 ...

17:16:56 | Training epoch 95 ...

17:16:56 | Training epoch 95 ...

17:16:56 | Training epoch 95 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:17:06 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:17:06 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:17:06 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:17:06 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:17:15 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:17:15 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:17:15 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:17:15 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:17:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:17:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:17:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:17:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  

eval_loss 0.5202212723064676


[Out] 怎么学好
[Out] 怎么学好
[Out] 怎么学好
[Out] 怎么学好

17:18:36 | Training epoch 96 ...

17:18:36 | Training epoch 96 ...

17:18:36 | Training epoch 96 ...

17:18:36 | Training epoch 96 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:18:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:18:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:18:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:18:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:18:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:18:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:18:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:18:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:19:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:19:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:19:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:19:05 | Finished 31.8%

eval_loss 0.520661317977208


[Out] 如何学英语好
[Out] 如何学英语好
[Out] 如何学英语好
[Out] 如何学英语好

17:20:17 | Training epoch 97 ...

17:20:17 | Training epoch 97 ...

17:20:17 | Training epoch 97 ...

17:20:17 | Training epoch 97 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:20:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:20:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:20:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:20:27 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:20:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:20:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:20:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:20:36 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:20:46 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:20:46 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:20:46 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:20:46 | Finish

eval_loss 0.5206549898552084


[Out] <UNK>的英语好
[Out] <UNK>的英语好
[Out] <UNK>的英语好
[Out] <UNK>的英语好

17:21:57 | Training epoch 98 ...

17:21:57 | Training epoch 98 ...

17:21:57 | Training epoch 98 ...

17:21:57 | Training epoch 98 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:22:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:22:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:22:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:22:07 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:22:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:22:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:22:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:22:16 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:22:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:22:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:22:25 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:22

eval_loss 0.5219150860632875


[Out] 如何学英语好
[Out] 如何学英语好
[Out] 如何学英语好
[Out] 如何学英语好

17:23:37 | Training epoch 99 ...

17:23:37 | Training epoch 99 ...

17:23:37 | Training epoch 99 ...

17:23:37 | Training epoch 99 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:23:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:23:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:23:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:23:46 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:23:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:23:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:23:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:23:55 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:24:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:24:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:24:05 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:24:05 | Finish

eval_loss 0.521427924331078


[Out] 好的英语好
[Out] 好的英语好
[Out] 好的英语好
[Out] 好的英语好

17:25:16 | Training epoch 100 ...

17:25:16 | Training epoch 100 ...

17:25:16 | Training epoch 100 ...

17:25:16 | Training epoch 100 ...
------------------------------
------------------------------
------------------------------
------------------------------
17:25:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:25:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:25:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:25:26 | Finished 10.7% | Mini-batch 101 | Avg Loss:  0.01
17:25:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:25:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:25:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:25:35 | Finished 21.2% | Mini-batch 201 | Avg Loss:  0.01
17:25:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:25:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:25:45 | Finished 31.8% | Mini-batch 301 | Avg Loss:  0.01
17:25:45 | Finish

eval_loss 0.522228732979889


In [None]:
# Echo model2 so the notebook prints its module tree (encoder / decoder / attention layers).
model2

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(10000, 300)
    (lstm): LSTM(300, 256, num_layers=2, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(10000, 300)
    (lstm): LSTM(556, 256, num_layers=2, dropout=0.5)
    (attention): Attention(
      (attn1): Linear(in_features=512, out_features=256, bias=True)
      (attn2): Linear(in_features=256, out_features=1, bias=True)
    )
    (out): Linear(in_features=512, out_features=10000, bias=True)
  )
)

In [None]:
# Persist only the learned parameters (the state_dict, not the whole module) to "test22.bin".
# NOTE(review): this saves `model`, whereas the surrounding cells display and run inference
# with `model2` -- confirm that `model` is really the object meant to be checkpointed.
torch.save(model.state_dict(), "test22.bin")

In [None]:
# Run the trained model over every sentence in the test file and store
# "<segmented input> <model output>" pairs in result2.txt.

# Read the test sentences (one per line); the context manager closes the
# handle, which the previous version left open.
with open('/content/drive/MyDrive/测试原句子.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()

# Strip newlines and tabs so each entry is a bare sentence.
test_lst = [line.replace('\n', '').replace('\t', '') for line in lines]


def _dump_results(rows, path='result2.txt'):
    """Overwrite *path* with one result per line and close the file."""
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(row + '\n' for row in rows)


result2 = []
count = 0
for sentence in test_lst:
    # Segment with jieba and join the tokens with spaces, the form passed to translation().
    word = " ".join(jieba.lcut(sentence))
    # cn_vocab is passed for both vocabulary arguments, exactly as in the original call.
    resu = translation(word, model2, Config.max_sentence_length, cn_vocab, cn_vocab)
    print(word, ' ', resu)
    temp = word + ' ' + resu
    result2.append(temp)
    count += 1
    # Periodic checkpoint so a crash or disconnect loses at most 99 results.
    if count % 100 == 0:
        _dump_results(result2)

# Final write: without it, results after the last multiple of 100 were never saved.
_dump_results(result2)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
父亲 给 我 上 的 一堂课 阅读 答案   父亲给我上的<UNK>阅读答案
怎么 看 淘宝 年度 账单   淘宝年度看账单怎么
带字 女生 头像   女生头像头像
性价比 较 高 的 平板 电脑   是<UNK>平板电脑电脑
成龙 最新 电影 叫 什么   成龙最新电影电影什么
奶粉 什么 牌子 比较 好 呀   什么牌子的奶粉好
天涯 明月刀 电视剧 好看 吗 ？ 大家   天涯明月刀到底到底有人说他
虎落平阳 一 ( ) 歇后语   虎落平阳歇后语
手机 有 什么 软件 可以 赚钱   手机有什么软件可以赚钱
怎么 才能 去掉 黑痣   <UNK>怎么去掉
怎么 查找 同名同姓 的 人   如何查找同名同姓的人
脸上 的 麻子 怎么 去掉   脸上有麻子怎么去掉
镜子 和 照片 哪个 更 真实 些   照片和照片哪个更真实
迅雷 继续 下载 没 资源   迅雷下载资源不足
孙俪 和 邓超 离婚 吗   孙俪和邓超离婚了吗
油烟机 哪个 牌子 好   什么牌子油烟机和好
能 看 下 红包 的 使用 范围 么   什么红包的红包最好
我 的 世界 电脑 版 如何 下载   我的世界电脑版怎么下载啊
亵渎 是 什么 意思   <UNK>什么意思
补肾 该 吃 什么 食物 啊   吃什么食物补肾
如何 登陆 新浪 博客   怎么登陆新浪游戏
呵呵 你 一脸 什么 意思   呵呵你一脸什么意思
如何 查询 驾驶证 真假   如何查询驾驶证真假
怎样 使 大腿 变瘦   怎样让让历史变瘦
情人节 送 什么 礼物 给 男朋友   情人节送什么礼物給男朋友
我 怎么 才 可以 注册 谷歌 账户   怎么才可以注册自己谷歌账户
天天 飞车 怎样 开挂   咋下载天天飞车
魔兽 世界 为什么 进不去 啊   魔兽世界为什么进不去为什么
最 吝啬 的 人 打 一 成语   最吝啬的人猜
这是 怎样 的 漫画 风格 呢   这是什么漫画风格
财富 值 怎么 获取   怎样获取财富值
全民 飞机 大战 宠物 哪个 好   全民大战飞机宠物哪个好
什么 时候 天气 回暖 了   天气什么时候能回暖啊?冷死了
女士 手表 有 哪些 品牌   品牌女士手表有哪些
如何 才能 删除 淘宝 帐号   淘宝如何删除才