## 机器翻译作业

In [1]:
# 因在colab进行，设置可访问drive里文件
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


本次作业使用 seq2seq模型完成英文到中文的机器翻译任务，请补充缺失部分的代码

In [2]:
#导入常用软件包
import torch
import sys
from torch import nn, optim
import random
from torch.nn.functional import softmax
import re
import os

from torchtext.legacy import data
# from torchtext.data import Iterator, BucketIterator
from torchtext.legacy.data import Iterator, BucketIterator
import torch.nn.functional as F
from collections import defaultdict
import string
import dill

from tqdm import tqdm

## 1.数据加载

数据：包含六个文件：
1. 训练集：train.seg.en.txt train.seg.zh.txt （11743条）
2. 验证集：dev.seg.en.txt dev.seg.zh.txt （2936条）
3. 测试集：test.seg.en.txt test.seg.zh.txt （5194条）

使用torchtext完成数据的加载，主要使用以下三个组件：

1. Field :主要包含以下数据预处理的配置信息，比如指定分词方法，是否转成小写，起始字符，结束字符，补全字符以及词典等等

2. Dataset :继承自pytorch的Dataset，用于加载数据，提供了TabularDataset可以指点路径，格式，Field信息就可以方便的完成数据加载。同时torchtext还提供预先构建的常用数据集的Dataset对象，可以直接加载使用，splits方法可以同时加载训练集，验证集和测试集。

3. Iterator : 主要是数据输出的模型的迭代器，可以支持batch定制

具体可以参照：https://pytorch.org/text/stable/index.html
关于torchtext的安装可以参照：https://github.com/pytorch/text

In [3]:
class Dataloader:
    def __init__(self, batch_size, device, eval=False):
        raw_data = self.read_data("/content/drive/MyDrive/Colab Notebooks/python/data/", test=eval)
        ## 训练模式
        if not eval:
            train_data, dev_data = raw_data
            ##定义数据字段
            self.id_field = data.Field(sequential=False, use_vocab=False)
            self.en_field = data.Field(init_token='<sos>', eos_token='<eos>', lower=True, include_lengths=True)
            self.zh_field = data.Field(init_token='<sos>', eos_token='<eos>', lower=True)
            self.fields = [("id", self.id_field), ("en", self.en_field), ("zh", self.zh_field)]

            ##构建数据集
            train_dataset = data.Dataset([data.Example.fromlist([idx, item[0], item[1]], self.fields) for idx, item in enumerate(train_data)], self.fields)
            dev_dataset =  data.Dataset([data.Example.fromlist([idx, item[0], item[1]], self.fields) for idx, item in enumerate(dev_data)], self.fields)
            
            ##构建数据迭代器
            self.train_iterator= BucketIterator(train_dataset, batch_size=batch_size, device=device, sort_key=lambda x: len(x.en), sort_within_batch=True)
            self.dev_iterator= BucketIterator(dev_dataset, batch_size=batch_size, device=device, sort_key=lambda x: len(x.en), sort_within_batch=True)
            
            ##构建词典
            self.en_field.build_vocab(train_dataset, min_freq=2)
            self.zh_field.build_vocab(train_dataset, min_freq=2)
            
            ##存储字段
            dill.dump(self.en_field, open("/content/drive/MyDrive/Colab Notebooks/python/model/EN.Field", "wb"))
            dill.dump(self.zh_field, open("/content/drive/MyDrive/Colab Notebooks/python/model/ZH.Field", "wb"))

            print("en vocab size:", len(self.en_field.vocab.itos),"zh vocab size:", len(self.zh_field.vocab.itos))
        
        ## 测试模式  
        else:
            test_data = raw_data[-1]
            ##加载存储的字段
            self.id_field = data.Field(sequential=False, use_vocab=False)
            self.en_field = dill.load(open("/content/drive/MyDrive/Colab Notebooks/python/model/EN.Field", "rb"))
            self.zh_field = dill.load(open("/content/drive/MyDrive/Colab Notebooks/python/model/ZH.Field", "rb"))
            self.fields = [("id", self.id_field), ("en", self.en_field), ("zh", self.zh_field)]
            
            ##构建测试集 & 迭代器
            test_data = data.Dataset([data.Example.fromlist([idx, item[0], item[1]], self.fields) for idx, item in enumerate(test_data)], self.fields)
            self.test_iterator= BucketIterator(test_data, batch_size=batch_size, device=device, train = False, sort_key=lambda x: len(x.en), sort_within_batch = True)   
        
    ##从文件中读取数据
    def read_data(self, path, test=True, lang1='en', lang2 = 'zh'):
        data = []
        types = ['test'] if test else ['train', 'dev']
        # print(types)
        for type in types:
            sub_data = []
            with open(f"{path}/{type}.seg.{lang1}.txt", encoding='utf-8') as f1, open(f"{path}/{type}.seg.{lang2}.txt", encoding='utf-8') as f2:
                for src, trg in zip(f1, f2):
                    if len(src) > MAX_LEN and len(trg) > MAX_LEN:
                        continue
                    sub_data.append((src.strip(), trg.strip()))
            data.append(sub_data)

        return data


## 2.模型构建
使用seq2seq模型完成机器翻译模型的搭建,可以参考 pytorch tutorials https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

这里需要完成的是一个seq2seq+attention的基础模型，有余力的可以进行更多的尝试
相关的论文：
1. attention:https://arxiv.org/abs/1409.0473
2. copy:https://arxiv.org/abs/1603.06393
3. coverage:https://arxiv.org/abs/1601.04811

### 2.1 Encoder
这里为了简化代码，embedding_size和hidden_size使用相同大小，均为传入的hid_dim。RNN模块可以采用双向GRU实现。

In [4]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()
        self.vocab_size = input_dim
        ##构建embedding
        self.embedding = nn.Embedding(input_dim, emb_dim)
        ##构建rnn模块
        self.rnn = nn.GRU(emb_dim, enc_hid_dim, bidirectional = True)
        ##构建dropout层
        self.dropout = nn.Dropout(dropout)
        ##构建linear层
        self.fc = nn.Linear(enc_hid_dim*2, dec_hid_dim)
        
        
    def forward(self, src_info):
        src, src_len = src_info
       
        ## 补充embedding层（+dropout）代码
        embedded = self.dropout(self.embedding(src))
        ## 补充rnn层代码
        outputs, hidden = self.rnn(embedded)
        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]),dim = 1)))
        
        return outputs, hidden

### 2.2 Attention模块
Attention 机制的 Encoder-Decoder 模型则是要从序列中学习到每一个元素的重要程度，然后按重要程度将元素合并。因此，注意力机制可以看作是 Encoder 和 Decoder 之间的接口，它向 Decoder 提供来自每个 Encoder 隐藏状态的信息。通过该设置，模型能够选择性地关注输入序列的有用部分，从而学习它们之间的“对齐”。这就表明，在 Encoder 将输入的序列元素进行编码时，得到的不在是一个固定的语义编码 C ，而是存在多个语义编码，且不同的语义编码由不同的序列元素以不同的权重参数组合而成。

在 Attention 机制下，语义编码 C 是各个元素按其重要程度加权求和得到的，即：

$C_i=\sum_{i=1}^{n}{a}_{ij}*{h_i}$

参数 𝑖 表示时刻， 𝑗 表示序列中的第 𝑗 个元素， 𝑇𝑥 表示序列的长度， 𝑓(⋅) 表示对元素 𝑥𝑗 的编码。𝑎𝑖𝑗 可以看作是一个概率，反映了元素 ℎ𝑗 对 𝐶𝑖 的重要性，可以使用 softmax 来表示：

${a}_{ij}= \frac{exp(e_{ij})}{\sum_{k=1}^{T_x}exp(e_{ik})}$

where $e_{ij}=a(s_{i-1}, h_j)$

![image.png](attachment:image.png)

请实现Attention模块：
（也可以不使用单独的Attention模块，在decoder部分的代码中整合Attention机制）

In [5]:
class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.attn = nn.Linear((enc_hid_dim*2)+ dec_hid_dim, dec_hid_dim)
        self.v = nn.Parameter(torch.rand(dec_hid_dim))

    def forward(self, hidden, encoder_outputs, mask):
        #hidden = [batch size, dec hid dim]
        #encoder_outputs = [src len, batch size, enc hid dim * 2]
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        
        #将decoder的隐状态重复src_len次
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        #hidden = [batch size, src len, dec hid dim]
        #encoder_outputs = [batch size, src len, enc hid dim * 2]
        
        ##请补充计算attention score的代码
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2)))
        energy = energy.permute(0, 2, 1)
        #energy = [batch size, dec hid dim, src sent len]
        #v = [dec hid dim]
        
        ##mask掉padding部分，计算softmax
        v = self.v.repeat(batch_size, 1).unsqueeze(1)
        attn = torch.bmm(v,energy).squeeze(1)
        attn = attn.masked_fill(mask, -1e6)
        output = F.softmax(attn, dim=1)
        return output    


### 2.3 Decoder
请实现带attention机制的decoder：

In [6]:
class Decoder(nn.Module):
    def __init__(self, output_dim,emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        ## 继续补充
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU((enc_hid_dim * 2)+emb_dim, dec_hid_dim)
        self.out = nn.Linear((enc_hid_dim * 2) + dec_hid_dim + emb_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_, hidden, encoder_outputs, mask):
        # input = [batch_size]
        # hidden = [batch_size, dec_hid_dim]
        # encoder_outputs = [src_sent_len, batch_size, enc_hid_dim * 2]
        input_ = input_.unsqueeze(1)
        
        ##embedding层
        embedded = self.dropout(self.embedding(input_))
        
        ##计算attention score
        attn = self.attention(hidden, encoder_outputs, mask)
        attn = attn.unsqueeze(1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        
        ##根据attention score计算weighted的context向量
        weight = torch.bmm(attn, encoder_outputs)
        weight = weight.permute(1, 0, 2)
        embedded = embedded.permute(1, 0, 2)
        rnn_input = torch.cat((embedded, weight), dim = 2)
        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        
        assert (output == hidden).all()
        
        embedded = embedded.squeeze(0)
        output = output.squeeze(0)
        weight = weight.squeeze(0)
        
        output = self.out(torch.cat((output, weight, embedded), dim = 1))
        
        #output = [bsz, output dim]
        
        return output, hidden.squeeze(0), attn.squeeze(0)

## 2.4 Seq2seq模型

In [7]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device, pad_idx):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.pad_idx = pad_idx

    def forward(self, src_info, trg = None):
                
        src, src_len = src_info
        batch_size = src.shape[1]
        max_len = trg.shape[0] if trg is not None else MAX_LEN
        trg_vocab_size = self.decoder.output_dim
        
        ##存储所有decoder输出的结果
        outputs = torch.zeros(max_len, batch_size, trg_vocab_size).to(self.device)
        attn_scores = []
        
        ## encoder
        encoder_outputs, hidden = self.encoder(src_info)

        ##初始化decoder的输入是<sos>token
        input = trg[0, :] if trg is not None else src[0, :]
        #mask = [batch size, src len]
        mask = self.create_mask(src)

        
        ## decode过程，每个step decode出一个token 
        for t in range(1, max_len):
            ## 请补全decoder的代码，得到output, hidden, atten_score
            output, hidden, atten_score = self.decoder(input, hidden, encoder_outputs,mask)
            outputs[t] = output
            input = output.argmax(1)
            attn_scores.append(atten_score)
            
        return outputs, torch.cat(attn_scores, dim = 1).to(self.device)

    def create_mask(self, src):
        mask = (src != self.pad_idx).permute(1, 0)
        return mask

## 3.训练模块代码
包括BLEU的计算：BLEU是一种评价机器翻译的指标，NLTK中包含了计算BLEU值的工具。

In [8]:
## bleu计算
import jieba
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

def calculate_bleu(hypothesis, targets, cut=True, verbose=False):
    bleu_scores = []
    for sent, trg in zip(hypothesis, targets):
        trg = trg.strip().lower().split()
        sent = sent.strip()
        if cut:
            trg = list(jieba.cut(''.join(trg).replace("-", "")))
            sent = list(jieba.cut(''.join(sent).replace("-", "")))
 
        bleu = sentence_bleu([trg], sent, weights=(0.5, 0.5, 0., 0.),smoothing_function = SmoothingFunction().method1)
        if verbose:
            print(f"src:{sent.strip()}\ntrg:{trg}\npredict:{sent}\n{bleu}\n")
        bleu_scores.append(bleu)         
    return sum(bleu_scores) / len(bleu_scores)

请补充train_iter的代码:
（在train函数中将调用train_iter，或者直接将这部分整合到train函数中）

In [9]:
def train_iter(model, iterator, optimizer, criterion, clip, nl_field = None):
    model.train()
    optimizer.zero_grad()
    epoch_loss = 0
    for i, batch in enumerate(tqdm(iterator)):
        src = batch.en
        trg = batch.zh
        
        ##补充训练部分的代码
        pred, _ = model(src,trg)
        pred_dim = pred.shape[-1]
        trg = trg[1:].view(-1)
        pred = pred[1:].view(-1, pred_dim)
        
        loss = criterion(pred, trg)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()         
        
    return epoch_loss / len(iterator)

请补充完成evaluate_iter的代码：
该函数用于评测模型在验证集上的效果。
生成时可以采用greedy search，即每次直接选择概率最大的词作为输出，直到出现终结符eos或达到最大句子长度，有余力的同学可以尝试用beam search生成。

In [10]:
def evaluate_iter(model, iterator, en_field, zh_field, criterion):
    model.eval()

    hypothesis, targets = [], []
    eval_loss = 0
    with torch.no_grad():
        for _, batch in enumerate(iterator):
            src = batch.en
            trg = batch.zh

            length = len(trg)
            ## 得到decoder的输出结果并计算loss，同时获取batch_size
            pred, _ = model(src,trg)
            
            src, src_len = batch.en
            batch_size = len(src[0])

            ##解码每个句子，采用greedy search，每一步选择概率最大的词，可以改进为beam search
            for sent in range(batch_size):
                predicts = []  # 模型生成的文本  
                
                for i in range(1,length): 
                ## 请补充生成部分的代码
                    tmp = pred[i][sent]
                    tmp = tmp.argmax()
                    if tmp == zh_field.vocab.stoi['<eos>'] or tmp == zh_field.vocab.stoi['<pad>']:
                        break
                    predicts.append(zh_field.vocab.itos[tmp])
                hypothesis.append(' '.join(predicts))
                
                trg_index = [x.item() for x in trg[:, sent]]
                trg_index = trg_index.index(zh_field.vocab.stoi['<eos>']) # 去掉<eos>token
                trg_str = ' '.join([zh_field.vocab.itos[x.item()] for x in trg[1: trg_index, sent]])
                targets.append(trg_str) # 参考文本（ground truth）
                
            # 根据模型生成的文本和参考文本计算BLEU值
            bleu = calculate_bleu(hypothesis, targets)

            pred_dim = pred.shape[-1]
            pred = pred[1:].view(-1, pred_dim)
            trg = trg[1:].view(-1)

            loss = criterion(pred, trg)
            eval_loss += loss.item()
  
    return bleu, eval_loss / len(iterator)

## 训练模型

In [11]:
def train(dataloader, model, model_output_path):
    print('Start training...')
    
    ## 补充训练部分的代码，optimizer，计算损失函数的criterion等
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-3)
    best_loss = float('inf')
    best_bleu = float(0)
    for epoch in range(N_EPOCHS):
        
        ## 调用train_iter函数
        train_loss = train_iter(model, dataloader.train_iterator, optimizer, criterion, CLIP)
        bleu, valid_loss = evaluate_iter(model, dataloader.dev_iterator, dataloader.en_field, dataloader.zh_field,criterion)

        ## 每5个epoch保存一次模型
        if epoch%5 == 0:
            torch.save(model, f'model_{epoch}.pt')
            
        ## 计算当前模型在验证集上的效果，如果损失比之前的更小，将当前模型保存为'model_best.pt'
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_bleu = bleu
            torch.save(model, 'model_best.pt')
            
        print(f'Best BLEU: {best_bleu:.3f} | Best Loss:{best_loss:.3f} |  Epoch: {epoch:d} |  BLeu： {bleu:.3f} | Loss:{valid_loss}', flush=True)


In [12]:
torch.cuda.get_device_name(0)

'Tesla T4'

In [13]:
## 参数设定(可以自己修改)
MAX_LEN = 256 
TRAIN_BATCH_SIZE = 256
INFERENCE_BATCH_SIZE = 256
HID_DIM = 256
DROPOUT=0.1
N_EPOCHS = 35
CLIP = 1
# device = torch.cuda.get_device_name(0)
device = torch.device('cuda')
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODEL_PATH='./model/'

In [14]:
#if out of memory
torch.cuda.empty_cache()

In [15]:
##定义各个模块
dataloader = Dataloader(TRAIN_BATCH_SIZE, device)
attn = Attention(HID_DIM, HID_DIM)
INPUT_DIM = len(dataloader.en_field.vocab)
OUTPUT_DIM = len(dataloader.zh_field.vocab)
encoder = Encoder(INPUT_DIM, HID_DIM, HID_DIM, HID_DIM, DROPOUT)
decoder = Decoder(OUTPUT_DIM, HID_DIM, HID_DIM, HID_DIM, DROPOUT, attn)
model = Seq2Seq(encoder, decoder, device, dataloader.zh_field.vocab.stoi['<pad>']).to(device)

en vocab size: 6786 zh vocab size: 5261


In [16]:
print(f"Train dataset : {len(dataloader.train_iterator)}")
print(f"Validation dataset : {len(dataloader.dev_iterator)}")

Train dataset : 46
Validation dataset : 12


In [17]:
## 开始训练 BATCH_SIZE = 256, HID = 256, MAX_LEN = 256, dropout = 0.1, epoch = 35
train(dataloader, model, model_output_path= MODEL_PATH)

  0%|          | 0/46 [00:00<?, ?it/s]

Start training...


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]
Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.819 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.001 | Best Loss:2.884 |  Epoch: 0 |  BLeu： 0.001 | Loss:2.88370951016744


100%|██████████| 46/46 [00:21<00:00,  2.11it/s]


Best BLEU: 0.021 | Best Loss:2.299 |  Epoch: 1 |  BLeu： 0.021 | Loss:2.298826684554418


100%|██████████| 46/46 [00:21<00:00,  2.12it/s]


Best BLEU: 0.021 | Best Loss:2.299 |  Epoch: 2 |  BLeu： 0.009 | Loss:2.3112124701340995


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.021 | Best Loss:2.299 |  Epoch: 3 |  BLeu： 0.019 | Loss:2.3079055547714233


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.035 | Best Loss:2.122 |  Epoch: 4 |  BLeu： 0.035 | Loss:2.121891568104426


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.035 | Best Loss:2.122 |  Epoch: 5 |  BLeu： 0.036 | Loss:2.2850318948427835


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.033 | Best Loss:2.081 |  Epoch: 6 |  BLeu： 0.033 | Loss:2.0810148318608603


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.043 | Best Loss:2.013 |  Epoch: 7 |  BLeu： 0.043 | Loss:2.0129402776559195


100%|██████████| 46/46 [00:21<00:00,  2.09it/s]


Best BLEU: 0.047 | Best Loss:1.962 |  Epoch: 8 |  BLeu： 0.047 | Loss:1.9621246059735615


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]


Best BLEU: 0.046 | Best Loss:1.921 |  Epoch: 9 |  BLeu： 0.046 | Loss:1.9205033133427303


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.052 | Best Loss:1.915 |  Epoch: 10 |  BLeu： 0.052 | Loss:1.9147737224896748


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.052 | Best Loss:1.915 |  Epoch: 11 |  BLeu： 0.048 | Loss:1.927826037009557


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.052 | Best Loss:1.915 |  Epoch: 12 |  BLeu： 0.051 | Loss:1.9465919335683186


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.052 | Best Loss:1.915 |  Epoch: 13 |  BLeu： 0.047 | Loss:2.0136609623829522


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.056 | Best Loss:1.809 |  Epoch: 14 |  BLeu： 0.056 | Loss:1.808561344941457


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.056 | Best Loss:1.809 |  Epoch: 15 |  BLeu： 0.052 | Loss:1.8684666653474171


100%|██████████| 46/46 [00:21<00:00,  2.11it/s]


Best BLEU: 0.056 | Best Loss:1.809 |  Epoch: 16 |  BLeu： 0.057 | Loss:1.8925818453232448


100%|██████████| 46/46 [00:21<00:00,  2.12it/s]


Best BLEU: 0.054 | Best Loss:1.767 |  Epoch: 17 |  BLeu： 0.054 | Loss:1.7669639190038045


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.054 | Best Loss:1.767 |  Epoch: 18 |  BLeu： 0.065 | Loss:1.8142983416716258


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.054 | Best Loss:1.767 |  Epoch: 19 |  BLeu： 0.055 | Loss:1.8105014661947887


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.059 | Best Loss:1.761 |  Epoch: 20 |  BLeu： 0.059 | Loss:1.7605027457078297


100%|██████████| 46/46 [00:22<00:00,  2.05it/s]


Best BLEU: 0.059 | Best Loss:1.761 |  Epoch: 21 |  BLeu： 0.064 | Loss:1.8160927246014278


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.059 | Best Loss:1.761 |  Epoch: 22 |  BLeu： 0.051 | Loss:1.9594118495782216


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]


Best BLEU: 0.060 | Best Loss:1.750 |  Epoch: 23 |  BLeu： 0.060 | Loss:1.7504143714904785


100%|██████████| 46/46 [00:21<00:00,  2.09it/s]


Best BLEU: 0.060 | Best Loss:1.750 |  Epoch: 24 |  BLeu： 0.064 | Loss:1.7933811396360397


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.061 | Best Loss:1.736 |  Epoch: 25 |  BLeu： 0.061 | Loss:1.7364511837561925


100%|██████████| 46/46 [00:22<00:00,  2.05it/s]


Best BLEU: 0.061 | Best Loss:1.736 |  Epoch: 26 |  BLeu： 0.065 | Loss:1.8023030658562977


100%|██████████| 46/46 [00:21<00:00,  2.12it/s]


Best BLEU: 0.061 | Best Loss:1.736 |  Epoch: 27 |  BLeu： 0.063 | Loss:1.7450094670057297


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 28 |  BLeu： 0.062 | Loss:1.6945851941903431


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 29 |  BLeu： 0.061 | Loss:1.7235580881436665


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 30 |  BLeu： 0.058 | Loss:1.7548922896385193


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 31 |  BLeu： 0.066 | Loss:1.7338055968284607


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 32 |  BLeu： 0.062 | Loss:1.7400171011686325


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 33 |  BLeu： 0.063 | Loss:1.7193938692410786


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.062 | Best Loss:1.695 |  Epoch: 34 |  BLeu： 0.063 | Loss:1.753219371040662


## 推理部分
请补充完成推理（inference）部分的代码，用于在测试集上生成翻译后的文本（该部分与evaluate_iter比较类似）。

In [18]:
def inference(model, iterator, en_field, zh_field):
    model.eval()

    with torch.no_grad():
        predict_res = []
        for _, batch in enumerate(iterator):
            src = batch.en
            id, trg = batch.id, batch.zh
            length = len(trg)
            
            ## 得到decoder的输出结果
            pred, _ = model(src,trg)

            src, src_len = batch.en
            batch_size = len(src[0])
            idx = 0
            for sent in range(batch_size):
                if en_field is not None:

                    eos_index = [x.item() for x in src[:, sent]]
                    
                    eos_index = eos_index.index(en_field.vocab.stoi['<eos>'])
                    src_str = ' '.join([en_field.vocab.itos[x.item()] for x in src[1: eos_index, sent]])
                    sent_id = id[sent]
                predicts = []
                grounds = []
                trg_index = [x.item() for x in trg[:, sent]]
                    
                trg_index = trg_index.index(zh_field.vocab.stoi['<eos>'])
                trg_str = ' '.join([zh_field.vocab.itos[x.item()] for x in trg[1: trg_index, sent]])
                for i in range(1,length): 
                ## 请补充生成部分的代码
                    tmp = pred[i][sent]
                    tmp = tmp.argmax()

                    # 去掉<eos>token
                    if tmp == zh_field.vocab.stoi['<eos>'] or tmp == zh_field.vocab.stoi['<pad>'] :
                        break               
                
                    predicts.append(zh_field.vocab.itos[tmp])
                grounds.append(trg_str)
                
                predict_res.append((int(sent_id), src_str, ' '.join(predicts), " ".join(grounds)))
                idx += length
    predict_res = [(item[1],item[2], item[3] ) for item in sorted(predict_res, key=lambda x: x[0])]

    bleu = calculate_bleu([i[1] for i in predict_res], [i[2] for i in predict_res])
    return bleu, predict_res

读取效果最好的模型，在测试集上进行生成：

In [19]:
dataloader = Dataloader(INFERENCE_BATCH_SIZE, device, eval=True)
model_init_path = f"model_best.pt"
# model = Seq2Seq(encoder, decoder, device, dataloader.zh_field.vocab.stoi['<pad>']).to(device)
# model.load_state_dict(torch.load(model_init_path))
model = torch.load(model_init_path)
bleu, predict_output = inference(model, dataloader.test_iterator, dataloader.en_field, dataloader.zh_field)
for item in predict_output:
    src, pred, trg = item
    print(f"src:{src}\ntrg:{trg}\npred:{pred}\n")
print(bleu)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
trg:" html " 超文本 标记 语言 支持
pred:" 支持 " 支持 支持 支持 支持

src:changed in version 3.4: <unk> parameter was added.
trg:在 3.4 版 更改 : 添加 了 <unk> 形参
pred:在 3.4 版 更改 : 添加 了 _ _ _ _ 形参

src:in interactive mode, if the value is not "none", it is converted to a string using the built in "repr()" function and the resulting string is written to standard output on a line by itself (except if the result is "none", so that procedure calls do not cause any <unk>
trg:在 交互 模式 下 ， 如果 结果 <unk> " none " ， 它会 通过 内置 的 " repr ( ) " 函数 转换 为 一个 字符串 ， 该 结果 字符串 将 以 单独 一行 的 形式 写入 标准 输出 （ <unk> 是 如果 结果 为 " none " ， 则 该 过程 调用 不 产生 任何 输出 。 ）
pred:如果 ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， ， " ， " " "

src:it is an error to call this function if the <unk> on the input bytes object is not one
trg:如果 输入 字节 串 对象 的 引用 计数 不 为 <unk> 调用 此 函数 将 报错
pred:如果 在 ， ， ， ， ， ， ， ， ， ， 被 将

src:<unk> is a list of names to <unk> and defaults to <unk> <

##下载模型

In [21]:
from google.colab import files

for i in range(35):
  if i % 5 == 0:
    file = f"model_{i}.pt"
    files.download(file) 
files.download('model_best.pt') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 其他结果

In [None]:
## 开始训练 BATCH_SIZE = 256, HID = 256, MAX_LEN = 256, dropout = 0.1, epoch = 20
train(dataloader, model, model_output_path= MODEL_PATH)

  0%|          | 0/46 [00:00<?, ?it/s]

Start training...


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.704 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.005 | Best Loss:2.537 |  Epoch: 0 |  BLeu： 0.005 | Loss:2.5374787747859955


100%|██████████| 46/46 [00:21<00:00,  2.13it/s]


Best BLEU: 0.005 | Best Loss:2.537 |  Epoch: 1 |  BLeu： 0.010 | Loss:2.6764090160528817


100%|██████████| 46/46 [00:21<00:00,  2.09it/s]


Best BLEU: 0.017 | Best Loss:2.299 |  Epoch: 2 |  BLeu： 0.017 | Loss:2.2994505167007446


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.019 | Best Loss:2.178 |  Epoch: 3 |  BLeu： 0.019 | Loss:2.177759885787964


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.019 | Best Loss:2.178 |  Epoch: 4 |  BLeu： 0.035 | Loss:2.3839192191759744


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.038 | Best Loss:2.154 |  Epoch: 5 |  BLeu： 0.038 | Loss:2.1542376279830933


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]


Best BLEU: 0.039 | Best Loss:2.119 |  Epoch: 6 |  BLeu： 0.039 | Loss:2.119241009155909


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.041 | Best Loss:1.989 |  Epoch: 7 |  BLeu： 0.041 | Loss:1.988772675395012


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.056 | Best Loss:1.962 |  Epoch: 8 |  BLeu： 0.056 | Loss:1.961515059073766


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.056 | Best Loss:1.962 |  Epoch: 9 |  BLeu： 0.045 | Loss:2.0014084627230964


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.048 | Best Loss:1.926 |  Epoch: 10 |  BLeu： 0.048 | Loss:1.9258283972740173


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.056 | Best Loss:1.903 |  Epoch: 11 |  BLeu： 0.056 | Loss:1.9034176866213481


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.059 | Best Loss:1.872 |  Epoch: 12 |  BLeu： 0.059 | Loss:1.8715187162160873


100%|██████████| 46/46 [00:22<00:00,  2.03it/s]


Best BLEU: 0.059 | Best Loss:1.872 |  Epoch: 13 |  BLeu： 0.054 | Loss:1.8769640078147252


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.059 | Best Loss:1.872 |  Epoch: 14 |  BLeu： 0.050 | Loss:1.9413335621356964


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]


Best BLEU: 0.058 | Best Loss:1.847 |  Epoch: 15 |  BLeu： 0.058 | Loss:1.8471821049849193


100%|██████████| 46/46 [00:22<00:00,  2.05it/s]


Best BLEU: 0.058 | Best Loss:1.847 |  Epoch: 16 |  BLeu： 0.053 | Loss:1.8525814513365428


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.059 | Best Loss:1.812 |  Epoch: 17 |  BLeu： 0.059 | Loss:1.81206876039505


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]


Best BLEU: 0.059 | Best Loss:1.812 |  Epoch: 18 |  BLeu： 0.054 | Loss:1.8773861279090245


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.057 | Best Loss:1.744 |  Epoch: 19 |  BLeu： 0.057 | Loss:1.7444926649332047


In [None]:
## 开始训练 BATCH_SIZE = 256, HID = 256, MAX_LEN = 256, dropout = 0.2, epoch = 20
train(dataloader, model, model_output_path= MODEL_PATH)

  0%|          | 0/46 [00:00<?, ?it/s]

Start training...


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.707 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.014 | Best Loss:2.938 |  Epoch: 0 |  BLeu： 0.014 | Loss:2.9378602604071298


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.011 | Best Loss:2.531 |  Epoch: 1 |  BLeu： 0.011 | Loss:2.5309643546740213


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.012 | Best Loss:2.233 |  Epoch: 2 |  BLeu： 0.012 | Loss:2.232854058345159


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]


Best BLEU: 0.012 | Best Loss:2.233 |  Epoch: 3 |  BLeu： 0.015 | Loss:2.475700000921885


100%|██████████| 46/46 [00:21<00:00,  2.09it/s]


Best BLEU: 0.012 | Best Loss:2.233 |  Epoch: 4 |  BLeu： 0.026 | Loss:2.611272225777308


100%|██████████| 46/46 [00:22<00:00,  2.08it/s]


Best BLEU: 0.027 | Best Loss:2.124 |  Epoch: 5 |  BLeu： 0.027 | Loss:2.1235190431276956


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]


Best BLEU: 0.043 | Best Loss:2.049 |  Epoch: 6 |  BLeu： 0.043 | Loss:2.049031009276708


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.043 | Best Loss:2.049 |  Epoch: 7 |  BLeu： 0.049 | Loss:2.1287727455298104


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.040 | Best Loss:1.973 |  Epoch: 8 |  BLeu： 0.040 | Loss:1.9734087586402893


100%|██████████| 46/46 [00:22<00:00,  2.07it/s]


Best BLEU: 0.050 | Best Loss:1.955 |  Epoch: 9 |  BLeu： 0.050 | Loss:1.9553527633349101


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]


Best BLEU: 0.050 | Best Loss:1.955 |  Epoch: 10 |  BLeu： 0.046 | Loss:2.0000447034835815


100%|██████████| 46/46 [00:22<00:00,  2.04it/s]


Best BLEU: 0.045 | Best Loss:1.919 |  Epoch: 11 |  BLeu： 0.045 | Loss:1.9193393687407176


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.046 | Best Loss:1.864 |  Epoch: 12 |  BLeu： 0.046 | Loss:1.863968128959338


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.046 | Best Loss:1.864 |  Epoch: 13 |  BLeu： 0.049 | Loss:1.8819983502229054


100%|██████████| 46/46 [00:22<00:00,  2.09it/s]


Best BLEU: 0.046 | Best Loss:1.864 |  Epoch: 14 |  BLeu： 0.051 | Loss:1.8837924698988597


100%|██████████| 46/46 [00:21<00:00,  2.11it/s]


Best BLEU: 0.051 | Best Loss:1.797 |  Epoch: 15 |  BLeu： 0.051 | Loss:1.7965218722820282


100%|██████████| 46/46 [00:21<00:00,  2.10it/s]


Best BLEU: 0.051 | Best Loss:1.797 |  Epoch: 16 |  BLeu： 0.054 | Loss:1.876356432835261


100%|██████████| 46/46 [00:22<00:00,  2.05it/s]


Best BLEU: 0.061 | Best Loss:1.783 |  Epoch: 17 |  BLeu： 0.061 | Loss:1.783234715461731


100%|██████████| 46/46 [00:21<00:00,  2.11it/s]


Best BLEU: 0.061 | Best Loss:1.783 |  Epoch: 18 |  BLeu： 0.048 | Loss:2.0414277215798697


100%|██████████| 46/46 [00:22<00:00,  2.06it/s]


Best BLEU: 0.061 | Best Loss:1.783 |  Epoch: 19 |  BLeu： 0.051 | Loss:1.7857594937086105


In [None]:
## 开始训练 BATCH_SIZE = 128, HID = 512, MAX_LEN = 256, dropout = 0.2, epoch = 20
train(dataloader, model, model_output_path= MODEL_PATH)

  1%|          | 1/92 [00:00<00:13,  6.97it/s]

Start training...


100%|██████████| 92/92 [00:37<00:00,  2.43it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.643 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.028 | Best Loss:3.167 |  Epoch: 0 |  BLeu： 0.028 | Loss:3.167074576668117


100%|██████████| 92/92 [00:38<00:00,  2.42it/s]


Best BLEU: 0.053 | Best Loss:2.408 |  Epoch: 1 |  BLeu： 0.053 | Loss:2.4078190689501553


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.052 | Best Loss:2.342 |  Epoch: 2 |  BLeu： 0.052 | Loss:2.3416026986163594


100%|██████████| 92/92 [00:39<00:00,  2.34it/s]


Best BLEU: 0.052 | Best Loss:2.342 |  Epoch: 3 |  BLeu： 0.058 | Loss:2.4345824329749397


100%|██████████| 92/92 [00:38<00:00,  2.42it/s]


Best BLEU: 0.052 | Best Loss:2.342 |  Epoch: 4 |  BLeu： 0.044 | Loss:2.80106142292852


100%|██████████| 92/92 [00:38<00:00,  2.41it/s]


Best BLEU: 0.050 | Best Loss:2.130 |  Epoch: 5 |  BLeu： 0.050 | Loss:2.1295799457508586


100%|██████████| 92/92 [00:38<00:00,  2.39it/s]


Best BLEU: 0.065 | Best Loss:1.978 |  Epoch: 6 |  BLeu： 0.065 | Loss:1.9781689514284548


100%|██████████| 92/92 [00:38<00:00,  2.42it/s]


Best BLEU: 0.065 | Best Loss:1.978 |  Epoch: 7 |  BLeu： 0.065 | Loss:2.233002032922662


100%|██████████| 92/92 [00:38<00:00,  2.41it/s]


Best BLEU: 0.065 | Best Loss:1.978 |  Epoch: 8 |  BLeu： 0.062 | Loss:2.0614668415940325


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.065 | Best Loss:1.978 |  Epoch: 9 |  BLeu： 0.060 | Loss:2.054632041765296


100%|██████████| 92/92 [00:38<00:00,  2.39it/s]


Best BLEU: 0.065 | Best Loss:1.978 |  Epoch: 10 |  BLeu： 0.055 | Loss:2.0744294731513313


100%|██████████| 92/92 [00:38<00:00,  2.37it/s]


Best BLEU: 0.066 | Best Loss:1.966 |  Epoch: 11 |  BLeu： 0.066 | Loss:1.9657382926215297


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.066 | Best Loss:1.966 |  Epoch: 12 |  BLeu： 0.064 | Loss:1.9660250389057656


100%|██████████| 92/92 [00:38<00:00,  2.41it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 13 |  BLeu： 0.063 | Loss:1.9431526038957678


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 14 |  BLeu： 0.057 | Loss:2.111995015455329


100%|██████████| 92/92 [00:38<00:00,  2.40it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 15 |  BLeu： 0.061 | Loss:1.9474858589794324


100%|██████████| 92/92 [00:37<00:00,  2.43it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 16 |  BLeu： 0.068 | Loss:1.9926913251047549


100%|██████████| 92/92 [00:38<00:00,  2.40it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 17 |  BLeu： 0.066 | Loss:1.9609429577122564


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 18 |  BLeu： 0.071 | Loss:2.0863009328427524


100%|██████████| 92/92 [00:38<00:00,  2.38it/s]


Best BLEU: 0.063 | Best Loss:1.943 |  Epoch: 19 |  BLeu： 0.068 | Loss:2.2208136501519578


In [None]:
## 开始训练 BATCH_SIZE = 128, HID = 512 , MAX_LEN = 128, dropout = 0.2, epoch = 20
train(dataloader, model, model_output_path= MODEL_PATH)

  0%|          | 0/88 [00:00<?, ?it/s]

Start training...


100%|██████████| 88/88 [00:24<00:00,  3.55it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.653 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.031 | Best Loss:3.134 |  Epoch: 0 |  BLeu： 0.031 | Loss:3.133951409296556


100%|██████████| 88/88 [00:25<00:00,  3.47it/s]


Best BLEU: 0.042 | Best Loss:2.661 |  Epoch: 1 |  BLeu： 0.042 | Loss:2.6612503826618195


100%|██████████| 88/88 [00:25<00:00,  3.50it/s]


Best BLEU: 0.053 | Best Loss:2.565 |  Epoch: 2 |  BLeu： 0.053 | Loss:2.564983457326889


100%|██████████| 88/88 [00:25<00:00,  3.44it/s]


Best BLEU: 0.046 | Best Loss:2.392 |  Epoch: 3 |  BLeu： 0.046 | Loss:2.391543783924796


100%|██████████| 88/88 [00:25<00:00,  3.46it/s]


Best BLEU: 0.068 | Best Loss:2.318 |  Epoch: 4 |  BLeu： 0.068 | Loss:2.3184892372651533


100%|██████████| 88/88 [00:25<00:00,  3.40it/s]


Best BLEU: 0.068 | Best Loss:2.318 |  Epoch: 5 |  BLeu： 0.067 | Loss:2.362742153081027


100%|██████████| 88/88 [00:26<00:00,  3.36it/s]


Best BLEU: 0.061 | Best Loss:2.211 |  Epoch: 6 |  BLeu： 0.061 | Loss:2.2110009599815714


100%|██████████| 88/88 [00:25<00:00,  3.42it/s]


Best BLEU: 0.061 | Best Loss:2.211 |  Epoch: 7 |  BLeu： 0.066 | Loss:2.2154708748514


100%|██████████| 88/88 [00:26<00:00,  3.38it/s]


Best BLEU: 0.061 | Best Loss:2.211 |  Epoch: 8 |  BLeu： 0.071 | Loss:2.2242395281791687


100%|██████████| 88/88 [00:26<00:00,  3.33it/s]


Best BLEU: 0.065 | Best Loss:2.205 |  Epoch: 9 |  BLeu： 0.065 | Loss:2.204565546729348


100%|██████████| 88/88 [00:26<00:00,  3.38it/s]


Best BLEU: 0.065 | Best Loss:2.205 |  Epoch: 10 |  BLeu： 0.066 | Loss:2.2505959164012563


100%|██████████| 88/88 [00:25<00:00,  3.45it/s]


Best BLEU: 0.065 | Best Loss:2.205 |  Epoch: 11 |  BLeu： 0.070 | Loss:2.2175158099694685


100%|██████████| 88/88 [00:25<00:00,  3.39it/s]


Best BLEU: 0.065 | Best Loss:2.205 |  Epoch: 12 |  BLeu： 0.067 | Loss:2.20591785420071


100%|██████████| 88/88 [00:26<00:00,  3.38it/s]


Best BLEU: 0.065 | Best Loss:2.205 |  Epoch: 13 |  BLeu： 0.063 | Loss:2.3319858746095137


100%|██████████| 88/88 [00:25<00:00,  3.42it/s]


Best BLEU: 0.070 | Best Loss:2.192 |  Epoch: 14 |  BLeu： 0.070 | Loss:2.192487955093384


100%|██████████| 88/88 [00:26<00:00,  3.36it/s]


Best BLEU: 0.071 | Best Loss:2.180 |  Epoch: 15 |  BLeu： 0.071 | Loss:2.1795747469771993


100%|██████████| 88/88 [00:26<00:00,  3.38it/s]


Best BLEU: 0.071 | Best Loss:2.180 |  Epoch: 16 |  BLeu： 0.068 | Loss:2.2208103483373467


100%|██████████| 88/88 [00:25<00:00,  3.40it/s]


Best BLEU: 0.071 | Best Loss:2.180 |  Epoch: 17 |  BLeu： 0.070 | Loss:2.2591538537632334


100%|██████████| 88/88 [00:26<00:00,  3.37it/s]


Best BLEU: 0.071 | Best Loss:2.180 |  Epoch: 18 |  BLeu： 0.069 | Loss:2.27418791976842


100%|██████████| 88/88 [00:26<00:00,  3.37it/s]


Best BLEU: 0.071 | Best Loss:2.180 |  Epoch: 19 |  BLeu： 0.069 | Loss:2.234376918185841


In [None]:
## 开始训练 BATCH_SIZE = 64, HID = 256, MAX_LEN = 128, dropout = 0.2, epoch = 20 
train(dataloader, model, model_output_path= MODEL_PATH)

  1%|          | 1/176 [00:00<00:18,  9.27it/s]

Start training...


100%|██████████| 176/176 [00:17<00:00, 10.17it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.705 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.030 | Best Loss:3.063 |  Epoch: 0 |  BLeu： 0.030 | Loss:3.0633846670389175


100%|██████████| 176/176 [00:17<00:00,  9.93it/s]


Best BLEU: 0.049 | Best Loss:2.875 |  Epoch: 1 |  BLeu： 0.049 | Loss:2.87532232159918


100%|██████████| 176/176 [00:17<00:00, 10.17it/s]


Best BLEU: 0.055 | Best Loss:2.674 |  Epoch: 2 |  BLeu： 0.055 | Loss:2.673534072258256


100%|██████████| 176/176 [00:17<00:00, 10.22it/s]


Best BLEU: 0.060 | Best Loss:2.604 |  Epoch: 3 |  BLeu： 0.060 | Loss:2.6039811819791794


100%|██████████| 176/176 [00:17<00:00, 10.07it/s]


Best BLEU: 0.059 | Best Loss:2.555 |  Epoch: 4 |  BLeu： 0.059 | Loss:2.5551971847360786


100%|██████████| 176/176 [00:17<00:00, 10.12it/s]


Best BLEU: 0.064 | Best Loss:2.469 |  Epoch: 5 |  BLeu： 0.064 | Loss:2.4687918587164446


100%|██████████| 176/176 [00:17<00:00, 10.11it/s]


Best BLEU: 0.063 | Best Loss:2.425 |  Epoch: 6 |  BLeu： 0.063 | Loss:2.425109983167865


100%|██████████| 176/176 [00:17<00:00, 10.14it/s]


Best BLEU: 0.063 | Best Loss:2.425 |  Epoch: 7 |  BLeu： 0.065 | Loss:2.483982327309522


100%|██████████| 176/176 [00:17<00:00, 10.09it/s]


Best BLEU: 0.063 | Best Loss:2.425 |  Epoch: 8 |  BLeu： 0.069 | Loss:2.433832956985994


100%|██████████| 176/176 [00:17<00:00, 10.18it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 9 |  BLeu： 0.065 | Loss:2.3600721582770348


100%|██████████| 176/176 [00:17<00:00, 10.16it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 10 |  BLeu： 0.073 | Loss:2.4946724731813776


100%|██████████| 176/176 [00:17<00:00, 10.09it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 11 |  BLeu： 0.067 | Loss:2.3964231230995874


100%|██████████| 176/176 [00:17<00:00, 10.09it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 12 |  BLeu： 0.065 | Loss:2.401585726575418


100%|██████████| 176/176 [00:17<00:00, 10.10it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 13 |  BLeu： 0.068 | Loss:2.4334150037982245


100%|██████████| 176/176 [00:17<00:00,  9.99it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 14 |  BLeu： 0.069 | Loss:2.520377828316255


100%|██████████| 176/176 [00:17<00:00, 10.22it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 15 |  BLeu： 0.067 | Loss:2.50880887020718


100%|██████████| 176/176 [00:17<00:00, 10.15it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 16 |  BLeu： 0.065 | Loss:2.511512811888348


100%|██████████| 176/176 [00:17<00:00, 10.07it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 17 |  BLeu： 0.072 | Loss:2.4347178746353495


100%|██████████| 176/176 [00:17<00:00, 10.16it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 18 |  BLeu： 0.069 | Loss:2.493690462274985


100%|██████████| 176/176 [00:17<00:00, 10.00it/s]


Best BLEU: 0.065 | Best Loss:2.360 |  Epoch: 19 |  BLeu： 0.068 | Loss:2.443952427669005


In [None]:
## 开始训练 BATCH_SIZE = 64, HID = 512, MAX_LEN = 128, dropout = 0.2, epoch = 20
train(dataloader, model, model_output_path= MODEL_PATH)

  0%|          | 0/176 [00:00<?, ?it/s]

Start training...


100%|██████████| 176/176 [00:28<00:00,  6.10it/s]
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.663 seconds.
Prefix dict has been built successfully.


Best BLEU: 0.045 | Best Loss:3.118 |  Epoch: 0 |  BLeu： 0.045 | Loss:3.1177717352455314


100%|██████████| 176/176 [00:29<00:00,  6.02it/s]


Best BLEU: 0.051 | Best Loss:2.809 |  Epoch: 1 |  BLeu： 0.051 | Loss:2.8090480985966595


100%|██████████| 176/176 [00:29<00:00,  6.03it/s]


Best BLEU: 0.055 | Best Loss:2.602 |  Epoch: 2 |  BLeu： 0.055 | Loss:2.6020196229219437


100%|██████████| 176/176 [00:29<00:00,  5.92it/s]


Best BLEU: 0.071 | Best Loss:2.499 |  Epoch: 3 |  BLeu： 0.071 | Loss:2.4991576116193426


100%|██████████| 176/176 [00:29<00:00,  5.95it/s]


Best BLEU: 0.069 | Best Loss:2.478 |  Epoch: 4 |  BLeu： 0.069 | Loss:2.4779862598939375


100%|██████████| 176/176 [00:29<00:00,  5.92it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 5 |  BLeu： 0.073 | Loss:2.3874780427325857


100%|██████████| 176/176 [00:30<00:00,  5.86it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 6 |  BLeu： 0.067 | Loss:2.4551965330134737


100%|██████████| 176/176 [00:29<00:00,  5.89it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 7 |  BLeu： 0.072 | Loss:2.3980170596729624


100%|██████████| 176/176 [00:29<00:00,  5.89it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 8 |  BLeu： 0.067 | Loss:2.4534764655611734


100%|██████████| 176/176 [00:29<00:00,  5.93it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 9 |  BLeu： 0.073 | Loss:2.4639855934814974


100%|██████████| 176/176 [00:29<00:00,  5.88it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 10 |  BLeu： 0.069 | Loss:2.466751300475814


100%|██████████| 176/176 [00:29<00:00,  5.93it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 11 |  BLeu： 0.071 | Loss:2.4777889983220533


100%|██████████| 176/176 [00:29<00:00,  5.89it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 12 |  BLeu： 0.068 | Loss:2.5359048572453586


100%|██████████| 176/176 [00:30<00:00,  5.82it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 13 |  BLeu： 0.072 | Loss:2.5559854453260247


100%|██████████| 176/176 [00:29<00:00,  5.88it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 14 |  BLeu： 0.076 | Loss:2.512684964320876


100%|██████████| 176/176 [00:30<00:00,  5.84it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 15 |  BLeu： 0.070 | Loss:2.5499500957402317


100%|██████████| 176/176 [00:30<00:00,  5.84it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 16 |  BLeu： 0.073 | Loss:2.54688517071984


100%|██████████| 176/176 [00:29<00:00,  5.87it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 17 |  BLeu： 0.071 | Loss:2.516056165099144


100%|██████████| 176/176 [00:30<00:00,  5.87it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 18 |  BLeu： 0.077 | Loss:2.602894050153819


100%|██████████| 176/176 [00:29<00:00,  5.87it/s]


Best BLEU: 0.073 | Best Loss:2.387 |  Epoch: 19 |  BLeu： 0.074 | Loss:2.612271010875702
