In [None]:
import BLEUscore as _bleu
import Dataloader as _dl

import config
import Model
import utils

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [None]:
# Project configuration object; its attributes (batch size, lengths, paths, ...)
# are read by the cells below.
cfg = config.Config()
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Training split: constructed without tokenizers, unlike the dev/test splits below.
train_set = _dl.E2EDataset(
    mode='train',
    max_src_len=cfg.max_src_len,
    max_tgt_len=cfg.max_tgt_len,
)

# Dev split: reuses the tokenizers held by the training set.
dev_set = _dl.E2EDataset(
    mode='dev',
    max_src_len=cfg.max_src_len,
    max_tgt_len=cfg.max_tgt_len,
    field_tokenizer=train_set.field_tokenizer,
    tokenizer=train_set.tokenizer,
)

# Test split: same tokenizers as the training set.
test_set = _dl.E2EDataset(
    mode='test',
    max_src_len=cfg.max_src_len,
    max_tgt_len=cfg.max_tgt_len,
    field_tokenizer=train_set.field_tokenizer,
    tokenizer=train_set.tokenizer,
)

# Only the training split is batched and shuffled; dev/test are iterated directly.
train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True)

In [None]:
# Build the network and move it to the selected device.
# NOTE(review): both vocabulary sizes are taken from `train_set.tokenizer`;
# confirm the source side is not meant to use `train_set.field_tokenizer`.
model = Model.E2EModel(
    cfg,
    src_vocab_size=train_set.tokenizer.vocab_size,
    tgt_vocab_size=train_set.tokenizer.vocab_size,
).to(device)

In [None]:
# Token id whose loss contribution is zeroed out below.
# NOTE(review): assumed to be the tokenizer's padding id -- confirm.
PAD_ID = 0

# Batch size and per-class loss weights (one weight per vocabulary entry).
batch_size = cfg.batch_size
weight = torch.ones(train_set.tokenizer.vocab_size)
weight[PAD_ID] = 0  # a zero weight removes PAD_ID targets from the loss

# BLEU scorer used during validation (n-grams up to length 4).
scorer = _bleu.BLEUScore(max_ngram=4)

# Loss function. `reduction='mean'` is the non-deprecated spelling of the
# former `size_average=True` argument and yields the same weighted mean.
criterion = nn.NLLLoss(weight, reduction='mean').to(device)

# Optimizer.
optimizer = optim.SGD(params=model.parameters(), lr=cfg.learning_rate)

# Learning-rate schedule: cosine annealing with a period parameter of 200 steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
MAX_EPOCH = cfg.n_epochs
VAL_NUM = cfg.val_num

best_bleu = 0.0  # best validation BLEU seen so far
loss_li = []     # training-loss history
bleu_li = []     # validation-BLEU history
lr_li = []       # learning-rate history

In [None]:
from sys import stdout
from numpy import newaxis
import tqdm
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

def train(model, iterator, iter):
    """Train ``model`` for one pass over ``iterator`` and record epoch statistics.

    Relies on the notebook-level ``optimizer``, ``criterion``, ``scheduler``,
    ``device``, ``loss_li`` and ``lr_li``.

    Args:
        model: Module called as ``model((src, tgt))``; its output is flattened
            to ``(-1, vocab_size)`` and passed to ``criterion``.
        iterator: Sized iterable yielding ``(src, tgt)`` tensor pairs.
        iter: Epoch index, used only for the progress-bar label.

    Side effects:
        Appends the mean batch loss of this epoch to ``loss_li``, appends the
        learning rate used during this epoch to ``lr_li``, and advances
        ``scheduler`` by exactly one step.
    """
    running_loss = 0.0  # sum of per-batch losses, for the displayed running mean
    model.train()
    # `tqdm` is imported as a module in this file, so the progress-bar class is `tqdm.tqdm`.
    with tqdm.tqdm(total=len(iterator), desc='epoch{} [train]'.format(iter), file=stdout) as t:
        for i, batch in enumerate(iterator):
            src, tgt = batch
            # Move to the device and swap the first two dimensions
            # (presumably batch-first -> sequence-first; TODO confirm).
            src = src.to(device).transpose(0, 1)
            tgt = tgt.to(device).transpose(0, 1)

            optimizer.zero_grad()  # clear gradients left over from the previous batch

            # Forward
            logits = model((src, tgt))
            vocab_size = logits.size(-1)
            logits = logits.contiguous().view(-1, vocab_size)
            targets = tgt.contiguous().view(-1)
            loss = criterion(logits, targets.long())
            running_loss += loss.item()

            # Backward + update: exactly once per batch. A second backward()
            # on the same loss fails because the graph is freed after the first.
            loss.backward()
            optimizer.step()

            t.set_postfix(loss=running_loss / (i + 1), lr=scheduler.get_last_lr()[0])
            t.update(1)

    # Epoch-level bookkeeping happens once, after all batches, so the history
    # lists hold one entry per epoch and the schedule advances per epoch.
    loss_li.append(running_loss / max(len(iterator), 1))
    lr_li.append(scheduler.get_last_lr()[0])
    scheduler.step()

def evaluate(model, iterator, iter):
    """Score ``model`` on ``iterator`` with BLEU and save it when it improves.

    Relies on the notebook-level ``scorer``, ``train_set``, ``device``,
    ``bleu_li``, ``best_bleu`` and ``cfg``.

    Args:
        model: Object exposing ``eval()`` and ``predict(src)``; ``predict``
            returns a ``(sentence, attention)`` pair.
        iterator: Iterable yielding ``(src, tgt, lex, muti_tgt)`` tuples.
        iter: Epoch index, used only to pad the progress-bar label.

    Side effects:
        Resets and refills ``scorer``, appends the BLEU score to ``bleu_li``,
        prints it, and when it exceeds ``best_bleu`` updates ``best_bleu`` and
        saves the whole model object to ``cfg.model_save_path``.
    """
    global best_bleu
    model.eval()
    # Start from an empty scorer so earlier validation runs do not leak in.
    scorer.reset()
    with torch.no_grad():
        # `tqdm` is imported as a module in this file, so the progress-bar class is `tqdm.tqdm`.
        progress = tqdm.tqdm(iterator, desc='{} [valid]'.format(" " * (5 + len(str(iter)))), file=stdout)
        for data in progress:
            src, tgt, lex, muti_tgt = data
            # Add a second axis of size 1 (presumably a batch dimension; TODO confirm).
            src = torch.as_tensor(src[:, newaxis]).to(device)
            sentence, attention = model.predict(src)
            # Decode the prediction and substitute the placeholders with the
            # first and second entries of `lex`.
            sentence = train_set.tokenizer.decode(sentence).replace('[NAME]', lex[0]).replace('[NEAR]', lex[1])
            scorer.append(sentence, muti_tgt)

        bleu = scorer.score()  # BLEU over everything appended above
        bleu_li.append(bleu)
        print("BLEU SCORE: {:.4f}".format(bleu))
        if bleu > best_bleu:
            best_bleu = bleu
            # Saves the full model object, not only its state dict.
            torch.save(model, cfg.model_save_path)
            print("保存模型成功！")

def draw_carve(title, save_path, x, y):
    """Plot ``y`` against ``range(x)`` on a cleared figure and save it.

    Args:
        title: Title placed on the figure.
        save_path: Destination passed to ``plt.savefig``.
        x: Number of points on the horizontal axis (``range(x)`` is used).
        y: Values to plot.
    """
    plt.clf()            # start from an empty figure
    plt.title(title)
    steps = range(x)     # horizontal axis: 0 .. x-1
    plt.plot(steps, y)
    plt.savefig(save_path)

In [None]:
# 训练集用于训练，验证集用于评估 
for epoch in range(MAX_EPOCH): 
    train(model, train_loader, epoch)
    # 指定轮数验证 
    if epoch % VAL_NUM == 0: 
        evaluate(model, dev_set, epoch) 

# 绘制验证BLEU曲线 
draw_carve("valid_bleu", './valid_bleu.png', epoch // VAL_NUM + 1, bleu_li) 
# 绘制训练LOSS曲线 
draw_carve("train_loss", './train_loss.png', epoch + 1, loss_li) 
# 绘制训练LR曲线 
draw_carve("train_lr", './train_lr.png', epoch + 1, lr_li) 

In [None]:
# Generate one sentence per test sample and write them to the result file.
# NOTE(review): this uses the in-memory `model` as left by training, not the
# checkpoint written to cfg.model_save_path -- confirm that is intended.
model.eval()
# The file is opened once, in write mode, so re-running this cell replaces the
# previous results instead of appending duplicate lines to them.
with torch.no_grad(), open(cfg.result_save_path, 'w', encoding='utf-8') as f:
    # `tqdm` is imported as a module in this file, so the progress-bar class is `tqdm.tqdm`.
    for data in tqdm.tqdm(test_set, desc='[test]', file=stdout):
        src, tgt, lex, _ = data
        # Add a second axis of size 1 (presumably a batch dimension; TODO confirm).
        src = torch.as_tensor(src[:, newaxis]).to(device)
        # Predict.
        sentence, attention = model.predict(src)
        # Decode and substitute the placeholders with the entries of `lex`.
        sentence = train_set.tokenizer.decode(sentence).replace('[NAME]', lex[0]).replace('[NEAR]', lex[1])
        # One prediction per line, terminated with a full stop.
        f.write(sentence + '.\n')
print('Finished Testing!')