In [2]:
# 11.3 PyTorch实现注意力模型
# 11.3.1 构建Encoder
import torch.nn as nn

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderENN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1,1,-1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1,1,self.hidden_size, device=device)

In [1]:
# 11.3.2 构建简单Decoder
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, input, hidden):
        output = self.embedding(input).view(1,1,-1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1,1,self.hidden_size, device=device)

NameError: name 'nn' is not defined

In [None]:
# 11.3.3 构建注意力Decoder
# 以典型的Bahdanau注意力架构为例，主要有4层。
# 嵌入层（Embedding Layer）将输入字转换为矢量，计算每个编码器输出的注意能量的层、RNN层和输出层

# 1）这些输入，分别通过不同的层接受，yt-1作为嵌入层的输入
embedded = embedding(last_rnn_output)
# 2）注意力层的函数a的输入为st-1和hj，输出为etj，标准化处理后为αtj
attn_energies[j] = attn_layer(last_hidden, encoder_outputs[j])
attn_weights = normalize(attn_energies)
# 3）向量Ct为编码器各输出的注意力加权平均
context = sum(attn_weights*encoder_outputs)
# 4）循环层f的输入为(st-1,yt-1,ct)，输出为内部隐含状态及st
rnn_input = contact(embedded, context)
rnn_output, rnn_hidden = rnn(rnn_input, last_hidden)
# 5）输出层g的输入为(yi-1,si,ci)，输出为：yi
output = out(embedded, rnn_output, context)

In [None]:
# 6）综合以上各步，得到Bahdanau注意力的解码器
class BahdanauAttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
        super(AttnDecoderRNN, self).__init__()
        # 定义参数
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length
        # 定义层
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.attn = GeneralAttn(hidden_size)
        self.gru = nn.GRU(hidden_size*2, hidden_size, n_layers, dropout=dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
    
    def forward(self, word_input, last_hidden, encoder_outputs):
        # 前向传播每次运行一个时间步，但使用所有的编码器输出
        # 获取当前词嵌入（last output word）
        word_embedded = self.embedding(word_input).view(1,1,-1) # S=1*B*N
        word_embedded = self.dropout(word_embedded)
        # 计算注意力权重并使用编码器输出
        attn_weights = self.attn(last_hidden[-1], encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0,1)) # B*1*N
        # 把词嵌入与注意力context结合在一起，传入循环网络
        rnn_input = torch.cat((word_embedded, context),2)
        output, hidden = self.gru(rnn_input, last_hidden)
        # 定义最后输出层
        output = output.squeeze(0) # B*N
        output = F.log_softmax(self.out(torch.cat((output, context),1)))
        # 返回最后输出，隐含状态及注意力权重
        return output,hidden,attn_weights

In [2]:
# 11.4 用注意力机制实现中英文互译
# 11.4.1 导入需要的模块
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import jieba
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.font_manager as fm
myfont = fm.FontProperties(fname='/home/simhei.ttf')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# 11.4.2 数据预处理
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")
    # 读文件，然后分成行
    lines = open('data/PyTorch-11/eng-cmn/%s-%s.txt'%(lang1,lang2),encoding='utf-8'). \
            read().strip().split('\n')
    # 把行分成语句对，并进行规范化
    pairs = [[normalizeString(s) for s in l.split('t')] for l in lines]
    # 判断是否需要转换语句对的次序，如[英文，中文]转换成[中文，英文]次序
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    return input_lang, output_lang, pairs

In [4]:
# (2)过滤并处理文本信息
# 为便于数据处理， 把Unicode字符串转换为ASCII编码
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c)!='Mn'
    )

# 对英文转换成小写， 去空格及非字母符号等处理
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r"\1", s)
    return s

In [5]:
#(3) 从每个pair中， 制作出中文词典和英文词典
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name=name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0:"SOS", 1:"EOS"}
        self.n_words = 2
    # 处理英文词句
    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)
    # 处理中文词句
    def addSentence_cn(self, sentence):
        for word in list(jieba.cut(semtence)):
            self.addWord(word)
    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [6]:
# (4) 把以上数据预处理函数， 放在一起，实现对数据的预处理
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs"%len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs"%len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence_cn(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

In [None]:
# (5) 运行预处理函数
input_lang, output_lang, pairs = prepareData('eng', 'cmn', True)
print(random.choice(pairs))

In [None]:
# (6) 构建数据集
# 构建数据集，分两种情况。一种是构建英文字典，一种是构建中文字典，
# 构建中文的函数加上了_cn后缀，如indexesFromSentence_cn
def indexsFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split('')]
def indexesFromSentence_cn(lang, sentence):
    return [lang.word2index[word] for word in list(jieba.cut(sentence))]
def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1,1)
def tensorFromSentence_cn(lang, sentence):
    indexes = indexesFromSentence_cn(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1,1)
def tensorsFromPair(pair):
    input_tensor = tensorFromSentence_cn(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [9]:
# 11.4.3 构建模型
# 1.构建编码器
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN,self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1,1,-1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden
    def initHidden(self):
        return torch.zeros(1,1,self.hidden_size, device=device)
# 2.构建带注意力的解码器
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p = 0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN,self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size*2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_szie*2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
    
    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1,1,-1)
        embedded = self.dropout(embedded)
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]),1)),dim=1)
        attn_applied = torch.bmn(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]),1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden,attn_weights
    
    def initHidden(self):
        return torch.zeros(1,1,self.hidden_size,device=device)

NameError: name 'MAX_LENGTH' is not defined

In [None]:
# 11.4.4 训练模型
# 1. 定义训练模型函数
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for i in range(n_iters)]
    criterion = nn.NLLLoss()
    for iter in range(1, n_iters+1 ):
        training_pair = training_pairs[iter-1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss
        if iter%print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %(timeSince(start, iter/n_iters), iter, iter/n_iters*100, print_loss_avg))
        if iter%plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    showPlot(plot_losses)

In [10]:
# 2.执行训练函数
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_szie).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

NameError: name 'input_lang' is not defined

In [None]:
# 11.4.5 随机采样 对模型进行测试
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, paor[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')
evaluateRandomly(encoder1, attn_decoder1)

In [None]:
# 11.4.6 可视化注意力
# 1.定义可视化注意力函数
def showAttention(input_sentence, output_words, attentions):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)
    # set up axes
    ax.set_xticklabels(['']+list(jieba.cut(input_sentence))+['<EOS>'], rotation=90,fontproperties=myfont)
    ax.set_yticklabels(['']+output_words)
    # show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    plt.show()

In [None]:
# 2.评估一条语句的注意力
def evaluateAndShowAttention(input_sentence):
    output_words, attentions = evaluate(
        encoder1, attn_decoder1, input_sentence)
    print('input=', input_sentence)
    print('output=', ' '.join(output_words))
    showAttention(input_sentence,output_words, attentions)

evaluateAndShowAttention("我们在严肃地讨论你的未来。")