## 简介

In [1]:
"""
在pytorch tutorial的某个例子的基础上作了改动，可以运行，可以作为参考
暂时还是用的例子里面的gru和attention方式,已经实现了bidirectional
正在尝试改为LSTM
"""

'\n在pytorch tutorial的某个例子的基础上作了改动，可以运行，可以作为参考\n暂时还是用的例子里面的gru和attention方式,已经实现了bidirectional\n正在尝试改为LSTM\n'

## 包导入与常量定义

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim
import os
import random
import numpy 
import time
import math
torch.manual_seed(1)
random.seed(1)


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## 数据处理

In [4]:
root_path = "./rnnpg_data_emnlp-2014/partitions_in_Table_2/rnnpg/"  #到数据集的路径，可能根据具体情况修改
BATCH_SIZE=128
LEN = 7 # 用于决定5言还是7言

In [5]:
def get_train_data(fileName):
    """
    @params:
        fileName:文件名，具体应该为"qtrain"

    @return:
        poem_line_lst5:五言绝句列表
        poem_line_lst7:七言绝句列表
        wd2Idx5:适用于五言绝句的wd2Idx映射
        wd2Idx7:适用于七言绝句的wd2Idx映射
        idx2Wd5:适用于五言绝句的idx2Wd映射
        idx2Wd7:适用于七言绝句的idx2Wd映射
        poem_vec_lst5:映射后的五言绝句列表
        poem_vec_lst7:映射后的七言绝句列表
    
    其它:
        暂时没有为每句诗加上<S>和<E>
    """
    poem_line_lst5 = []
    poem_line_lst7 = []

    poem_vec_lst5 = []
    poem_vec_lst7 = []

    vocab5 = []
    vocab7 = []

    with open(root_path + fileName, 'r', encoding='utf-8') as fin:
        for line in fin:
            line = (" ".join(line.strip().split("\t"))).split(" ")
            line = ["<S>"] + line + ["<E>"]
            if len(line) == 22:
                poem_line_lst5.append(line)
                vocab5.extend(line)
            elif len(line) == 30:
                poem_line_lst7.append(line)
                vocab7.extend(line)

    vocab5 = list(set(vocab5))
    vocab7 = list(set(vocab7))
    
    random.shuffle(poem_line_lst5)
    random.shuffle(poem_line_lst7)

    wd2Idx5 = {wd: idx for idx, wd in enumerate(vocab5)}
    wd2Idx7 = {wd: idx for idx, wd in enumerate(vocab7)}

    idx2Wd5 = {idx: wd for idx, wd in enumerate(vocab5)}
    idx2Wd7 = {idx: wd for idx, wd in enumerate(vocab7)}

    poem_vec_lst5 = [[wd2Idx5[wd] for wd in line] for line in poem_line_lst5]
    poem_vec_lst7 = [[wd2Idx7[wd] for wd in line] for line in poem_line_lst7]

    print(len(poem_line_lst5), len(poem_line_lst7))
    print(len(wd2Idx5), len(wd2Idx7))
    print(len(poem_vec_lst5), len(poem_vec_lst7))

    return poem_line_lst5, poem_line_lst7, wd2Idx5, wd2Idx7, idx2Wd5, idx2Wd7,poem_vec_lst5, poem_vec_lst7


poem_line_lst5, poem_line_lst7, wd2Idx5, wd2Idx7, idx2Wd5, idx2Wd7, poem_vec_lst5, poem_vec_lst7 = get_train_data( 
    "qtrain")

11274 63535
5260 6742
11274 63535


In [6]:
def get_batch(data,bat,sent_len):
    """
    @params:
        data:待划分的数据集
        bat:BATCH_SIZE
        sent_len:单句长度
    
    @returns:
        X_batch:shape: len(data)//bat,bat,seq_len,其中seq_len包含四句诗
        Y_batch:shape: len(data)//bat,bat,seq_len,其中seq_len包含后三句诗
    """
    X_batch = []
    Y_batch = []
    for idx in range(len(data)//bat):
        st = idx * bat
        ed = st + bat
        X_batch.append([vec[:sent_len] for vec in data[st:ed]])
        Y_batch.append([vec[sent_len:] for vec in data[st:ed]])
    X_batch = torch.tensor(X_batch,device=device)
    Y_batch = torch.tensor(Y_batch,device=device)
    
    return X_batch,Y_batch

X_batch,Y_batch = get_batch(poem_vec_lst7,BATCH_SIZE,LEN+1)

In [7]:
print(X_batch.shape)
print(X_batch.size(0))
# print(X_batch[0].permute(1,0))

torch.Size([496, 128, 8])
496


## 时间处理函数

In [8]:
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

## Encoder 模块

In [24]:
class Encoder(nn.Module):
    def __init__(self,input_size,hidden_size,vec_dim,num_layer):
        super(Encoder,self).__init__()
        self.hidden_size = hidden_size
        self.vec_dim = vec_dim
        self.embedding = nn.Embedding(input_size,vec_dim)
        self.gru = nn.GRU(vec_dim,hidden_size,bidirectional=True)
        self.num_layer = num_layer
        self.num_dir = 1 if self.gru.bidirectional == False else 2

    def forward(self,input,hidden):
        """
        @params:
            input:(seq_len,batch)
            hidden:(num_layers*num_dirs,batch,hidden_size)
        """
        seq_len,batch = input.size()

        embedded = self.embedding(input).view(seq_len,batch,-1) 
        output = embedded  # output:(seq_len,batch,vec_dim)
        output,hidden = self.gru(output,hidden) # output:(seq_len,batch,num_dir*hidden_size)
                                                # hidden:(num_layer*num_dir,batch,hidden_size)
        output = output[:,:,:self.hidden_size]+output[:,:,self.hidden_size:]
        hidden = hidden.view(self.num_layer,self.num_dir,batch,self.hidden_size)
        hidden = hidden[:,0,:,:] + hidden[:,1,:,:]
        # output:seq_len,batch,hidden_size
        # hidden:num_layer,batch,hidden_size
        return output,hidden

    def initHidden(self,bat):
        """
        @params
            bat:batch参数
        """
        return torch.zeros(self.num_layer*self.num_dir, bat, self.hidden_size, device=device)
        


## 带attention机制的Decoder模块

In [25]:
class Decoder(nn.Module):
    def __init__(self,input_size,hidden_size,vec_dim,num_layer,dropout_p):
        super(Decoder,self).__init__()
        self.hidden_size = hidden_size
        self.vec_dim = vec_dim
        self.embedding = nn.Embedding(input_size,vec_dim)
        self.encode_seq_len = LEN+1
        self.dropout_p = dropout_p
        self.input_size = input_size
        self.num_layer = num_layer

        self.gru = nn.GRU(vec_dim,hidden_size)
        self.attn = nn.Linear(self.hidden_size+self.vec_dim,self.encode_seq_len)
        self.attn_combine = nn.Linear(self.hidden_size+self.vec_dim,self.vec_dim)
        self.dropout = nn.Dropout(self.dropout_p)
        self.out = nn.Linear(self.hidden_size,self.input_size)
    
    def forward(self,input,hidden,encoder_outputs):
        """
        @params:
            encoder_outputs:encode_seq_len,batch,num_dir*hidden_size
            hidden:num_layer*num_dir,batch,hidden_size
            input:seq_len,batch
        """
        seq_len,batch = input.size()  # when decoding ,we let seq_len = 1

        embedded = self.embedding(input).view(seq_len,batch,-1)
        embedded = self.dropout(embedded)      # embedded:1,batch,vec_dim

        attn_weights = F.softmax(              # attn_weights:batch,encode_seq_len
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1) 
        attn_applied = torch.bmm(attn_weights.unsqueeze(1),
                                encoder_outputs.permute(1,0,2).contiguous()) 
        # so far,shape of attn_applied:batch,1,hidden_size
        attn_applied = attn_applied.permute(1,0,2).contiguous()

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        # output:1,batch,vec_dim
        output,hidden = self.gru(output,hidden)
        # output:1,batch,vec_dim
        logits = self.out(output)  # logits:1,batch,input_size
        logits = logits.view(-1,self.input_size)
        
        return logits,hidden,attn_weights
    
    def initHidden(self,bat):
        return torch.zeros(self.num_layer, bat, self.hidden_size, device=device)


## train 模块

In [11]:
def train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion,wd2Idx):
    """
    @params
        input_tensor:batch,seq_len
    """

    input_tensor = input_tensor.permute(1,0).contiguous()
    target_tensor = target_tensor.permute(1,0).contiguous()

    encoder_hidden = encoder.initHidden(input_tensor.size()[1])

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_len = input_tensor.size(0)
    target_len = target_tensor.size(0)

    encoder_outputs =  torch.zeros(LEN+1,encoder.hidden_size,device = device) # 单向、batch=1
    loss = 0

    encoder_outputs,encoder_hidden = encoder(input_tensor,encoder_hidden)
    # encoder_outputs:encode_seq_len,batch,num_dir*hidden_size
    # encoder_hidden:num_layer*num_dir,batch,hidden_size
    
    decoder_input = torch.tensor([wd2Idx["<S>"]]*BATCH_SIZE,device=device).view(1,BATCH_SIZE)
    decoder_hidden = encoder_hidden

    # Teacher forcing
    for di in range(target_len):
        decoder_output,decoder_hidden,decoder_attention = decoder(
            decoder_input,decoder_hidden,encoder_outputs
        )
        loss += criterion(decoder_output,target_tensor[di])
        decoder_input = target_tensor[di].view(1,-1)
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()/target_len

## trainIters 模块

In [12]:
def trainIters(encoder, decoder, wd2Idx,epoch,print_every=100, plot_every=100, learning_rate=0.005):

    global X_batch,Y_batch

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss()
    batch_len = len(X_batch)
    for ep in range(epoch):
        print("epoch:{}".format(ep))
        for iter in range(0, batch_len):
            input_tensor = X_batch[iter]
            target_tensor = Y_batch[iter]

            loss = train(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion,wd2Idx)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, (iter+1) / batch_len),
                                            iter+1, (iter+1) / batch_len * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

    # showPlot(plot_losses)

In [28]:
hidden_size = 256
vec_dim = 200
num_layer = 1
encoder = Encoder(len(wd2Idx7), hidden_size,vec_dim,num_layer).to(device)
decoder = Decoder(len(wd2Idx7), hidden_size,vec_dim,num_layer,dropout_p=0.1).to(device)



In [29]:
trainIters(encoder, decoder,wd2Idx7,10,print_every=5)

epoch:0
0m 3s (- 26m 17s) (1 0%) 1.7628
0m 4s (- 6m 15s) (6 1%) 7.6173
0m 6s (- 4m 25s) (11 2%) 7.1021
0m 7s (- 3m 42s) (16 3%) 7.1024
0m 8s (- 3m 19s) (21 4%) 7.0840
0m 10s (- 3m 4s) (26 5%) 7.0463
0m 11s (- 2m 54s) (31 6%) 7.0565
0m 13s (- 2m 46s) (36 7%) 7.0402
0m 14s (- 2m 40s) (41 8%) 6.9753
0m 15s (- 2m 35s) (46 9%) 6.9334
0m 17s (- 2m 31s) (51 10%) 6.9024
0m 18s (- 2m 27s) (56 11%) 6.9468
0m 20s (- 2m 23s) (61 12%) 6.9320
0m 21s (- 2m 19s) (66 13%) 6.9068
0m 22s (- 2m 16s) (71 14%) 6.8750
0m 24s (- 2m 14s) (76 15%) 6.8342
0m 25s (- 2m 11s) (81 16%) 6.8288
0m 26s (- 2m 8s) (86 17%) 6.8010
0m 28s (- 2m 6s) (91 18%) 6.7568


KeyboardInterrupt: 

## Eval 函数

In [30]:
def evaluate(encoder, decoder,input_tensor,wd2Idx,idx2Wd):
    """
    @params:
        input_tensor:1,seq_len
    """
    with torch.no_grad():
        input_tensor = input_tensor.permute(1,0).contiguous() # seq_len,1
        input_length = input_tensor.size()[0]

        encoder_hidden = encoder.initHidden(1)
        encoder_outputs = torch.zeros(LEN+1,encoder.hidden_size, device=device)
        encoder_outputs,encoder_hidden = encoder(input_tensor,encoder_hidden)
        decoder_input = torch.tensor([wd2Idx["<S>"]], device=device).view(1,-1)  # <S>

        decoder_hidden = encoder_hidden

        decoded_words = []
        # decoder_attentions = torch.zeros(encode_seq_len, encode_seq_len)

        for di in range(4 * LEN):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            # decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == wd2Idx["<E>"]:
                decoded_words.append('<E>')
                break
            else:
                decoded_words.append(idx2Wd[topi.item()])

            decoder_input = topi.squeeze().detach().view(1,-1)

        return decoded_words

In [31]:
input_tensor = X_batch[3][0].view(1,-1)
print(input_tensor.size())
encoded_words = [idx2Wd7[idx.item()] for idx in input_tensor[0]]
decoded_words = evaluate(encoder,decoder,input_tensor,wd2Idx7,idx2Wd7)
print("".join(encoded_words[1:]))
for i in range(len(decoded_words[:-1])):
    print(decoded_words[i],end="")
    if (i+1)%7 == 0:
        print("")

torch.Size([1, 8])
晴日东山饱看花
一片一年不是一
年不是一年不是
一年

In [17]:
a = torch.tensor([[[1,2,3],[3,4,5]],[[1,2,5],[6,7,8]]])
print(a)

tensor([[[1, 2, 3],
         [3, 4, 5]],

        [[1, 2, 5],
         [6, 7, 8]]])


In [18]:
a = 1
print("a={}".format(a))

a=1
