In [59]:
import time
import math
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

import sys
sys.path.append("..")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# (corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

In [60]:
import random
import zipfile

with zipfile.ZipFile('jaychou_lyrics.zip') as zin:
    with zin.open('jaychou_lyrics.txt') as f:
        corpus_chars = f.read().decode('utf-8')
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]
idx_to_char = list(set(corpus_chars))
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
vocab_size = len(char_to_idx)
corpus_indices = [char_to_idx[char] for char in corpus_chars]

In [61]:
# corpus_indices, char_to_idx, idx_to_char, vocab_size

In [62]:
num_hiddens = 256
# rnn_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens) # 已测试
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)
num_steps = 35
batch_size = 2
state = None
X = torch.rand(num_steps, batch_size, vocab_size)
Y, state_new = rnn_layer(X, state)
print(Y.shape, len(state_new), state_new[0].shape)

torch.Size([35, 2, 256]) 1 torch.Size([2, 256])


In [63]:
class RNNModel(nn.Module):
    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1) 
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)
        self.state = None

    def forward(self, inputs, state): # inputs: (batch, seq_len)
        # 获取one-hot向量表示
        
        
#         inputs = inputs.reshape(inputs.shape[0])
#         print(inputs.shape)
#         print(inputs)
        inputs = torch.tensor(inputs.reshape(inputs.shape[0])) 
        print(inputs)
        X = F.one_hot(inputs, self.vocab_size) # X是个list
        print(X)
        X = X.view(1, 1, vocab_size)
        print(X.shape)
        Y, self.state = self.rnn(torch.tensor(X, dtype=torch.float32), state)
        # 全连接层会首先将Y的形状变成(num_steps * batch_size, num_hiddens)，它的输出
        # 形状为(num_steps * batch_size, vocab_size)
        output = self.dense(Y.view(-1, Y.shape[-1]))
        return output, self.state
    
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char,
                      char_to_idx):
    state = None
    output = [char_to_idx[prefix[0]]] # output会记录prefix加上输出
    for t in range(num_chars + len(prefix) - 1):
        X = torch.tensor([output[-1]], device=device).view(1, 1)
        if state is not None:
            if isinstance(state, tuple): # LSTM, state:(h, c)  
                state = (state[0].to(device), state[1].to(device))
            else:   
                state = state.to(device)
#         X = torch.tensor(X, dtype=torch.float32) 
        (Y, state) = model(X, state)
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(Y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in output])

model = RNNModel(rnn_layer, vocab_size).to(device)
predict_rnn_pytorch('分开', 10, model, vocab_size, device, idx_to_char, char_to_idx)

tensor([909])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([1020])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([764])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([577])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([764])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([577])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([764])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([749])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([912])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([912])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])
tensor([650])
tensor([[0, 0, 0,  ..., 0, 0, 0]])
torch.Size([1, 1, 1027])


  inputs = torch.tensor(inputs.reshape(inputs.shape[0]))
  Y, self.state = self.rnn(torch.tensor(X, dtype=torch.float32), state)


'分开榉文榉文榉备酒酒惚酒'