In [1]:
import time
import math
import numpy as np
import torch
from torch import nn,optim
import torch.nn.functional as F
import random
import zipfile
# Torch device string shared by the cells below.
device = 'cpu'

In [2]:
def load_data_jay_lyrics(zip_path=r'F:\study\ml\ebooks3\6\jaychou_lyrics.txt.zip',
                         member='jaychou_lyrics.txt',
                         max_chars=10000):
    """Load a zipped UTF-8 text corpus and build a character-level vocabulary.

    Args:
        zip_path: Path of the zip archive to read. The default is the original
            machine-specific location; pass your own path on other machines.
        member: Name of the text file inside the archive.
        max_chars: Keep only the first ``max_chars`` characters of the corpus
            (after newline replacement). ``None`` keeps the whole corpus.

    Returns:
        A tuple ``(corpus_indices, char_to_idx, idx_to_char, vocab_size)``:
        the corpus as a list of integer indices, the char -> index dict, the
        index -> char list, and the number of distinct characters.

    Raises:
        FileNotFoundError: if ``zip_path`` does not exist.
        KeyError: if ``member`` is not present in the archive.
    """
    with zipfile.ZipFile(zip_path) as zin:
        with zin.open(member) as f:
            corpus_chars = f.read().decode('utf-8')

    # Line breaks are turned into spaces so the corpus is one long line.
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    if max_chars is not None:
        corpus_chars = corpus_chars[:max_chars]

    # sorted() instead of list(set(...)): set iteration order depends on
    # per-process string hashing, which would give a different char<->index
    # mapping on every kernel restart.
    idx_to_char = sorted(set(corpus_chars))
    char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size

In [3]:
# Read the corpus and unpack the index sequence, both vocabulary mappings,
# and the vocabulary size into notebook-level names.
(corpus_indices,
 char_to_idx,
 idx_to_char,
 vocab_size) = load_data_jay_lyrics()

In [4]:
# Input and output widths both equal the vocabulary size; the hidden width is 256.
num_inputs = vocab_size
num_hiddens = 256
num_outputs = vocab_size

In [5]:
def to_onehot(x, n_class):
    """One-hot encode every column of a 2-D index tensor.

    Args:
        x: 2-D tensor of class indices (it is indexed as ``x[:, i]`` for each
            ``i`` in ``range(x.shape[1])``). Values are cast to ``long``, so
            float tensors holding whole numbers are accepted.
        n_class: Number of classes, i.e. the width of each one-hot row.

    Returns:
        A list with ``x.shape[1]`` entries; entry ``i`` is a float32 tensor of
        shape ``(x.shape[0], n_class)`` encoding column ``i`` of ``x``, located
        on the same device as ``x``.
    """
    def _one_hot(col, n_class):
        col = col.long()
        # Width comes from n_class (not a notebook global), and the buffer is
        # created on the input's device so scatter_ never mixes devices.
        res = torch.zeros(col.shape[0], n_class,
                          dtype=torch.float32, device=col.device)
        res.scatter_(1, col.view(-1, 1), 1)
        return res

    return [_one_hot(x[:, i], n_class) for i in range(x.shape[1])]
        

In [12]:
class RNNModel(nn.Module):
    """Recurrent layer followed by a linear projection onto the vocabulary.

    Attributes:
        rnn: The recurrent layer supplied by the caller.
        hidden_size: Feature width fed to ``dense``; the layer's
            ``hidden_size``, doubled when the layer is bidirectional.
        vocab_size: Number of classes used for one-hot encoding and for the
            width of the output layer.
        dense: ``nn.Linear(hidden_size, vocab_size)``.
        state: Most recent state returned by ``rnn`` (``None`` before the
            first forward pass).
    """

    def __init__(self, rnn_layer, vocab_size):
        super().__init__()
        self.rnn = rnn_layer
        num_directions = 2 if rnn_layer.bidirectional else 1
        self.hidden_size = num_directions * rnn_layer.hidden_size
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, self.vocab_size)
        self.state = None

    def forward(self, inputs, state):
        """Run one forward pass.

        ``inputs`` is one-hot encoded with ``to_onehot``; the resulting list
        is stacked along a new leading dimension and fed to ``rnn`` together
        with ``state``. The recurrent output is flattened to two dimensions
        before the linear layer.

        Returns:
            ``(output, state)``: the linear layer's output and the state
            returned by ``rnn`` (also cached on ``self.state``).
        """
        one_hot_steps = to_onehot(inputs, self.vocab_size)
        stacked = torch.stack(one_hot_steps)
        y, self.state = self.rnn(stacked, state)
        # Collapse every leading dimension so dense sees (rows, features).
        flat = y.view(-1, y.size(-1))
        return self.dense(flat), self.state


In [13]:
def predict_rnn_pytorch(prefix,num_chars,model,vocab_size,device,idx_to_char,char_to_idx):
    """Greedily generate ``num_chars`` characters that continue ``prefix``.

    The prefix is fed one character at a time to warm up the recurrent state;
    after that, each step feeds back the arg-max of the previous prediction.

    Args:
        prefix: Non-empty seed string; every character must be a key of
            ``char_to_idx``.
        num_chars: Number of characters to generate after the prefix.
        model: Callable ``model(x, state) -> (y, state)`` where ``x`` is a
            ``(1, 1)`` index tensor and ``y`` supports ``argmax(dim=1)``.
        vocab_size: Not used by this function; kept for interface
            compatibility.
        device: Device on which the input tensor and the carried state are
            placed before each call to ``model``.
        idx_to_char: Sequence mapping index -> character.
        char_to_idx: Mapping character -> index.

    Returns:
        The prefix followed by the generated characters, as one string.

    Raises:
        IndexError: if ``prefix`` is empty.
        KeyError: if a prefix character is missing from ``char_to_idx``.
    """
    state = None
    outputs = [char_to_idx[prefix[0]]]
    # Inference only: without no_grad the autograd graph would keep growing
    # through the state that is carried from step to step.
    with torch.no_grad():
        for t in range(num_chars + len(prefix) - 1):
            x = torch.tensor([outputs[-1]], dtype=torch.long, device=device).view(1, 1)
            if state is not None:
                # Tuple states (e.g. an LSTM's (h, c)) are moved element-wise.
                if isinstance(state, tuple):
                    state = tuple(s.to(device) for s in state)
                else:
                    state = state.to(device)
            (y, state) = model(x, state)
            if t < len(prefix) - 1:
                # Still consuming the prefix: the next input is the known char.
                outputs.append(char_to_idx[prefix[t + 1]])
            else:
                outputs.append(int(y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in outputs])

In [14]:
# Learning rate, an LSTM over one-hot characters, and the wrapping model.
lr = 1e-2
lstm_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens)
model = RNNModel(rnn_layer=lstm_layer, vocab_size=vocab_size)

# Sample 10 characters from the model as constructed above; the returned
# string is the cell's displayed value.
predict_rnn_pytorch(
    prefix='分开',
    num_chars=10,
    model=model,
    vocab_size=vocab_size,
    device=device,
    idx_to_char=idx_to_char,
    char_to_idx=char_to_idx,
)

'分开站站将脸碰峡峡碰峡峡'

In [15]:
def grad_clipping(params,theta,devie):
    """Rescale gradients in place so their global L2 norm is at most ``theta``.

    Args:
        params: Iterable of parameters. It may be a one-shot iterator such as
            ``model.parameters()``; it is consumed exactly once.
        theta: Maximum allowed L2 norm over all gradients taken together.
        devie: Not used. The name (including its spelling) is kept unchanged
            so existing positional and keyword callers keep working.

    Returns:
        None. Gradients are modified in place. Parameters whose ``.grad`` is
        ``None`` are ignored.
    """
    # Materialise once: iterating a generator twice would make the rescale
    # loop see nothing, silently skipping the clip.
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return
    with torch.no_grad():
        # Accumulate as Python floats so gradients on any device can be summed.
        total_sq = sum((g ** 2).sum().item() for g in grads)
        norm = total_sq ** 0.5
        if norm > theta:
            scale = theta / norm
            for g in grads:
                g.mul_(scale)

In [None]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,device=None):
    if device is None:
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus_indices=torch.Tensor(corpus_indices,device=device)
    