In [1]:
import time
import math
import numpy as np
import torch
from torch import nn,optim
import torch.nn.functional as F
import random
import zipfile
device='cpu'

In [2]:
def load_data_jay_lyrics():
    with zipfile.ZipFile(r'F:\study\ml\ebooks3\6\jaychou_lyrics.txt.zip') as zin:
        with zin.open('jaychou_lyrics.txt') as f:
            corpus_chars=f.read().decode('utf-8')
    corpus_chars=corpus_chars.replace('\n',' ').replace('\r',' ')
    corpus_chars=corpus_chars[0:10000]
    idx_to_char=list(set(corpus_chars))
    char_to_idx=dict([( char,i ) for i , char in enumerate(idx_to_char)])
    vocab_size=len(char_to_idx)
    corpus_indices=[char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size

In [3]:
corpus_indices,char_to_idx,idx_to_char,vocab_size=load_data_jay_lyrics()

In [4]:
num_inputs,num_hiddens,num_outputs=vocab_size,256,vocab_size

In [5]:
vocab_size

1027

In [6]:
def to_onehot(x,n_class):
    def _one_hot(x,n_class):
        x=x.long()
        res=torch.zeros(x.shape[0],vocab_size,dtype=torch.float32)
        res.scatter_(1,x.view(-1,1),1)
        return res
    return [_one_hot(x[:,i],n_class) for i in range(x.shape[1])]
        

In [7]:
class RNNModel(nn.Module):
    def __init__(self,rnn_layer,vocab_size):
        super().__init__()
        self.rnn=rnn_layer
        self.hidden_size=rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size=vocab_size
        self.dense=nn.Linear(self.hidden_size,self.vocab_size)
        self.state=None
    
    def forward(self,inputs,state):
        x=to_onehot(inputs,self.vocab_size)
        print('x new shape :',torch.stack(x).shape)
        y,self.state=self.rnn(torch.stack(x),state)
        output=self.dense(y.view(-1,y.shape[-1]))
        return output,self.state


In [8]:
def predict_rnn_pytorch(prefix,num_chars,model,vocab_size,device,idx_to_char,char_to_idx):
    state=None
    outputs=[char_to_idx[prefix[0]]]
    for t in range(num_chars+len(prefix)-1):
        x=torch.Tensor([outputs[-1]]).view(1,1)
        if state is not None:
            if isinstance(state,tuple):
                state=(state[0],state[1])
            else:
                state=state
        (y,state)=model(x,state)
        if t<len(prefix)-1:
            outputs.append(char_to_idx[prefix[t+1]])
        else:
            outputs.append(int(y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in outputs])

In [9]:
lr=1e-2
lstm_layer=nn.LSTM(input_size=vocab_size,hidden_size=num_hiddens)
model=RNNModel(lstm_layer,vocab_size)
predict_rnn_pytorch('分开',10,model,vocab_size,device,idx_to_char,char_to_idx)

'分开区区区区区区区区区区'

In [10]:
def grad_clipping(params,theta,device):
    norm=torch.Tensor([0.0])
    for p in params:
        norm+=(p.grad.data **2).sum()
    norm=norm.sqrt().item()
    if norm > theta:
        for p in params:
            p.grad.data *= (theta/norm)

In [11]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,device=None):
    if device is None:
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus_indices=torch.Tensor(corpus_indices,device=device)
    data_len=len(corpus_indices)
    batch_len=data_len//batch_size
    indices=corpus_indices[0:batch_size*batch_len].view(batch_size,batch_len)
    epoch_size=(batch_len-1)//num_steps
    for i in range(epoch_size):
        i=i*num_steps
        X=indices[:,i:i+num_steps]
        Y=indices[:,i+1:i+num_steps+1]
        yield X,Y

In [12]:
len(corpus_indices)

10000

In [13]:
10000/32

312.5

In [14]:
10000/32/9

34.72222222222222

In [17]:
for x ,y  in data_iter_consecutive(corpus_indices,32,35):
    print(y)
    print('x shape',x.shape)
    print('y shape',y.shape)

tensor([[ 486.,  264.,   57.,  ...,  627.,  819.,  483.],
        [ 474.,   10.,  660.,  ...,   10.,  227.,  777.],
        [  93.,  140.,  597.,  ...,   10.,  281.,  244.],
        ...,
        [ 834.,  627.,   44.,  ...,  561.,  602.,  800.],
        [ 136.,  885.,  960.,  ...,   83., 1004.,  992.],
        [ 565.,  627.,  937.,  ...,  627.,  252.,  908.]])
x shape torch.Size([32, 35])
y shape torch.Size([32, 35])
tensor([[ 172.,  483.,  172.,  ...,  992., 1020.,  806.],
        [ 223.,  979.,  627.,  ...,  660.,  819.,  236.],
        [ 627., 1016.,  124.,  ...,  786.,  627.,  511.],
        ...,
        [ 887.,  517.,  790.,  ...,   10.,  421.,  942.],
        [ 466.,  374.,  627.,  ...,   69.,  351.,  627.],
        [ 485.,  745.,  136.,  ...,  188.,  627.,  355.]])
x shape torch.Size([32, 35])
y shape torch.Size([32, 35])
tensor([[ 627.,  660.,  819.,  ...,  627.,  113.,  474.],
        [   6.,   10.,  227.,  ...,  627.,  489.,  663.],
        [ 276.,  992.,  423.,  ...,  695., 1

In [14]:
32*35

1120

In [15]:
def train_and_predict_rnn_pytorch(model,num_hiddens,vocab_size,device,corpus_indices,idx_to_char,char_to_idx,num_epochs,num_steps,lr,
                                 clipping_theta,batch_size,pred_period,pred_len,prefixes):
    loss=nn.CrossEntropyLoss()
    optimizer=optim.Adam(model.parameters(),lr=lr)
    state=None
    
    for epoch in range(num_epochs):
        l_sum,n,start=0.0,0,time.time()
        data_iter=data_iter_consecutive(corpus_indices,batch_size,num_steps)
        for x,y in data_iter:
            if state is not None:
                if isinstance(state,tuple):
                    state=(state[0].detach(),state[1].detach())
                else:
                    state=state.detach()
            (output,state)=model(x,state)
            y=torch.transpose(y,0,1).contiguous().view(-1)
            print('output: ' ,output.shape)
            print('y: ' ,y.shape)
            print('----------------')
            print('y',y)
            print('output',output)
            
            l=loss(output,y.long())
            optimizer.zero_grad()
            l.backward()
            grad_clipping(model.parameters(),clipping_theta,device)
            optimizer.step()
            
            l_sum +=l.item()*y.shape[0]
            n+=y.shape[0]
        try:
            perplextiry=math.exp(l_sum/n)
        except OverflowError:
            perplextiry=float('inf')
        
        if (epoch+1) % pred_period==0:
            print('epoch %d,perplexity %f ,time %.2f sec' % (epoch+1,perplextiry,time.time()-start))
            
            for prefix in prefixes:
                print('-',predict_rnn_pytorch(prefix,pred_len,model,vocab_size,device,idx_to_char,char_to_idx))
            

In [16]:
num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32,1e2, 1e-2
pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']

In [21]:
num_steps

35

In [17]:
lr=1e-2
gru_layer=nn.GRU(input_size=vocab_size,hidden_size=num_hiddens)
model=RNNModel(gru_layer,vocab_size)
train_and_predict_rnn_pytorch(model,num_hiddens,vocab_size,device,corpus_indices,idx_to_char,char_to_idx,num_epochs,num_steps,lr,
                                 clipping_theta,batch_size,pred_period,pred_len,prefixes)

epoch 40,perplexity 1.037839 ,time 0.66 sec
- 分开始想像 爸和妈当年的模样 说著一口吴侬软语的姑娘缓缓走过外滩 消失的 旧时光 一九四三 回头看 的片
- 不分开始乡相信命运 感谢地心引力 让我碰到你 漂亮的让我面红的可爱女人 温柔的让我心疼的可爱女人 透明的让
epoch 80,perplexity 1.011837 ,time 0.69 sec
- 分开始想像 爸和妈当年的模样 说著一口吴侬软语的姑娘缓缓走过外滩 消失的 旧时光 一九四三 回头看 的片
- 不分开 干什么 干什么 已被我一脚踢开 快使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈兮 习武之人切记 仁者
epoch 120,perplexity 1.008159 ,time 0.78 sec
- 分开始打呼 管家是一只会说法语举止优雅的猪 吸血前会念约翰福音做为弥补 拥有一双蓝色眼睛的凯萨琳公主 专
- 不分开 我跟你看棒球 想这样没担忧 唱着歌 一直走 我想就这样牵着你的手不放开 爱可不可以简简单单没有伤害
epoch 160,perplexity 1.264831 ,time 0.69 sec
- 分开始想像一直在这样的感受 都能承受我已无能为力 我该好好生活 我该好好生活 我该好好生活 我该好好生活
- 不分开始移动 三两银够不够 景色入秋 漫天黄沙凉过 塞北的客栈人多 牧草有没有什么兵器最喜欢 我不能再想 


In [32]:
a=torch.Tensor([[123]])
a.long

<function Tensor.long>

In [33]:
a=torch.Tensor([[123]])
a.long()

tensor([[123]])