In [1]:
import time
import math
import numpy as np
import torch
from torch import nn,optim
import torch.nn.functional as F
import random
import zipfile

In [4]:
device='cpu'

In [5]:
def load_data_jay_lyrics():
    with zipfile.ZipFile(r'F:\study\ml\ebooks3\6\jaychou_lyrics.txt.zip') as zin:
        with zin.open('jaychou_lyrics.txt') as f:
            corpus_chars=f.read().decode('utf-8')
    corpus_chars=corpus_chars.replace('\n',' ').replace('\r',' ')
    corpus_chars=corpus_chars[0:10000]
    idx_to_char=list(set(corpus_chars))
    char_to_idx=dict([( char,i ) for i , char in enumerate(idx_to_char)])
    vocab_size=len(char_to_idx)
    corpus_indices=[char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size

In [6]:
corpus_indices, char_to_idx, idx_to_char, vocab_size=load_data_jay_lyrics()

In [14]:
def one_hot(x,n_class,dtype=torch.float32):
    x=x.long()
    res=torch.zeros(x.shape[0],n_class,dtype=dtype,device=x.device)
    res=res.scatter(1,x.view(-1,1),1)
    return res

In [15]:
def to_onehot(X,n_class):
    return [one_hot(X[:,i],n_class) for i in range(X.shape[1])]

In [16]:
x1=torch.arange(10).view(2,5)
x1

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])

In [17]:
to_onehot(x1,10)

[tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]),
 tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]]),
 tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]]),
 tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]]),
 tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])]

In [18]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,device=None):
    if device is None:
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus_indices=torch.Tensor(corpus_indices)
    data_len=len(corpus_indices)
    batch_len=data_len // batch_size
    indices=corpus_indices[0:batch_size*batch_len].view(batch_size,batch_len)
    epoch_size=(batch_len-1) // num_steps
    for i in range(epoch_size):
        i=i*num_steps
        x=indices[:,i:i+num_steps]
        y=indices[:,i+1:i+num_steps+1]
        yield x,y

In [20]:
def sgd(params,lr,batch_size):
    for p in params:
        p.data -= lr * p.grad /batch_size

In [21]:
num_inputs,num_hiddens,num_outputs=vocab_size,256,vocab_size

In [22]:
def get_params():
    def _one(shape):
        ts=torch.Tensor(np.random.normal(0,0.01,size=shape))
        return torch.nn.Parameter(ts,requires_grad=True)
    
    W_xh=_one((num_inputs,num_hiddens))
    W_hh=_one((num_hiddens,num_hiddens))
    b_h=torch.nn.Parameter(torch.zeros(num_hiddens,requires_grad=True))
    W_hq=_one((num_hiddens,num_outputs))
    b_q=torch.nn.Parameter(torch.zeros(num_outputs,requires_grad=True))
    return nn.ParameterList([W_xh,W_hh,b_h,W_hq,b_q])

In [23]:
def init_rnn_state(batch_size,num_hiddens,device):
    return (torch.zeros((batch_size,num_hiddens)),)

In [29]:
def rnn(inputs,state,params):
    W_xh,W_hh,b_h,W_hq,b_q=params
    H,=state
    outputs=[]
    for x in inputs:
        H=torch.tanh(torch.matmul(x,W_xh)+torch.matmul(H,W_hh)+b_h)
        Y=torch.matmul(H,W_hq)+b_q
        outputs.append(Y)
    return outputs,(H,)

In [30]:
state=init_rnn_state(x1.shape[0],num_hiddens,device)
inputs=to_onehot(x1.to(device),vocab_size)
params=get_params()
outputs,state_new=rnn(inputs,state,params)
print(len(outputs),outputs[0].shape,state_new[0].shape)

5 torch.Size([2, 1027]) torch.Size([2, 256])


In [31]:
def predict_rnn(prefix,num_chars,rnn,params,init_rnn_state,num_hiddens,vocab_size,device,idx_to_char,char_to_idx):
    state=init_rnn_state(1,num_hiddens,device)
    output=[char_to_idx[prefix[0]]]
    for t in range(num_chars+len(prefix)-1):
        X=to_onehot(torch.Tensor([[output[-1]]]),vocab_size)
        (Y,state)=rnn(X,state,params)
        if t < len(prefix)-1:
            output.append(char_to_idx[prefix[t+1]])
        else:
            output.append(int(Y[0].argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in output])

In [32]:
predict_rnn('分开',10,rnn,params,init_rnn_state,num_hiddens,vocab_size,device,idx_to_char,char_to_idx)

'分开水萨硬蕃楷鼠转疯水热'

In [33]:
def grad_clipping(params,theta,device):
    norm=torch.Tensor([0.0])
    for p in params:
        norm +=(p.grad.data**2).sum()
    norm=norm.sqrt().item()
    if norm>theta:
        for p in params:
            p.grad.data *= (theta/norm)

In [51]:
def train_and_predict_rnn(rnn,get_params,init_rnn_state,num_hiddens,vocab_size,corpus_indices,idx_to_char,char_to_idx,is_random_iter,
                          num_epochs,num_steps,lr,clipping_theta,batch_size,pred_period,pred_len,prefixes):
    if is_random_iter:
        data_iter_fn=data_iter_random
    else:
        data_iter_fn=data_iter_consecutive
    params=get_params()
    loss=nn.CrossEntropyLoss()
    
    for epoch in range(num_epochs):
        if not is_random_iter:
            state=init_rnn_state(batch_size,num_hiddens,device)
        l_sum,n,start=0.0,0,time.time()
        data_iter=data_iter_fn(corpus_indices,batch_size,num_steps)
        for X,Y in data_iter:
            if is_random_iter:
                state=init_rnn_state(batch_size,num_hiddens,device)
            else:
                for s in state:
                    s.detach()
            inputs=to_onehot(X,vocab_size)
            (outputs,state)=rnn(inputs,state,params)
            print('outputs shape : ',len(outputs),'',outputs[0].shape)
            outputs=torch.cat(outputs,dim=0)
            print('outputs shape1 : ',outputs.shape)
            print('y shape : ',Y.shape)
#             转置的目的与outputs对应
#             Y: batch_size ,num_steps
#             outputs :num_steps,batch_size
#             所以Y要转置,才能和outputs对应,才能计算 loss
            y=torch.transpose(Y,0,1).contiguous().view(-1)
            print('y shape1 : ',y.shape)
            l=loss(outputs,y.long())
            
            if params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward(retain_graph=True)
            grad_clipping(params,clipping_theta,device)
            sgd(params,lr,1)
            l_sum +=l.item() * y.shape[0]
            n+=y.shape[0]
            
            if (epoch + 1) % pred_period ==0:
                print('epoch %d,perplexity %f,time %.2f sec' % (epoch + 1, math.exp(l_sum / n), time.time() - start))
                for prefix in prefixes:
                    print('-',predict_rnn(prefix,pred_len,rnn,params,init_rnn_state,num_hiddens,vocab_size,device,idx_to_char,char_to_idx))
        

In [52]:
num_epochs,num_steps,batch_size,lr,clipping_theta=40,35,32,1e2,1e-2
pred_period,pred_len,prefixes=20,20,['分开','不分开']

In [53]:
train_and_predict_rnn(rnn,get_params,init_rnn_state,num_hiddens,vocab_size,
                     corpus_indices,idx_to_char,char_to_idx,None,
                     num_epochs,num_steps,lr,clipping_theta,batch_size,pred_period,
                     pred_len,prefixes)

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  torch.Size([32, 35])
y shape1 :  torch.Size([1120])
outputs shape :  35  torch.Size([32, 1027])
outputs shape1 :  torch.Size([1120, 1027])
y shape :  to

In [56]:
a=np.array([[1,2,3,4,5],[6,7,8,9,10]])
a.T.reshape(-1)

array([ 1,  6,  2,  7,  3,  8,  4,  9,  5, 10])

In [57]:
a=np.array([[1,2,3,4,5],[6,7,8,9,10]])
a.reshape(-1)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])