In [6]:
import torch
import random
import zipfile

with zipfile.ZipFile('../jaychou_lyrics.txt.zip') as zin:
    with zin.open('jaychou_lyrics.txt') as f:
        corpus_chars = f.read().decode('utf-8')
corpus_chars[:40]

In [7]:
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]

In [8]:
idx_to_char = list(set(corpus_chars))
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
vocab_size = len(char_to_idx)
vocab_size # 1027

In [15]:
corpus_indices = [char_to_idx[char] for char in corpus_chars]
sample = corpus_indices[:20]
print('chars:', ''.join([idx_to_char[idx] for idx in sample]))
print('indices:', sample)


chars: 想要有直升机 想要和你飞到宇宙去 想要和
indices: [20, 18, 52, 442, 967, 436, 639, 20, 18, 579, 273, 56, 794, 818, 972, 933, 639, 20, 18, 579]


In [14]:
import torch

x = torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [16]:
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # 减1是因为输出的索引x是相应输入的索引y加1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # 返回从pos开始的长为num_steps的序列
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    for i in range(epoch_size):
        # 每次读取batch_size个随机样本
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device)

In [17]:
my_seq = list(range(30))
for X, Y in data_iter_random(my_seq, batch_size=2, num_steps=6):
    print('X: ', X, '\nY:', Y, '\n')

X:  tensor([[ 6.,  7.,  8.,  9., 10., 11.],
        [18., 19., 20., 21., 22., 23.]], device='cuda:0') 
Y: tensor([[ 7.,  8.,  9., 10., 11., 12.],
        [19., 20., 21., 22., 23., 24.]], device='cuda:0') 

X:  tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [12., 13., 14., 15., 16., 17.]], device='cuda:0') 
Y: tensor([[ 1.,  2.,  3.,  4.,  5.,  6.],
        [13., 14., 15., 16., 17., 18.]], device='cuda:0') 



In [5]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import zipfile
import math


In [6]:
def load_data_jay_lyrics():
    """加载周杰伦歌词数据集"""
    with zipfile.ZipFile('jaychou_lyrics.txt.zip') as zin:
        with zin.open('jaychou_lyrics.txt') as f:
            corpus_chars = f.read().decode('utf-8')
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[0:10000]
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size

(corpus_indices, char_to_idx, idx_to_char, vocab_size) = load_data_jay_lyrics()


In [7]:
def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
    corpus_indices = np.array(corpus_indices)
    data_len = len(corpus_indices)
    batch_len = data_len // batch_size
    indices = corpus_indices[0: batch_size*batch_len].reshape((
        batch_size, batch_len))
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        i = i * num_steps
        X = indices[:, i: i + num_steps]
        Y = indices[:, i + 1: i + num_steps + 1]
        yield X, Y


In [8]:
num_hiddens = 256
cell = keras.layers.SimpleRNNCell(num_hiddens, 
                                  kernel_initializer='glorot_uniform')
rnn_layer = keras.layers.RNN(cell,time_major=True,
                            return_sequences=True,return_state=True)


In [9]:
batch_size = 2
state = rnn_layer.cell.get_initial_state(batch_size=batch_size,dtype=tf.float32)
state.shape


TensorShape([2, 256])

In [10]:
num_steps = 35
X = tf.random.uniform(shape=(num_steps, batch_size, vocab_size))
print(X.shape)
Y, state_new = rnn_layer(X, state)
print(Y.shape)
print(len(state_new))
print(state_new[0].shape)


(35, 2, 1027)
(35, 2, 256)
2
(256,)


In [11]:
class RNNModel(tf.keras.Model):
    def __init__(self, rnn_layer, vocab_size):
        super().__init__()
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = keras.layers.Dense(vocab_size)
        
    def call(self, inputs, state):
        # 将输入转置成(num_steps, batch_size)后获取one-hot向量表示
        X = tf.one_hot(tf.transpose(inputs), self.vocab_size)
        Y, state = self.rnn(X, state)
        # 全连接层会首先将Y的形状变成(num_steps * batch_size, num_hiddens)，
        # 它的输出形状为(num_steps * batch_size, vocab_size)
        output = self.dense(tf.reshape(Y, (-1, Y.shape[-1])))
        return output, state
    
    def get_initial_state(self, *args, **kwargs):
        return self.rnn.cell.get_initial_state(*args, **kwargs)

model = RNNModel(rnn_layer, vocab_size)


In [12]:
def predict_rnn_keras(prefix, num_chars):
    # 使用model的成员函数来初始化隐藏状态
    state = model.get_initial_state(batch_size=1,dtype=tf.float32)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        X = np.array([output[-1]]).reshape((1, 1))
        Y, state = model(X, state)  # 前向计算不需要传入模型参数
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(np.array(tf.argmax(Y,axis=-1))))

    return ''.join([idx_to_char[i] for i in output])


In [13]:
predict_rnn_keras('分开', 10)


  output.append(int(np.array(tf.argmax(Y,axis=-1))))


'分开烧写向睡于草寒怯典知'

In [14]:
# 计算裁剪后的梯度
def grad_clipping(grads,theta):
    norm = np.array([0])
    for i in range(len(grads)):
        norm+=tf.math.reduce_sum(grads[i] ** 2)
    norm = np.sqrt(norm).item()
    new_gradient=[]
    if norm > theta:
        for grad in grads:
            new_gradient.append(grad * theta / norm)
    else:
        for grad in grads:
            new_gradient.append(grad)  
    return new_gradient


In [15]:
lr = 1e2
optimizer=tf.keras.optimizers.SGD(learning_rate=lr)


In [16]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
def train_step(X, state, Y, clipping_theta=1e-2):
    with tf.GradientTape(persistent=True) as tape:
        (outputs, state) = model(X, state)
        y = Y.T.reshape((-1,))
        l = loss_object(y,outputs)

    grads = tape.gradient(l, model.variables)
    # 梯度裁剪
    grads=grad_clipping(grads, clipping_theta)
    optimizer.apply_gradients(zip(grads, model.variables))  # 因为已经误差取过均值，梯度不用再做平均
    return l, y


In [17]:
def train_and_predict_rnn_keras(num_epochs, batch_size, pred_period, pred_len, prefixes):
    
    for epoch in range(num_epochs):
        l_sum, n = 0.0, 0
        data_iter = data_iter_consecutive(
            corpus_indices, batch_size, num_steps)
        state = model.get_initial_state(batch_size=batch_size,dtype=tf.float32)
        for X, Y in data_iter:
            l, y = train_step(X, state, Y)
            l_sum += np.array(l).item() * len(y)
            n += len(y)

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f' % (
                epoch + 1, math.exp(l_sum / n)))
            for prefix in prefixes:
                print(' -', predict_rnn_keras(prefix, pred_len))


In [18]:
num_epochs, batch_size = 250, 32
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']
train_and_predict_rnn_keras(num_epochs, batch_size, pred_period,
                            pred_len, prefixes)


epoch 50, perplexity 2049.541150


  output.append(int(np.array(tf.argmax(Y,axis=-1))))


 - 分开小们晰空揍些些奏古藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏
 - 不分开们晰空揍些些奏古藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏藏
epoch 100, perplexity 3890.516798
 - 分开想烁亮烛瓣箱弃好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好
 - 不分开鸠亮烛瓣箱弃好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好好
epoch 150, perplexity 809219.088307
 - 分开 我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我
 - 不分开我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我
epoch 200, perplexity 2974.233151
 - 分开倦糗抄沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼
 - 不分开糗抄沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼沼
epoch 250, perplexity 985.613386
 - 分开待鼠抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄
 - 不分开抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄抄
