In [15]:
import numpy as np
import matplotlib.pyplot as plt

# Dimensions of the toy RNN cell
input_size = 12  # length of each input vector
hidden_size = 6  # length of the hidden-state vector
output_size = 1  # length of each output vector

# Weight matrices, drawn from a standard normal distribution
W_xh = np.random.standard_normal((hidden_size, input_size))  # input -> hidden
W_hh = np.random.standard_normal((hidden_size, hidden_size))  # hidden -> hidden (recurrence)
W_hy = np.random.standard_normal((output_size, hidden_size))  # hidden -> output

# Bias column vectors, initialised to zero
b_h = np.zeros(shape=(hidden_size, 1))  # hidden-state bias
b_y = np.zeros(shape=(output_size, 1))  # output bias


def rnn_step(x_t, h_prev):
    """Advance the RNN by a single time step.

    Reads the module-level parameters ``W_xh``, ``W_hh``, ``W_hy``, ``b_h``
    and ``b_y``.

    Args:
        x_t: Input for the current step (must be compatible with ``W_xh``
            under ``np.dot``).
        h_prev: Hidden state produced by the previous step.

    Returns:
        Tuple ``(h_t, y_t)``: the new tanh-activated hidden state and the
        linear output computed from it.
    """
    input_term = np.dot(W_xh, x_t)
    recurrent_term = np.dot(W_hh, h_prev)
    h_t = np.tanh(input_term + recurrent_term + b_h)
    y_t = np.dot(W_hy, h_t) + b_y
    return h_t, y_t

In [16]:
# Example input sequence: five random column vectors (a sequence length of 5 is assumed)
x_sequence = [np.random.standard_normal((input_size, 1)) for _ in range(5)]
# Matching random targets, one per time step
y_sequence = [np.random.standard_normal((output_size, 1)) for _ in range(5)]


def forward_propagation(x_sequence):
    """Run the RNN over a whole sequence, starting from a zero hidden state.

    Args:
        x_sequence: Iterable of per-step inputs, each passed to ``rnn_step``.

    Returns:
        Tuple ``(y_preds, h_states)``: two lists holding, for every step in
        order, the output and the hidden state returned by ``rnn_step``.
    """
    y_preds, h_states = [], []
    state = np.zeros((hidden_size, 1))
    for step_input in x_sequence:
        # The hidden state produced here is fed back in on the next iteration.
        state, prediction = rnn_step(step_input, state)
        y_preds.append(prediction)
        h_states.append(state)
    return y_preds, h_states


def calculate_loss(y_preds, y_true):
    """Return the summed squared error averaged over the number of targets.

    Args:
        y_preds: Sequence of predicted arrays, one per time step.
        y_true: Sequence of target arrays, paired with ``y_preds`` by position.

    Returns:
        Sum of squared element-wise differences over all paired steps,
        divided by ``len(y_true)``.
    """
    squared_errors = (
        np.sum((pred - target) ** 2) for pred, target in zip(y_preds, y_true)
    )
    return sum(squared_errors) / len(y_true)

In [17]:
def backward_propagation(x_sequence, y_sequence, y_preds, h_states):
    """Backpropagate through time and return gradients for all parameters.

    Reads the module-level weights ``W_xh``, ``W_hh``, ``W_hy`` and biases
    ``b_h``, ``b_y`` (for shapes and for the chain rule).

    Args:
        x_sequence: Per-step inputs that were fed to the forward pass.
        y_sequence: Per-step targets.
        y_preds: Per-step outputs from the forward pass.
        h_states: Per-step hidden states from the forward pass; the state
            preceding step 0 is taken to be all zeros, matching
            ``forward_propagation``.

    Returns:
        Tuple ``(dW_xh, dW_hh, dW_hy, db_h, db_y)`` of gradients accumulated
        over all time steps. All zeros when ``x_sequence`` is empty.

    Note:
        The output error is ``y_pred - y_true``, i.e. the gradient of half the
        summed squared error. ``calculate_loss`` reports the summed squared
        error divided by the sequence length, so these gradients differ from
        the exact gradient of that reported loss by a constant positive factor.
    """
    dW_xh = np.zeros_like(W_xh)
    dW_hh = np.zeros_like(W_hh)
    dW_hy = np.zeros_like(W_hy)
    db_h = np.zeros_like(b_h)
    db_y = np.zeros_like(b_y)

    if len(x_sequence) == 0:
        return dW_xh, dW_hh, dW_hy, db_h, db_y

    # Hidden state that preceded the first step (the forward pass starts from zeros).
    h_init = np.zeros_like(h_states[0])
    dh_next = np.zeros_like(h_states[0])

    for t in reversed(range(len(x_sequence))):
        dy = y_preds[t] - y_sequence[t]
        dW_hy += np.dot(dy, h_states[t].T)
        db_y += dy

        # Gradient reaching h_t: from this step's output plus from step t + 1.
        dh = np.dot(W_hy.T, dy) + dh_next
        dh_raw = (1 - h_states[t] ** 2) * dh  # derivative of tanh

        db_h += dh_raw
        dW_xh += np.dot(dh_raw, x_sequence[t].T)

        # At t == 0 the previous state is the zero initial state; indexing
        # h_states[-1] would silently pick up the LAST hidden state instead.
        h_before = h_states[t - 1] if t > 0 else h_init
        dW_hh += np.dot(dh_raw, h_before.T)

        dh_next = np.dot(W_hh.T, dh_raw)

    return dW_xh, dW_hh, dW_hy, db_h, db_y

In [18]:
# Step size used by gradient descent
learning_rate = 0.01


def update_parameters(dW_xh, dW_hh, dW_hy, db_h, db_y):
    """Take one plain gradient-descent step on the module-level parameters.

    Each of ``W_xh``, ``W_hh``, ``W_hy``, ``b_h`` and ``b_y`` is modified in
    place by subtracting ``learning_rate`` times its gradient.

    Args:
        dW_xh, dW_hh, dW_hy, db_h, db_y: Gradients for the parameter of the
            matching name.
    """
    parameter_gradient_pairs = (
        (W_xh, dW_xh),
        (W_hh, dW_hh),
        (W_hy, dW_hy),
        (b_h, db_h),
        (b_y, db_y),
    )
    for parameter, gradient in parameter_gradient_pairs:
        # In-place update of the NumPy array, so the global names keep
        # referring to the same (now updated) objects.
        parameter -= learning_rate * gradient

In [19]:
# Training loop for the toy RNN
def train_rnn(x_sequences, y_sequences, epochs=100):
    """Train on paired sequences with per-sequence gradient-descent updates.

    For every epoch, each (input, target) sequence pair goes through a forward
    pass, a loss evaluation, a backward pass and a parameter update. The mean
    loss over ``len(x_sequences)`` is printed once per epoch.

    Args:
        x_sequences: Collection of input sequences.
        y_sequences: Collection of target sequences, paired by position.
        epochs: Number of passes over the whole collection.
    """
    for epoch in range(epochs):
        total_loss = 0
        for inputs, targets in zip(x_sequences, y_sequences):
            predictions, hidden_states = forward_propagation(inputs)
            total_loss += calculate_loss(predictions, targets)
            # Gradients are applied immediately, one sequence at a time.
            update_parameters(
                *backward_propagation(inputs, targets, predictions, hidden_states)
            )

        # Report the mean loss for this epoch
        average_loss = total_loss / len(x_sequences)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {average_loss:.4f}")

In [20]:
# Build a synthetic training set: 100 sequences of 5 random steps each
x_sequences = [
    [np.random.standard_normal((input_size, 1)) for _ in range(5)]
    for _ in range(100)
]
y_sequences = [
    [np.random.standard_normal((output_size, 1)) for _ in range(5)]
    for _ in range(100)
]


# Fit the toy RNN on the synthetic data
train_rnn(x_sequences, y_sequences, epochs=100)

Epoch 1/100, Loss: 1.3088
Epoch 2/100, Loss: 1.0139
Epoch 3/100, Loss: 1.0111
Epoch 4/100, Loss: 1.0092
Epoch 5/100, Loss: 1.0074
Epoch 6/100, Loss: 1.0058
Epoch 7/100, Loss: 1.0042
Epoch 8/100, Loss: 1.0027
Epoch 9/100, Loss: 1.0013
Epoch 10/100, Loss: 1.0000
Epoch 11/100, Loss: 0.9986
Epoch 12/100, Loss: 0.9973
Epoch 13/100, Loss: 0.9960
Epoch 14/100, Loss: 0.9946
Epoch 15/100, Loss: 0.9931
Epoch 16/100, Loss: 0.9917
Epoch 17/100, Loss: 0.9903
Epoch 18/100, Loss: 0.9889
Epoch 19/100, Loss: 0.9875
Epoch 20/100, Loss: 0.9861
Epoch 21/100, Loss: 0.9846
Epoch 22/100, Loss: 0.9830
Epoch 23/100, Loss: 0.9813
Epoch 24/100, Loss: 0.9792
Epoch 25/100, Loss: 0.9766
Epoch 26/100, Loss: 0.9735
Epoch 27/100, Loss: 0.9704
Epoch 28/100, Loss: 0.9679
Epoch 29/100, Loss: 0.9657
Epoch 30/100, Loss: 0.9638
Epoch 31/100, Loss: 0.9621
Epoch 32/100, Loss: 0.9606
Epoch 33/100, Loss: 0.9592
Epoch 34/100, Loss: 0.9578
Epoch 35/100, Loss: 0.9566
Epoch 36/100, Loss: 0.9553
Epoch 37/100, Loss: 0.9541
Epoch 38/1

In [21]:
import json

# Load the poem collection from its JSON file
with open("./data/纳兰性德诗集.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Collect every paragraph of every poem, each followed by a single space separator
pieces = []
for poem in data:
    for para in poem["para"]:
        pieces.append(para + " ")
texts = "".join(pieces)

# The training corpus is that same concatenated string
text = texts

# Show the first 100 characters of the corpus
print(text[:100])

# Vocabulary: the distinct characters in sorted order, with lookups in both directions
chars = sorted(set(text))
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: i for i, ch in ix_to_char.items()}

# Report the vocabulary size
print(f"字符集大小: {len(chars)}")

山一程，水一程，身向榆关那畔行，夜深千帐灯 风一更，雪一更，聒碎乡心梦不成，故园无此声 人生若只如初见，何事秋风悲画扇 等闲变却故人心，却道故心人易变 骊山语罢清宵半，泪雨霖铃终不怨 何如薄幸锦衣郎，
字符集大小: 1855


In [22]:
# Hyperparameters for the character-level model
input_size = output_size = len(chars)  # inputs and outputs both span the whole character set
hidden_size = 100  # width of the hidden state
learning_rate = 0.01  # gradient-descent step size

In [23]:
# Small random weights (standard normal scaled by 0.01) and zero biases
W_xh = 0.01 * np.random.standard_normal((hidden_size, input_size))  # input -> hidden
W_hh = 0.01 * np.random.standard_normal((hidden_size, hidden_size))  # hidden -> hidden
W_hy = 0.01 * np.random.standard_normal((output_size, hidden_size))  # hidden -> output
b_h = np.zeros(shape=(hidden_size, 1))
b_y = np.zeros(shape=(output_size, 1))

In [24]:
def one_hot_encoding(char, char_to_ix):
    """Encode a character as a one-hot column vector.

    Args:
        char: Character to encode; must be a key of ``char_to_ix``.
        char_to_ix: Mapping from character to its row index.

    Returns:
        Array of shape ``(len(char_to_ix), 1)`` that is zero everywhere except
        for a 1 in the row given by ``char_to_ix[char]``.
    """
    vocab_size = len(char_to_ix)
    hot_row = char_to_ix[char]
    encoded = np.zeros((vocab_size, 1))
    encoded[hot_row] = 1
    return encoded


def forward_backward_propagation(inputs, targets, h_prev):
    """Forward and backward pass of the character RNN over one window.

    Reads the module-level parameters ``W_xh``, ``W_hh``, ``W_hy``, ``b_h``,
    ``b_y`` and the ``char_to_ix`` vocabulary mapping.

    Args:
        inputs: Sequence of input characters.
        targets: Sequence of target characters, one per input position.
        h_prev: Hidden state to start the window from; it is copied, not
            modified.

    Returns:
        Tuple ``(loss, dW_xh, dW_hh, dW_hy, db_h, db_y, h_last)`` where
        ``loss`` is the cross-entropy summed over the window, the gradients
        are clipped element-wise to ``[-5, 5]``, and ``h_last`` is the hidden
        state after the final step (a copy of ``h_prev`` for an empty window).
    """
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(h_prev)
    loss = 0

    # Forward pass
    for t in range(len(inputs)):
        xs[t] = one_hot_encoding(inputs[t], char_to_ix)
        hs[t] = np.tanh(np.dot(W_xh, xs[t]) + np.dot(W_hh, hs[t - 1]) + b_h)
        ys[t] = np.dot(W_hy, hs[t]) + b_y
        # Softmax with the maximum score subtracted first: mathematically the
        # same distribution, but np.exp can no longer overflow to inf (which
        # would turn the probabilities, loss and gradients into NaN).
        exp_scores = np.exp(ys[t] - np.max(ys[t]))
        ps[t] = exp_scores / np.sum(exp_scores)
        loss += -np.log(ps[t][char_to_ix[targets[t]], 0])  # cross-entropy

    # Backward pass
    dW_xh, dW_hh, dW_hy = np.zeros_like(W_xh), np.zeros_like(W_hh), np.zeros_like(W_hy)
    db_h, db_y = np.zeros_like(b_h), np.zeros_like(b_y)
    # Sized from W_hh rather than hs[0] so that an empty window does not fail.
    dh_next = np.zeros((W_hh.shape[0], 1))

    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[char_to_ix[targets[t]]] -= 1  # gradient of softmax + cross-entropy
        dW_hy += np.dot(dy, hs[t].T)
        db_y += dy

        dh = np.dot(W_hy.T, dy) + dh_next
        dh_raw = (1 - hs[t] ** 2) * dh  # derivative of tanh
        db_h += dh_raw
        dW_xh += np.dot(dh_raw, xs[t].T)
        dW_hh += np.dot(dh_raw, hs[t - 1].T)  # hs[-1] is the incoming state
        dh_next = np.dot(W_hh.T, dh_raw)

    for dparam in [dW_xh, dW_hh, dW_hy, db_h, db_y]:
        np.clip(dparam, -5, 5, out=dparam)  # guard against exploding gradients

    return loss, dW_xh, dW_hh, dW_hy, db_h, db_y, hs[len(inputs) - 1]

In [25]:
def update_parameters(dW_xh, dW_hh, dW_hy, db_h, db_y):
    """Apply one gradient-descent step to the module-level parameters.

    ``W_xh``, ``W_hh``, ``W_hy``, ``b_h`` and ``b_y`` are each updated in
    place by subtracting ``learning_rate`` times the matching gradient.

    Args:
        dW_xh, dW_hh, dW_hy, db_h, db_y: Gradients for the parameter of the
            matching name.
    """
    gradients = (dW_xh, dW_hh, dW_hy, db_h, db_y)
    parameters = (W_xh, W_hh, W_hy, b_h, b_y)
    for parameter, gradient in zip(parameters, gradients):
        # In-place update of the NumPy array behind each global name.
        parameter -= learning_rate * gradient

In [26]:
def sample(h, seed_ix, n):
    """Generate ``n`` characters from the model, starting from a seed character.

    Reads the module-level parameters ``W_xh``, ``W_hh``, ``W_hy``, ``b_h``,
    ``b_y`` as well as ``input_size`` and ``ix_to_char``.

    Args:
        h: Initial hidden state. It is not modified; the caller's array can be
            reused for further calls.
        seed_ix: Vocabulary index of the first character.
        n: Number of characters to sample after the seed.

    Returns:
        String of ``n + 1`` characters: the seed followed by the samples.
    """
    x = np.zeros((input_size, 1))
    x[seed_ix] = 1
    ixes = [seed_ix]
    for _ in range(n):
        h = np.tanh(np.dot(W_xh, x) + np.dot(W_hh, h) + b_h)
        y = np.dot(W_hy, h) + b_y
        # Softmax with the maximum score subtracted first, so np.exp cannot
        # overflow; NaN probabilities would make np.random.choice raise.
        exp_scores = np.exp(y - np.max(y))
        p = exp_scores / np.sum(exp_scores)
        ix = int(np.random.choice(input_size, p=p.ravel()))
        # Feed the sampled character back in as the next one-hot input.
        x = np.zeros((input_size, 1))
        x[ix] = 1
        ixes.append(ix)
    txt = "".join(ix_to_char[ix] for ix in ixes)
    return txt

In [27]:
def train_rnn(
    data,
    epochs=10,
    seq_length=25,
    print_every=500,
    seed_char="月",
    sample_length=10,
    num_samples=4,
):
    """Train the character RNN on consecutive fixed-length windows of ``data``.

    Despite its name, ``epochs`` counts parameter updates: every iteration
    consumes one window of ``seq_length`` characters, not a pass over the
    whole corpus. The "Epoch" label in the progress line follows the same
    convention.

    Args:
        data: Training text; characters must be keys of ``char_to_ix``.
        epochs: Number of training iterations (windows) to run.
        seq_length: Number of characters per training window.
        print_every: Report the loss and print samples every this many
            iterations.
        seed_char: Character that every printed sample starts from.
        sample_length: Number of characters generated after the seed.
        num_samples: Number of samples printed per report.
    """
    n, p = 0, 0
    h_prev = np.zeros((hidden_size, 1))

    for epoch in range(epochs):
        # Restart at the beginning (with a fresh hidden state) on the first
        # iteration and whenever the next window would run past the data.
        if p + seq_length + 1 >= len(data) or n == 0:
            h_prev = np.zeros((hidden_size, 1))
            p = 0

        # Targets are the inputs shifted by one character.
        inputs = data[p : p + seq_length]
        targets = data[p + 1 : p + seq_length + 1]

        loss, dW_xh, dW_hh, dW_hy, db_h, db_y, h_prev = forward_backward_propagation(
            inputs, targets, h_prev
        )
        update_parameters(dW_xh, dW_hh, dW_hy, db_h, db_y)

        p += seq_length  # slide the data window
        n += 1

        if n % print_every == 0:
            print(f"Epoch {epoch+1}, Iteration {n}, Loss: {loss:.4f}")
            seed_ix = char_to_ix[seed_char]

            # Sample from a separate zero state so that reporting does not
            # discard the hidden state carried into the next training window.
            h_sample = np.zeros((hidden_size, 1))
            for _ in range(num_samples):
                sampled_text = sample(h_sample, seed_ix, sample_length)
                print(f"生成的文本:\n{sampled_text}")

In [28]:
# Train the character-level model, then generate text from it
n_epochs = 50_000  # number of training iterations passed to train_rnn
train_rnn(text, epochs=n_epochs)

# Seed character and number of characters to generate
seed_char = "月"  # any character of the corpus can serve as the seed
seed_ix = char_to_ix[seed_char]
generated_text_length = 15  # characters generated after the seed

# Sample starting from a zero hidden state
h_prev = np.zeros(shape=(hidden_size, 1))
sampled_text = sample(h_prev, seed_ix, generated_text_length)
print(f"生成的文本:\n{sampled_text}")

Epoch 500, Iteration 500, Loss: 164.0884
生成的文本:
月细复浣柘山悠梧翦平悠
生成的文本:
月非场惜恒疏转花东重征
生成的文本:
月掩难霓目，又研花木鸯
生成的文本:
月旗心色功，遥砑落瘦桨
Epoch 1000, Iteration 1000, Loss: 168.3126
生成的文本:
月故赏把能斜心圆梦楼 
生成的文本:
月再冠，不归 在重暗浮
生成的文本:
月怡消荆 玉塞世省吹，
生成的文本:
月说倚 沼碧千春齐雨窗
Epoch 1500, Iteration 1500, Loss: 166.5923
生成的文本:
月 凉涛今到又满丝成夜
生成的文本:
月枯桥边黄偏血青量影得
生成的文本:
月倚蛱倾，间阶而东两端
生成的文本:
月言到？月枝分有茶陪怀
Epoch 2000, Iteration 2000, Loss: 165.0924
生成的文本:
月沾俱仙，垒饮白双凤銮
生成的文本:
月吁感 乘妾论语今病涯
生成的文本:
月唇竿药醒是真镜天横怨
生成的文本:
月绪堠 苏书辜代丛酬断
Epoch 2500, Iteration 2500, Loss: 150.5343
生成的文本:
月浦，膺楼初塞饥独怜常
生成的文本:
月濛水冷 窗忍梦海风山
生成的文本:
月连来灵幽跃长痕前空，
生成的文本:
月吹咸夜 闻腰巳红钿凉
Epoch 3000, Iteration 3000, Loss: 145.9544
生成的文本:
月闺君客却，颤光毫思适
生成的文本:
月花许拘畏，帐眉簇雉情
生成的文本:
月零映自障，微酪晕絮埋
生成的文本:
月外不脉兰影心桥万，药
Epoch 3500, Iteration 3500, Loss: 138.8515
生成的文本:
月瞩 思同闲山西地如，
生成的文本:
月凤载雨，李雨去自乌 
生成的文本:
月花烦山树 不来纱声初
生成的文本:
月花身屏，黄立府家总旧
Epoch 4000, Iteration 4000, Loss: 140.0479
生成的文本:
月，殊怅残年锁春 不风
生成的文本:
月管青，暗草开黛无月话
生成的文本:
月怀曲斜梦 梦风高写须
生成的文本:
月斗华，酒花琳 蓬移一
Epoch 4500, Iteration 4500, Loss: 144.7505
生成的文本:


In [None]:
# Run the training loop again; the global weights are not re-initialised in this cell
n_epochs = 50_000  # number of training iterations passed to train_rnn
train_rnn(text, epochs=n_epochs)

Epoch 500, Iteration 500, Loss: 149.4062
生成的文本:
月、程，西泪不，住观言
Epoch 1000, Iteration 1000, Loss: 168.0919
生成的文本:
月，明烟下 风岸梦，劫
Epoch 1500, Iteration 1500, Loss: 166.5011
生成的文本:
月一、便下 因白顿色窈
Epoch 2000, Iteration 2000, Loss: 171.3461
生成的文本:
月，朱溶谁夫 斜，黄人
Epoch 2500, Iteration 2500, Loss: 155.1160
生成的文本:
月 寄载 相逐争夜新断
Epoch 3000, Iteration 3000, Loss: 151.9432
生成的文本:
月 明图空落怯 朝转出
Epoch 3500, Iteration 3500, Loss: 147.1010
生成的文本:
月一闲灭秋 莲马皆时何
Epoch 4000, Iteration 4000, Loss: 164.5352
生成的文本:
月，莲欲一定簟远坠金吟
Epoch 4500, Iteration 4500, Loss: 160.5108
生成的文本:
月，凭除衣，那蒙花杨，
Epoch 5000, Iteration 5000, Loss: 149.8608
生成的文本:
月，角忆梦应兰自团，一
Epoch 5500, Iteration 5500, Loss: 160.7609
生成的文本:
月，鼓隐踪平不(谁，最
Epoch 6000, Iteration 6000, Loss: 164.0692
生成的文本:
月 树，西纹壁不古 两
Epoch 6500, Iteration 6500, Loss: 155.5900
生成的文本:
月，  枕当愁桃平来锦
Epoch 7000, Iteration 7000, Loss: 168.5200
生成的文本:
月愿三 笔世共尽 梦家
Epoch 7500, Iteration 7500, Loss: 155.9729
生成的文本:
月 休间波 江为草立幽
Epoch 8000, Iteration 8000, Loss: 154.9010
生成的文本:
月，不绿，山天弦且，今
Epoch 8500

KeyboardInterrupt: 