[toc]

# RNN Numpy 实现四——进一步batch化

基于前面的一些代码，我们来将训练的代码进一步batch化。规范化。

In [None]:
我们上面的 batch 实际上不是一个高效率的 batch 版本

In [19]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder

input_word = list("I am learning RNN")
words = set(input_word)
input_word_onehot = OneHotEncoder(sparse=False).fit_transform(np.array(input_word).reshape(-1, 1))

# 序列长度取 3
sequenceLen = 3

x, y = [], []
for i in range(len(input_word_onehot) - sequenceLen):
    x.append(input_word_onehot[i:i + sequenceLen])
    y.append(input_word_onehot[i + 1: i + 1 + sequenceLen])

x_train = np.array(x)
y_train = np.array(y)

n_class = len(words)
nx = n_class
ny = n_class
nh = 4

def get_weights(shape, dtype=np.float32):
    np.random.seed(123)
    return np.array(np.random.randn(*shape), dtype=dtype)

def get_bias(shape, dtype=np.float32):
    return np.zeros(shape, dtype=dtype)

# 权重初始化，为了之后操作方便，放在一个字典中
weights = {
    'Wxh': get_weights((nx, nh)),
    'Why': get_weights((nh, ny)),
    'Whh': get_weights((nh, nh)),
    'bh': get_bias((1, nh)),
    'by': get_bias((1, ny))
}

def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c)
    sum_exp = np.sum(exp_a)
    y = exp_a / sum_exp
    return y

def xentropy(y, yhat):
    return np.mean(np.sum(-y * np.log(yhat + 1e-8), axis=0))

def rnn_cell_forward(xs, hprev, weights):
    """
    xs: shape=(n_samples, n_features)
    """
    Why = weights['Why']
    Whh = weights['Whh']
    Wxh = weights['Wxh']
    bh = weights['bh']
    by = weights['by']

    a = np.matmul(xs, Wxh) + np.matmul(hprev, Whh) + bh
    h = np.tanh(a)
    o = np.matmul(h, Why) + by
    yhat = softmax(o)
    return yhat, a, h, o

In [20]:
n_samples = 14 
n_neuron = 3

In [21]:
hprev = np.zeros((n_samples, n_neuron))

In [22]:
rnn_cell_forward(x_train[:, 0, :], hprev, weights)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 3)

In [4]:
def _backward(xs, ys, weights, a, o, h, yhat):
    n_sequences = xs.shape[0]

    Why = weights['Why']
    Whh = weights['Whh']
    Wxh = weights['Wxh']
    bh = weights['bh']
    by = weights['by']

    grads = { name: np.zeros_like(weights[name]) for name in weights}
    danext = None
    for i in range(n_sequences - 1, -1, -1):
        if i == n_sequences - 1:
            danext = np.zeros_like(a[i:i + 1])

        dot = yhat[i:i + 1] - ys[i:i + 1]

        # backprop through ot
        dby = dot
        dWhy = np.matmul(h[i:i + 1].T, dot)
        dht = np.matmul(dot, Why.T) + np.matmul(danext, Whh.T)
        dWhh = np.matmul(h[i:i + 1].T, danext)

        # backprop through ht
        dat = dht * (1 - h[i:i + 1] ** 2)

        # backprop through at
        dWxh = np.matmul(xs[i:i + 1].T, dat)
        dbh = dat

        # 累加梯度
        grads['by'] += dby
        grads['bh'] += dbh
        grads['Whh'] += dWhh
        grads['Wxh'] += dWxh
        grads['Why'] += dWhy
        danext = dat

    for k in grads:
        grads[k] = grads[k] / n_sequences
    return grads

def forward(batch_xs, weights):
    batch_yhat = []
    batch_a = []
    batch_o = []
    batch_h = []

    for xs in batch_xs:
        yhat, a, h, o = _forward(xs, weights)
        batch_yhat.append(yhat)
        batch_o.append(o)
        batch_h.append(h)
        batch_a.append(a)

    batch_yhat = np.array(batch_yhat)
    batch_a = np.array(batch_a)
    batch_h = np.array(batch_h)
    batch_o = np.array(batch_o)
    return batch_yhat, batch_a, batch_h, batch_o

def backward(batch_xs, batch_ys, weights, batch_a, batch_o, batch_h, batch_yhat):
    n_batch = batch_xs.shape[0]
    grads = { name: np.zeros_like(weights[name]) for name in weights}
    for xs, ys, a, o, h, yhat in zip(batch_xs, batch_ys, batch_a, batch_o, batch_h, batch_yhat):
        tmp_grads = _backward(xs, ys, weights, a, o, h, yhat)
        for k in tmp_grads:
            grads[k] += tmp_grads[k]
    for k in grads:
        grads[k] /= n_batch
    return grads

IndentationError: unexpected indent (<ipython-input-4-ca132d2a0323>, line 65)

定义一个 sgd，来更新梯度

In [2]:
def sgd(grads, weights, lr=0.1):
    for name in weights:
        weights[name] -= lr * grads[name]

训练模型，可以看到 loss 不断减少！

In [3]:
n_epochs = 100
for epoch in range(n_epochs):
    yhat, a, h, o = forward(x_train, weights)
    loss = xentropy(y_train, yhat)
    grads = backward(x_train, y_train, weights, a, o, h, yhat)
    sgd(grads, weights, lr=1)
    print(f"Epoch: {epoch}/{n_epochs} Train Loss: {loss}")

Epoch: 0/100 Train Loss: 4.222423528244957
Epoch: 1/100 Train Loss: 3.7168807073561934
Epoch: 2/100 Train Loss: 3.4110927045930355
Epoch: 3/100 Train Loss: 3.0785505858347264
Epoch: 4/100 Train Loss: 2.7974501645702885
Epoch: 5/100 Train Loss: 2.576097749103156
Epoch: 6/100 Train Loss: 2.387566511186096
Epoch: 7/100 Train Loss: 2.2219042945493475
Epoch: 8/100 Train Loss: 2.0911166642398875
Epoch: 9/100 Train Loss: 2.00142374218148
Epoch: 10/100 Train Loss: 1.9323612189859039
Epoch: 11/100 Train Loss: 1.8738544014147673
Epoch: 12/100 Train Loss: 1.8219660172287577
Epoch: 13/100 Train Loss: 1.7745208783750472
Epoch: 14/100 Train Loss: 1.729935647161311
Epoch: 15/100 Train Loss: 1.6869280762731453
Epoch: 16/100 Train Loss: 1.644712598260227
Epoch: 17/100 Train Loss: 1.6035307565963801
Epoch: 18/100 Train Loss: 1.5645356932416574
Epoch: 19/100 Train Loss: 1.5284995263637502
Epoch: 20/100 Train Loss: 1.4952450159396196
Epoch: 21/100 Train Loss: 1.4642122600945904
Epoch: 22/100 Train Loss: 1