In [27]:
import torch.nn as nn
import torch

In [28]:
# Smallest possible case: a single-layer RNN fed one sequence of length 1.
x = torch.randn((1, 1, 100))  # one time step, one sample, 100 features

rnn = nn.RNN(100, 30)  # positional form of input_size=100, hidden_size=30

out, h = rnn(x)

# With one layer and one time step, `out` and `h` hold the same values, so
# their difference prints as all zeros; the call then behaves like a single
# RNNCell step, which only returns h.
print(out.size())
print(torch.sub(out, h))

torch.Size([1, 1, 30])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SubBackward0>)


In [29]:
# Spell out every commonly used nn.RNN constructor option in one place.
rnn_options = dict(
    input_size=100,  # number of expected features in the input x
    hidden_size=10,  # size of the hidden state (the hidden_size in W_{ih} and W_{hh})
    num_layers=2,  # stacked layers: layer 2 consumes layer 1's output; each layer has its own parameters
    nonlinearity='tanh',  # activation function; 'relu' is the alternative, 'tanh' is the default
    bias=True,  # whether to use bias terms (default True)
    batch_first=False,  # True makes the input (N, T, C) instead of (T, N, C); default False
    dropout=0.5,  # if non-zero, dropout is applied to the output of every layer except the last (default 0)
)
rnn = nn.RNN(**rnn_options)

# Default input layout is (T, N, C): sequence length, mini-batch size, feature count.
x = torch.randn((20, 3, 100))
# The initial hidden state has shape (L*D, N, hidden_size), where L is the number
# of layers and D is 1 (unidirectional) or 2 (bidirectional).
h_0 = torch.ones(2, 3, 10)
# Pass a custom h_0; when it is omitted the RNN starts from an all-zero state.
out, h = rnn(x, h_0)
# out.shape == (T, N, D*hidden_size); h.shape == (L*D, N, hidden_size).
# With batch_first=True, out becomes (N, T, D*hidden_size) while h keeps the
# shape (L*D, N, hidden_size).
print(out.size(), h.size())

torch.Size([20, 3, 10]) torch.Size([2, 3, 10])


In [30]:
# Learnable parameters exposed by nn.RNN (k is the layer index):
#   weight_ih_l[k]: input-hidden weights of the k-th layer, of shape
#                   (hidden_size, input_size) for k = 0; otherwise
#                   (hidden_size, num_directions * hidden_size)
#   weight_hh_l[k]: hidden-hidden weights of the k-th layer, of shape
#                   (hidden_size, hidden_size)
#   bias_ih_l[k]:   input-hidden bias of the k-th layer, of shape (hidden_size)
#   bias_hh_l[k]:   hidden-hidden bias of the k-th layer, of shape (hidden_size)
# Print each parameter's name next to its shape.
for name, param in rnn.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
weight_ih_l1   shape= torch.Size([10, 10])
weight_hh_l1   shape= torch.Size([10, 10])
bias_ih_l1   shape= torch.Size([10])
bias_hh_l1   shape= torch.Size([10])


In [31]:
# Two-layer bidirectional RNN; `bidirectional` defaults to False.
# NOTE(review): `b_run` is presumably a typo for `b_rnn`; the name is kept
# because later cells reference it.
b_run = nn.RNN(100, 15, num_layers=2, bidirectional=True)

# The initial state has shape (L*D, N, hidden_size) = (2*2, 3, 15). Per the
# PyTorch docs the leading axis is layer-major (it can be viewed as
# (num_layers, num_directions, N, hidden_size)), i.e. layer 0 forward,
# layer 0 reverse, layer 1 forward, layer 1 reverse.
b_h_0 = torch.ones(4, 3, 15)

# Important: b_out still carries the time axis T (one entry per time step).
b_out, b_h = b_run(x, b_h_0)
# b_out: last layer's output at every time step; the two directions are
#        concatenated on the last axis, hence 2 * 15 = 30 features.
# b_h:   final hidden state of every layer and direction, so the leading
#        axis has 2 layers * 2 directions = 4 entries.
print(b_out.size(), b_h.size())

torch.Size([20, 3, 30]) torch.Size([4, 3, 15])


In [32]:
# The forward and reverse directions of a bidirectional RNN each own a
# separate, independent set of parameters (the `_reverse` suffix marks the
# reverse direction's copy).
for name, param in b_run.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([15, 100])
weight_hh_l0   shape= torch.Size([15, 15])
bias_ih_l0   shape= torch.Size([15])
bias_hh_l0   shape= torch.Size([15])
weight_ih_l0_reverse   shape= torch.Size([15, 100])
weight_hh_l0_reverse   shape= torch.Size([15, 15])
bias_ih_l0_reverse   shape= torch.Size([15])
bias_hh_l0_reverse   shape= torch.Size([15])
weight_ih_l1   shape= torch.Size([15, 30])
weight_hh_l1   shape= torch.Size([15, 15])
bias_ih_l1   shape= torch.Size([15])
bias_hh_l1   shape= torch.Size([15])
weight_ih_l1_reverse   shape= torch.Size([15, 30])
weight_hh_l1_reverse   shape= torch.Size([15, 15])
bias_ih_l1_reverse   shape= torch.Size([15])
bias_hh_l1_reverse   shape= torch.Size([15])


In [33]:
# Display the layer-0 hidden-hidden weight matrix (the non-`_reverse` copy).
# Per the PyTorch docs, all the weights and biases are initialized from
# U(-\sqrt{k}, \sqrt{k}), where k = 1/hidden_size (hidden_size is 15 for b_run).
b_run.weight_hh_l0

Parameter containing:
tensor([[-0.0664,  0.2473,  0.1546, -0.0315,  0.0291,  0.1714,  0.0860,  0.0098,
         -0.2483, -0.0639, -0.1112, -0.0952,  0.2200, -0.0205, -0.0586],
        [ 0.0651,  0.0607, -0.0787,  0.1104, -0.0989, -0.0302,  0.1479, -0.1122,
          0.0369,  0.0201,  0.0712, -0.0413,  0.2326,  0.0024,  0.1086],
        [ 0.1771,  0.0170, -0.0488, -0.1944, -0.1664,  0.0313, -0.0879,  0.0117,
         -0.0224,  0.2021, -0.0964, -0.1600,  0.1889, -0.0350,  0.1536],
        [-0.0929, -0.1903, -0.0715, -0.1468,  0.1804,  0.1231,  0.2238, -0.0177,
         -0.1376, -0.1275, -0.1319, -0.0322, -0.0056,  0.0703, -0.1925],
        [-0.0739, -0.0251,  0.2277, -0.2348, -0.1723, -0.2342, -0.1074, -0.0144,
          0.1352, -0.0979, -0.0080,  0.2348,  0.0218,  0.2503,  0.1275],
        [-0.2253, -0.2495,  0.1496,  0.1250,  0.1055, -0.1759,  0.2489,  0.0976,
         -0.1971,  0.1816, -0.1607, -0.2418, -0.1232, -0.2426,  0.1417],
        [-0.2137, -0.2436,  0.2119, -0.1771,  0.0787, 

In [34]:
# Display the layer-1 input-hidden weight matrix (the non-`_reverse` copy).
# All of the module's parameters have requires_grad=True.
b_run.weight_ih_l1

Parameter containing:
tensor([[ 7.3919e-02,  1.6725e-01,  1.7216e-01, -2.1449e-01, -1.2055e-01,
         -1.0914e-01, -1.5915e-01,  7.6740e-02, -6.1194e-02, -8.4321e-02,
          4.7601e-02, -1.2521e-01,  8.4071e-02, -6.5983e-02,  5.8618e-02,
         -5.5857e-02, -2.5500e-01, -7.3008e-02,  8.7387e-02,  4.2918e-02,
         -2.1365e-01, -2.2813e-01, -1.5030e-01, -4.8744e-02, -2.0532e-01,
          1.5509e-01,  1.8230e-01, -3.2281e-02,  1.3973e-01,  2.2606e-01],
        [-2.2368e-01, -1.0966e-01,  2.1165e-01, -1.9476e-01,  2.3308e-01,
          2.2298e-04, -1.0636e-01, -7.1339e-02,  1.5072e-01,  2.3525e-01,
         -7.4957e-02, -8.4542e-02, -4.1271e-02,  8.6316e-02, -2.5298e-01,
          7.0691e-02,  6.1845e-02, -1.9606e-01,  2.0044e-01,  2.3732e-01,
          1.8926e-01, -1.4666e-01,  1.6441e-01,  2.1130e-01, -2.2511e-01,
         -1.9511e-01, -4.4169e-02,  2.3833e-01,  5.8038e-02,  2.2865e-01],
        [-1.9751e-01, -2.5524e-01, -4.8606e-02, -1.2614e-02, -1.4625e-01,
          3.62