In [28]:
import torch.nn as nn
import torch

In [29]:
# Smallest possible case: one time step, one sample, 100 input features,
# i.e. an input laid out as (T, N, C) = (1, 1, 100).
x = torch.randn((1, 1, 100))

rnn = nn.RNN(100, 30)  # positional form of input_size=100, hidden_size=30

# Single layer, sequence length 1.
out, h = rnn(x)

print(out.size())
# The difference is all zeros: here out == h, so the module acts like an
# RNNCell (which returns only h).
print(torch.sub(out, h))

torch.Size([1, 1, 30])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SubBackward0>)


In [30]:
rnn = nn.RNN(input_size=100,  # number of expected features in each input step x_t
             hidden_size=10,  # number of features in the hidden state
             num_layers=2,  # stacked recurrent layers: layer 2 consumes layer 1's outputs; each layer has its own parameters
             nonlinearity='tanh',  # activation function, 'tanh' (default) or 'relu'
             bias=True,  # whether to use the bias terms b_ih / b_hh (default True)
             batch_first=False,  # if True the input is laid out as (N, T, C) instead of (T, N, C) (default False)
             dropout=0.5)  # if non-zero, a Dropout layer follows the outputs of every layer EXCEPT the last one (default 0)

x = torch.randn(20, 3, 100)  # default input layout (T, N, C): sequence length, mini-batch size, input features
# h_0 has shape (L*D, N, hidden_size), where L is the number of layers and D is
# 1 (unidirectional) or 2 (bidirectional). It is derived from the module and the
# input so that it cannot drift from the settings above.
h_0 = torch.ones(rnn.num_layers, x.size(1), rnn.hidden_size)
out, h = rnn(x, hx=h_0)  # custom initial state; when hx is omitted it defaults to all zeros
print(out.shape, h.shape)  # out.shape=(T, N, hidden_size); h.shape=(L*D, N, hidden_size)

torch.Size([20, 3, 10]) torch.Size([2, 3, 10])


In [31]:
# Walk over every registered parameter of `rnn` and show its name and shape.
for name, param in rnn.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
weight_ih_l1   shape= torch.Size([10, 10])
weight_hh_l1   shape= torch.Size([10, 10])
bias_ih_l1   shape= torch.Size([10])
bias_hh_l1   shape= torch.Size([10])


In [32]:
b_run = nn.RNN(input_size=100, hidden_size=15, num_layers=2,
               bidirectional=True)  # run a forward and a reverse RNN in every layer (default False)

# The initial state has shape (L*D, N, hidden_size), with D = 2 when bidirectional.
# Along dim 0 the entries are ordered layer by layer, forward before reverse:
# [layer0 forward, layer0 reverse, layer1 forward, layer1 reverse].
num_directions = 2 if b_run.bidirectional else 1
b_h_0 = torch.ones(b_run.num_layers * num_directions, x.size(1), b_run.hidden_size)

# Key point: b_out still carries the time dimension T.
b_out, b_h = b_run(x, hx=b_h_0)
# b_out: the last layer's output at every time step, shape (T, N, D*hidden_size);
#        both directions are concatenated along the last (feature) dimension.
# b_h:   the final hidden state of every layer and direction, shape (L*D, N, hidden_size).
print(b_out.shape, b_h.shape)

torch.Size([20, 3, 30]) torch.Size([4, 3, 15])


In [33]:
# The forward and reverse directions each own an independent set of parameters;
# the reverse-direction ones are listed with a `_reverse` suffix.
for name, param in b_run.named_parameters():
    print(name, '  shape=', param.shape)

weight_ih_l0   shape= torch.Size([15, 100])
weight_hh_l0   shape= torch.Size([15, 15])
bias_ih_l0   shape= torch.Size([15])
bias_hh_l0   shape= torch.Size([15])
weight_ih_l0_reverse   shape= torch.Size([15, 100])
weight_hh_l0_reverse   shape= torch.Size([15, 15])
bias_ih_l0_reverse   shape= torch.Size([15])
bias_hh_l0_reverse   shape= torch.Size([15])
weight_ih_l1   shape= torch.Size([15, 30])
weight_hh_l1   shape= torch.Size([15, 15])
bias_ih_l1   shape= torch.Size([15])
bias_hh_l1   shape= torch.Size([15])
weight_ih_l1_reverse   shape= torch.Size([15, 30])
weight_hh_l1_reverse   shape= torch.Size([15, 15])
bias_ih_l1_reverse   shape= torch.Size([15])
bias_hh_l1_reverse   shape= torch.Size([15])


In [34]:
# Display the layer-0 hidden-to-hidden weight matrix of the bidirectional RNN.
# All the weights and biases are initialized from U(-\sqrt{k}, \sqrt{k}), where k = 1/hidden_size.
b_run.weight_hh_l0

Parameter containing:
tensor([[-1.8555e-01,  1.1329e-01,  2.4918e-01, -3.6007e-02, -1.7716e-03,
          4.3607e-02, -2.8975e-02, -4.4970e-02,  1.6755e-01, -2.3783e-01,
          2.4658e-01,  5.7709e-02, -5.5757e-02,  3.8081e-02, -2.0598e-01],
        [ 1.2940e-01,  3.8460e-02, -1.3125e-01, -2.5347e-01,  5.4901e-02,
          1.9817e-01,  2.5753e-01, -2.0727e-01,  9.3943e-04,  3.2192e-03,
          8.3854e-02, -1.1049e-01,  1.9628e-01,  1.5656e-01,  1.3021e-01],
        [ 2.3819e-01, -1.4488e-02,  8.3998e-02, -4.0635e-02, -1.1969e-01,
          8.7819e-02,  1.8752e-01, -1.1748e-02, -3.6478e-03, -1.1085e-02,
         -2.3807e-02,  8.3186e-02, -8.4126e-03,  1.4259e-01,  1.0121e-01],
        [ 2.0698e-01, -1.2697e-01, -1.2296e-01,  3.8799e-02, -8.4972e-02,
         -1.8428e-01, -1.2324e-01,  1.2904e-01, -1.8404e-01, -2.4366e-01,
          2.3639e-01, -2.2278e-01, -1.1823e-01,  1.3325e-01,  6.7143e-02],
        [-1.9387e-01, -8.9016e-03, -8.9846e-03, -1.9714e-01,  2.3283e-01,
          2.

In [35]:
# Display the layer-1 input-to-hidden weight matrix (listed above with shape [15, 30]).
# All of the module's parameters have requires_grad=True.
b_run.weight_ih_l1

Parameter containing:
tensor([[-0.2248,  0.1640, -0.2155,  0.1693, -0.0283, -0.1041, -0.2188,  0.2108,
         -0.0094, -0.0677, -0.0338,  0.1651, -0.2539,  0.2007,  0.0583, -0.1160,
         -0.2139, -0.0890,  0.1946,  0.1556,  0.1572,  0.0360, -0.1896, -0.0069,
          0.2461,  0.2242, -0.0193,  0.0751,  0.2293, -0.0865],
        [ 0.0840, -0.1849,  0.0100, -0.0776, -0.1552, -0.0373, -0.2206, -0.0044,
          0.0008, -0.2063,  0.1485, -0.1798,  0.1573, -0.0444,  0.0748, -0.1194,
          0.1256, -0.0092,  0.1403, -0.2454, -0.0273, -0.0206, -0.1796,  0.0938,
          0.1657, -0.1908, -0.0671,  0.0981, -0.2397,  0.1281],
        [ 0.0070, -0.1161, -0.1385, -0.0030,  0.2107, -0.0189, -0.0617,  0.1363,
          0.0254,  0.2118,  0.2511,  0.1142, -0.2319,  0.1187, -0.1703,  0.1397,
         -0.2175, -0.0484,  0.1403,  0.1530,  0.1407,  0.1849,  0.2422,  0.1005,
         -0.0882,  0.1747, -0.2290,  0.1769, -0.0022,  0.2204],
        [ 0.2242,  0.2085, -0.1308, -0.0033,  0.1474, -0.