In [1]:
import torch.nn as nn
import torch

In [2]:
# Single-layer RNN fed a sequence of length 1.
# Input layout is (T, N, C) = (sequence length, batch size, features).
x = torch.randn((1, 1, 100))

rnn = nn.RNN(100, 30)  # positional order: input_size, hidden_size

# `out` is the last layer's output at every time step; `h` is the hidden state
# of every layer at the final time step.
out, h = rnn(x)

print(out.size())
# With one layer and one time step the two tensors coincide (the difference is
# all zeros), so this is equivalent to an RNNCell, which only returns h.
print(torch.sub(out, h))

torch.Size([1, 1, 30])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SubBackward0>)


In [3]:
# Two stacked RNN layers with every constructor option spelled out:
#   input_size   - number of expected features in the input x
#   hidden_size  - size of the hidden state (the hidden_size dimension of the
#                  weight matrices W_ih and W_hh)
#   num_layers   - number of stacked layers; with num_layers=2 the second layer
#                  consumes the first layer's output, and the layers do not
#                  share parameters
#   nonlinearity - activation function, 'tanh' (default) or 'relu'
#   bias         - whether bias terms are added (default True)
#   batch_first  - if True the input is (N, T, C) instead of (T, N, C)
#                  (default False)
#   dropout      - if non-zero, a Dropout layer with this probability is applied
#                  to the output of every layer except the last one (see the
#                  torch.nn.RNN documentation); default 0
rnn = nn.RNN(
    input_size=100,
    hidden_size=10,
    num_layers=2,
    nonlinearity='tanh',
    bias=True,
    batch_first=False,
    dropout=0.5,
)

# Default input layout is (T, N, C): T = sequence length, N = mini-batch size,
# C = number of input features.
x = torch.randn((20, 3, 100))

# Initial hidden state of shape (L*D, N, hidden_size), where L is the number of
# layers and D is 1 (unidirectional) or 2 (bidirectional).
h_0 = torch.ones(2, 3, 10)

# Pass a custom h_0; when it is omitted the initial hidden state is all zeros.
out, h = rnn(x, h_0)

# out: (T, N, hidden_size); h: (L*D, N, hidden_size)
print(out.size(), h.size())

torch.Size([20, 3, 10]) torch.Size([2, 3, 10])


In [4]:
# Parameter naming and shapes of nn.RNN:
#   weight_ih_l[k] - the learnable input-hidden weights of the k-th layer, of
#                    shape (hidden_size, input_size) for k = 0; otherwise the
#                    shape is (hidden_size, num_directions * hidden_size)
#   weight_hh_l[k] - the learnable hidden-hidden weights of the k-th layer, of
#                    shape (hidden_size, hidden_size)
#   bias_ih_l[k]   - the learnable input-hidden bias of the k-th layer, of
#                    shape (hidden_size)
#   bias_hh_l[k]   - the learnable hidden-hidden bias of the k-th layer, of
#                    shape (hidden_size)
for name, param in rnn.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
weight_ih_l1   shape= torch.Size([10, 10])
weight_hh_l1   shape= torch.Size([10, 10])
bias_ih_l1   shape= torch.Size([10])
bias_hh_l1   shape= torch.Size([10])


In [5]:
# Two-layer bidirectional RNN (bidirectional defaults to False).
b_run = nn.RNN(
    input_size=100,
    hidden_size=15,
    num_layers=2,
    bidirectional=True,
)

# Initial hidden state of shape (L*D, N, hidden_size) = (2*2, 3, 15): one
# (3, 15) slice per (layer, direction) pair.
# NOTE: per the PyTorch RNN documentation the first dimension is ordered
# layer-major (layer 0 forward, layer 0 reverse, layer 1 forward,
# layer 1 reverse), not "all forward states followed by all reverse states".
b_h_0 = torch.ones(4, 3, 15)

# Important: b_out still carries the time dimension T.
# b_out is the last layer's output at every time step; being bidirectional, its
# feature dimension (dim 2) is doubled to 2 * hidden_size.
# b_h is the final hidden state of every layer and direction; being
# bidirectional, its dim 0 is doubled to 2 * num_layers.
b_out, b_h = b_run(x, b_h_0)
print(b_out.size(), b_h.size())

torch.Size([20, 3, 30]) torch.Size([4, 3, 15])


In [6]:
# The forward and reverse directions of the bidirectional RNN each have their
# own, mutually independent parameters; the reverse-direction ones carry the
# `_reverse` suffix in their names.
for name, param in b_run.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([15, 100])
weight_hh_l0   shape= torch.Size([15, 15])
bias_ih_l0   shape= torch.Size([15])
bias_hh_l0   shape= torch.Size([15])
weight_ih_l0_reverse   shape= torch.Size([15, 100])
weight_hh_l0_reverse   shape= torch.Size([15, 15])
bias_ih_l0_reverse   shape= torch.Size([15])
bias_hh_l0_reverse   shape= torch.Size([15])
weight_ih_l1   shape= torch.Size([15, 30])
weight_hh_l1   shape= torch.Size([15, 15])
bias_ih_l1   shape= torch.Size([15])
bias_hh_l1   shape= torch.Size([15])
weight_ih_l1_reverse   shape= torch.Size([15, 30])
weight_hh_l1_reverse   shape= torch.Size([15, 15])
bias_ih_l1_reverse   shape= torch.Size([15])
bias_hh_l1_reverse   shape= torch.Size([15])


In [7]:
# Display the layer-0 forward-direction hidden-to-hidden weight matrix, shape (15, 15).
# All the weights and biases are initialized from U(-sqrt(k), sqrt(k)), where
# k = 1/hidden_size; with hidden_size=15 the bound is sqrt(1/15) ~= 0.258.
b_run.weight_hh_l0

Parameter containing:
tensor([[-0.1624,  0.2138,  0.0355, -0.1030,  0.0249,  0.0346, -0.1337, -0.2361,
         -0.2403, -0.0956,  0.0726,  0.0127,  0.0639,  0.1459,  0.1096],
        [ 0.0793, -0.2393,  0.2392,  0.0268,  0.1901, -0.0037, -0.1741, -0.0875,
         -0.0806,  0.1782, -0.2402, -0.0513,  0.0246,  0.2576, -0.0746],
        [ 0.0227, -0.2464, -0.0191, -0.1724,  0.0934,  0.1226,  0.1112, -0.1264,
         -0.2379,  0.0968, -0.0834,  0.0482, -0.0331,  0.0499,  0.2456],
        [-0.0639, -0.1552, -0.0647,  0.0254, -0.0868, -0.2272,  0.1727,  0.1697,
          0.2208,  0.1287, -0.2386,  0.0543,  0.2541, -0.0613,  0.2509],
        [ 0.0355, -0.0166, -0.2445, -0.0880,  0.2539,  0.0606, -0.2037, -0.0882,
          0.1818,  0.1172, -0.0297, -0.0860, -0.1478, -0.2366, -0.0389],
        [ 0.0048, -0.1967, -0.1605, -0.2279,  0.1017,  0.1661,  0.1931, -0.1015,
         -0.0006, -0.0360,  0.1823, -0.2241, -0.0922, -0.1130, -0.1167],
        [ 0.0159,  0.0018, -0.1755,  0.2395, -0.0436, 

In [8]:
# Display the layer-1 forward-direction input-to-hidden weight matrix. Its shape is
# (hidden_size, num_directions * hidden_size) = (15, 30).
# All parameters have requires_grad=True.
b_run.weight_ih_l1

Parameter containing:
tensor([[ 0.1266, -0.1950, -0.0558, -0.0238, -0.1321, -0.0923,  0.1650, -0.1078,
         -0.0907,  0.2191, -0.0831, -0.2206,  0.1373,  0.2469,  0.2197,  0.0520,
          0.1646, -0.0422,  0.1956, -0.0193,  0.0213, -0.1220, -0.1972, -0.1268,
         -0.1419, -0.0045, -0.0702,  0.1078, -0.0327,  0.1466],
        [ 0.0743,  0.1339,  0.1481,  0.1415, -0.1055,  0.1244,  0.0323, -0.0247,
          0.2374, -0.1276, -0.1891, -0.0081,  0.0392,  0.1447,  0.1944, -0.2141,
         -0.0024,  0.1767,  0.0129, -0.0213,  0.2110,  0.0730, -0.1559,  0.0101,
         -0.1155,  0.0776,  0.2073,  0.1121, -0.0066,  0.1654],
        [ 0.0278,  0.0303,  0.0553, -0.2115, -0.1622,  0.2455,  0.1116, -0.2505,
          0.0621,  0.2028,  0.0170, -0.0820, -0.1857,  0.0681, -0.2511,  0.1831,
          0.0192, -0.0645, -0.0982, -0.0984, -0.1612, -0.2012,  0.1480,  0.0177,
          0.1261,  0.2217, -0.2058,  0.0563, -0.0779,  0.0942],
        [-0.1648,  0.0622,  0.2400, -0.0613, -0.2388, -0.