In [2]:
import torch.nn as nn
import torch

In [3]:
# One sample, one time step: input shape is (T=1, N=1, C=100).
x = torch.randn(1, 1, 100)

# Single-layer, unidirectional RNN mapping 100 input features to 30 hidden units.
rnn = nn.RNN(input_size=100, hidden_size=30)

# `out` collects the last layer's hidden state at every time step, while `h`
# is the final hidden state of every layer. With one layer and a sequence of
# length 1 the two coincide, so the module behaves like an RNNCell (which
# only returns h).
out, h = rnn(x)
print(out - h)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SubBackward0>)


In [4]:
# Two stacked RNN layers with every constructor option spelled out.
rnn = nn.RNN(
    input_size=100,  # number of expected features in the input x
    hidden_size=10,  # number of features in the hidden state
    # Number of stacked recurrent layers. With num_layers=2 the second layer
    # takes the first layer's output as its input; each layer owns its own
    # independent set of parameters.
    num_layers=2,
    nonlinearity='tanh',  # activation function: 'tanh' (default) or 'relu'
    bias=True,  # whether to use bias terms (default: True)
    # If True, the input tensor is (N, T, C) instead of (T, N, C) (default: False).
    batch_first=False,
    # If non-zero, a dropout layer with this probability is applied to the
    # output of every RNN layer EXCEPT the last one, i.e. only between stacked
    # layers and never on the final output (default: 0).
    dropout=0.5,
)

# Default input layout is (T, N, C): T = sequence length, N = mini-batch size,
# C = number of input features.
x = torch.randn(20, 3, 100)

# Initial hidden state has shape (L*D, N, hidden_size), where L is the number
# of layers and D is 1 (unidirectional) or 2 (bidirectional).
h_0 = torch.ones((2, 3, 10))

# Pass a custom h_0; when omitted it defaults to an all-zero tensor.
out, h = rnn(x, hx=h_0)

# out.shape == (T, N, hidden_size); h.shape == (L*D, N, hidden_size)
print(out.shape, h.shape)

torch.Size([20, 3, 10]) torch.Size([2, 3, 10])


In [5]:
# Walk over every learnable tensor of the 2-layer RNN and report its shape.
# Each layer contributes an input-to-hidden weight, a hidden-to-hidden weight
# and one bias for each of the two.
for param_name, tensor in rnn.named_parameters():
    print(param_name, '  shape=', tensor.shape)

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
weight_ih_l1   shape= torch.Size([10, 10])
weight_hh_l1   shape= torch.Size([10, 10])
bias_ih_l1   shape= torch.Size([10])
bias_hh_l1   shape= torch.Size([10])


In [6]:
# Two-layer bidirectional RNN; `bidirectional` defaults to False.
b_run = nn.RNN(
    input_size=100,
    hidden_size=15,
    num_layers=2,
    bidirectional=True,
)

# Initial hidden state: (num_layers * 2 directions, N, hidden_size).
b_h_0 = torch.ones(4, 3, 15)
b_out, b_h = b_run(x, b_h_0)

# b_out: output of the last layer at every time step; for a bidirectional
#        network the feature dimension (dim 2) is doubled.
# b_h:   final hidden state of every layer; for a bidirectional network the
#        leading dimension (dim 0) is doubled.
print(b_out.shape, b_h.shape)

torch.Size([20, 3, 30]) torch.Size([4, 3, 15])


In [7]:
# The forward and reverse directions each keep their own, mutually independent
# set of parameters; the reverse-direction ones carry a `_reverse` suffix.
for name, param in b_run.named_parameters():
    print(name, '  shape=', param.shape)

weight_ih_l0   shape= torch.Size([15, 100])
weight_hh_l0   shape= torch.Size([15, 15])
bias_ih_l0   shape= torch.Size([15])
bias_hh_l0   shape= torch.Size([15])
weight_ih_l0_reverse   shape= torch.Size([15, 100])
weight_hh_l0_reverse   shape= torch.Size([15, 15])
bias_ih_l0_reverse   shape= torch.Size([15])
bias_hh_l0_reverse   shape= torch.Size([15])
weight_ih_l1   shape= torch.Size([15, 30])
weight_hh_l1   shape= torch.Size([15, 15])
bias_ih_l1   shape= torch.Size([15])
bias_hh_l1   shape= torch.Size([15])
weight_ih_l1_reverse   shape= torch.Size([15, 30])
weight_hh_l1_reverse   shape= torch.Size([15, 15])
bias_ih_l1_reverse   shape= torch.Size([15])
bias_hh_l1_reverse   shape= torch.Size([15])


In [8]:
# Hidden-to-hidden weight of layer 0 (forward direction).
# All the weights and biases are initialized from U(-sqrt(k), sqrt(k)), where k = 1/hidden_size.
b_run.weight_hh_l0

Parameter containing:
tensor([[-0.0151,  0.2416, -0.1300,  0.2511, -0.1046, -0.2568, -0.0399, -0.0436,
         -0.0560, -0.1171,  0.1784, -0.1166,  0.0170, -0.2542, -0.0576],
        [ 0.1710,  0.0177,  0.2132,  0.1023,  0.0123, -0.2208, -0.0694, -0.0633,
          0.0901, -0.1417, -0.0244,  0.1078,  0.0665,  0.0623, -0.0622],
        [ 0.0223,  0.0194,  0.1586,  0.0082, -0.0610, -0.1835,  0.0842,  0.0679,
          0.2415,  0.0253, -0.0188,  0.0402, -0.0924, -0.1708, -0.1862],
        [-0.0646, -0.2451,  0.2310,  0.0786,  0.1117,  0.1437,  0.1339,  0.2013,
          0.2449, -0.1397,  0.0485,  0.1635,  0.0893, -0.0083, -0.1293],
        [-0.0877, -0.2164,  0.1001,  0.1458,  0.2493,  0.2552,  0.0658, -0.1343,
          0.1258,  0.1682, -0.0175,  0.0632, -0.1136,  0.1003,  0.1862],
        [-0.1152,  0.1715, -0.2484, -0.0080,  0.1486,  0.2519, -0.2383,  0.2436,
          0.0661, -0.0351,  0.0128, -0.1589,  0.0005,  0.2010, -0.0897],
        [ 0.0659, -0.1093,  0.2278, -0.1867,  0.1529, 

In [9]:
# Input-to-hidden weight of layer 1 (forward direction); its second dimension
# is 30 because layer 1 consumes the concatenated outputs of both directions.
# All parameters have requires_grad=True.
b_run.weight_ih_l1

Parameter containing:
tensor([[-0.0966, -0.1513, -0.1663,  0.1073,  0.0128, -0.0440, -0.1607, -0.0373,
          0.0577,  0.2502,  0.2120, -0.1458,  0.2072,  0.2539, -0.1886,  0.0592,
         -0.1056, -0.2543, -0.0498,  0.1931,  0.1678, -0.2482, -0.1619,  0.1888,
         -0.2212, -0.1199,  0.2173,  0.1100,  0.1541,  0.1874],
        [ 0.0346, -0.0526,  0.1500,  0.1631,  0.0500, -0.0284, -0.2350,  0.2069,
         -0.1388,  0.2476,  0.1115, -0.1041,  0.1347,  0.1144, -0.0265,  0.1072,
          0.0079, -0.2297,  0.0762,  0.1567,  0.2171, -0.1911, -0.0843, -0.0939,
         -0.0571,  0.1494, -0.0350, -0.2530,  0.1366, -0.1039],
        [ 0.0522, -0.1960, -0.1029,  0.0592, -0.2230,  0.2427,  0.1275,  0.0909,
          0.1935,  0.0555, -0.2453,  0.1403, -0.2141, -0.0277, -0.0187,  0.1958,
         -0.1234, -0.2159,  0.2092,  0.1423, -0.2287, -0.0164, -0.1524, -0.1810,
         -0.0498,  0.0433, -0.0319,  0.1020,  0.1557,  0.0984],
        [-0.1029, -0.0461,  0.0122,  0.0928, -0.0769,  0.