In [1]:
import torch.nn as nn
import torch

In [2]:
# Smallest possible case: one layer, one sequence, a single time step.
# Input layout is (T, N, C) = (1, 1, 100).
x = torch.randn((1, 1, 100))
rnn = nn.RNN(100, 30)  # positional form of (input_size, hidden_size)

# With a single layer and a single time step, the per-step output `out` and the
# final hidden state `h` hold the same values, so the module behaves like an
# RNNCell (which returns only h). The printed difference is therefore all zeros.
out, h = rnn(x)
print(out - h)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SubBackward0>)


In [8]:
# Two-layer unidirectional RNN with every constructor argument spelled out.
rnn = nn.RNN(
    input_size=100,       # C: features per input vector (e.g. the word-embedding dimension)
    hidden_size=10,       # H: features in the hidden state
    num_layers=2,         # stacked layers: layer 2 consumes layer 1's output; each layer has its own independent parameters
    nonlinearity='tanh',  # activation function, 'tanh' (default) or 'relu'
    bias=True,            # use the bias terms b_ih / b_hh (default True)
    batch_first=False,    # False (default): input is (T, N, C); True: input is (N, T, C)
    dropout=0.5,          # if non-zero, dropout is applied to the output of every layer EXCEPT the last (default 0)
)

# Default input layout is (T, N, C): T = sequence length, N = mini-batch size,
# C = number of input features.
x = torch.randn(20, 3, 100)

# Initial hidden state has shape (L * D, N, H): L = number of layers,
# D = 1 (unidirectional) or 2 (bidirectional), N = batch size, H = hidden size.
# The shape is derived from the module and the input so it cannot drift out of
# sync with the constructor arguments; here it evaluates to (2, 3, 10).
h_0 = torch.ones((rnn.num_layers, x.size(1), rnn.hidden_size))

# Pass a custom h_0; when hx is omitted it defaults to an all-zero tensor.
out, h = rnn(x, hx=h_0)
print(out.shape, h.shape)

torch.Size([20, 3, 10]) torch.Size([2, 3, 10])


In [9]:
# Enumerate the learnable tensors of `rnn` together with their shapes.
# The `_l<k>` suffix of each name is the layer index; every layer owns its own
# input-to-hidden (ih) and hidden-to-hidden (hh) weights and biases.
for name, param in rnn.named_parameters():
    print(name, '  shape=', param.size())

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
weight_ih_l1   shape= torch.Size([10, 10])
weight_hh_l1   shape= torch.Size([10, 10])
bias_ih_l1   shape= torch.Size([10])
bias_hh_l1   shape= torch.Size([10])


In [12]:
# Two-layer bidirectional RNN.
b_run = nn.RNN(input_size=100, hidden_size=15, num_layers=2,
               bidirectional=True)  # process the sequence in both directions (default False)

# Initial hidden state has shape (L * D, N, H) with D = 2 for a bidirectional
# network. Deriving it from the module and from `x` keeps it valid for whatever
# batch size `x` currently has; for an x of shape (20, 3, 100) it is (4, 3, 15).
b_h_0 = torch.ones((
    b_run.num_layers * (2 if b_run.bidirectional else 1),  # L * D
    x.size(1),                                             # N (batch_first=False, so dim 1 is the batch)
    b_run.hidden_size,                                     # H
))
b_out, b_h = b_run(x, hx=b_h_0)
# b_out: last layer's output at every time step (bidirectional doubles dim 2).
# b_h:   final hidden state of every layer (bidirectional doubles dim 0).
print(b_out.shape, b_h.shape)

torch.Size([20, 3, 30]) torch.Size([4, 3, 15])


In [6]:
# The forward and backward directions of a bidirectional RNN each have their
# own, mutually independent parameters; the backward set carries a `_reverse`
# suffix. From layer 1 onwards the input-to-hidden weight is 2 * hidden_size
# wide because it consumes both directions' outputs from the layer below.
for name, param in b_run.named_parameters():
    print(name, '  shape=', param.shape)

weight_ih_l0   shape= torch.Size([15, 100])
weight_hh_l0   shape= torch.Size([15, 15])
bias_ih_l0   shape= torch.Size([15])
bias_hh_l0   shape= torch.Size([15])
weight_ih_l0_reverse   shape= torch.Size([15, 100])
weight_hh_l0_reverse   shape= torch.Size([15, 15])
bias_ih_l0_reverse   shape= torch.Size([15])
bias_hh_l0_reverse   shape= torch.Size([15])
weight_ih_l1   shape= torch.Size([15, 30])
weight_hh_l1   shape= torch.Size([15, 15])
bias_ih_l1   shape= torch.Size([15])
bias_hh_l1   shape= torch.Size([15])
weight_ih_l1_reverse   shape= torch.Size([15, 30])
weight_hh_l1_reverse   shape= torch.Size([15, 15])
bias_ih_l1_reverse   shape= torch.Size([15])
bias_hh_l1_reverse   shape= torch.Size([15])


In [7]:
# Display the layer-0 forward-direction hidden-to-hidden weight matrix.
# All weights and biases are initialized from U(-\sqrt{k}, \sqrt{k}), where k = 1/hidden_size.
# NOTE(review): the captured output shows rows of 10 values, while b_run is
# constructed with hidden_size=15 -- presumably a stale output from an earlier
# run; re-run the notebook top to bottom to confirm.
b_run.weight_hh_l0

Parameter containing:
tensor([[-0.1584, -0.1181,  0.0134,  0.1998, -0.1996, -0.0111,  0.2989, -0.2947,
          0.0927,  0.2011],
        [ 0.2428,  0.1034,  0.2653, -0.2804, -0.3050, -0.0727, -0.1274, -0.2225,
          0.1326,  0.2230],
        [ 0.1868,  0.0359,  0.1400, -0.2635, -0.0658,  0.3000,  0.2791, -0.1634,
         -0.2231,  0.0575],
        [ 0.2148, -0.0505, -0.0739, -0.2656, -0.1210,  0.1496,  0.2525, -0.0506,
          0.2353, -0.2564],
        [-0.2597,  0.0110,  0.1036,  0.2018, -0.0318,  0.0163,  0.2232, -0.1560,
         -0.2602, -0.2968],
        [ 0.0404, -0.3090, -0.0423, -0.0009, -0.2997, -0.2353,  0.2185,  0.2978,
         -0.0437, -0.1169],
        [-0.1601,  0.2304, -0.0486, -0.2143, -0.2003,  0.0889,  0.1846,  0.1824,
         -0.1103,  0.3064],
        [ 0.0140, -0.0342,  0.2632,  0.0422, -0.2634,  0.1542,  0.0417, -0.1620,
         -0.0876,  0.1856],
        [ 0.1155, -0.2141,  0.1507,  0.2065,  0.0953, -0.2676, -0.0450, -0.1375,
          0.2399, -0.1195