In [113]:
import torch
import torch.nn as nn
# 1. Unidirectional, single-layer RNN
single_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True)
# Random batch laid out as batch size * sequence length * feature size
input = torch.randn(1, 2, 4)
out, h_n = single_rnn(input)
# `out` holds the hidden state of every time step, `h_n` only the final one
print(out, h_n, sep="\n")

tensor([[[ 0.3175,  0.8780, -0.4405],
         [ 0.6312,  0.7685, -0.2676]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.6312,  0.7685, -0.2676]]], grad_fn=<StackBackward0>)


In [114]:
# 2. Bidirectional, single-layer RNN
bidirectional_rnn = nn.RNN(
    input_size=4,
    hidden_size=3,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)
out, h_n = bidirectional_rnn(input)
# Forward and backward features are concatenated on the last axis of `out`,
# while `h_n` carries one final hidden state per direction on its first axis.
print(out.shape, h_n.shape, sep="\n")

torch.Size([1, 2, 6])
torch.Size([2, 1, 3])


> 手动实现

In [115]:
# Batch size and input sequence length
bs = 2
T = 3
# Input feature size and hidden-state feature size
input_size = 2
hidden_size = 3
# Randomly generated input feature sequence: bs * T * input_size
input = torch.randn(bs, T, input_size)
# Initial hidden state, all zeros: bs * hidden_size
h_prev = torch.zeros(bs, hidden_size)
single_rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=1,
    batch_first=True,
)
# The initial state is passed with an extra leading axis: 1 * bs * hidden_size
out, h_n = single_rnn(input, h_prev.unsqueeze(0))
# Shapes first, then the values themselves
print(input.shape, out.shape, h_n.shape, sep="\n")
print(input, out, h_n, sep="\n")
def rnn_forward(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev=None):
    """Run a single-layer, unidirectional tanh RNN over a batch-first sequence.

    Every time step computes
    ``h_t = tanh(x_t @ weight_ih.T + bias_ih + h_{t-1} @ weight_hh.T + bias_hh)``.

    Args:
        input: Input sequence of shape (bs, T, input_size).
        weight_ih: Input-to-hidden weights of shape (h_dim, input_size).
        weight_hh: Hidden-to-hidden weights of shape (h_dim, h_dim).
        bias_ih: Input-to-hidden bias of shape (h_dim,).
        bias_hh: Hidden-to-hidden bias of shape (h_dim,).
        h_prev: Initial hidden state of shape (bs, h_dim). When omitted, a
            zero state with the dtype and device of ``input`` is used.

    Returns:
        A tuple ``(h_out, h_n)``. ``h_out`` has shape (bs, T, h_dim) and holds
        the hidden state of every time step; ``h_n`` has shape (1, bs, h_dim)
        and holds the hidden state after the last step.
    """
    bs, T, _ = input.shape
    h_dim = weight_ih.shape[0]
    if h_prev is None:
        h_prev = input.new_zeros(bs, h_dim)

    # The input projection does not depend on the recurrence, so compute it
    # for all time steps at once instead of tiling the weights in every step.
    w_times_x = input @ weight_ih.T  # bs * T * h_dim
    weight_hh_t = weight_hh.T  # h_dim * h_dim, hoisted out of the loop

    h_steps = []
    for t in range(T):
        w_times_h = h_prev @ weight_hh_t  # bs * h_dim
        h_prev = torch.tanh(w_times_x[:, t, :] + bias_ih + w_times_h + bias_hh)
        h_steps.append(h_prev)

    # Stacking the per-step states keeps their dtype and device; a fixed
    # torch.zeros buffer would silently downcast float64 results to float32
    # and would live on the CPU regardless of where the inputs are.
    if h_steps:
        h_out = torch.stack(h_steps, dim=1)
    else:
        # Empty sequence: no steps to stack, return an empty (bs, 0, h_dim) output.
        h_out = input.new_zeros(bs, T, h_dim)
    return h_out, h_prev.unsqueeze(0)

torch.Size([2, 3, 2])
torch.Size([2, 3, 3])
torch.Size([1, 2, 3])
tensor([[[ 1.1546,  1.0514],
         [-0.6826,  1.3194],
         [-0.8859, -1.3638]],

        [[-0.8608, -0.4654],
         [ 0.8120,  0.0963],
         [ 1.5826,  0.1573]]])
tensor([[[ 0.8473,  0.5355,  0.7361],
         [ 0.2560,  0.8046,  0.5974],
         [ 0.1232,  0.6261, -0.3266]],

        [[ 0.2388,  0.2577, -0.3195],
         [ 0.6503,  0.6086,  0.3659],
         [ 0.7836,  0.8212,  0.6717]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.1232,  0.6261, -0.3266],
         [ 0.7836,  0.8212,  0.6717]]], grad_fn=<StackBackward0>)


>验证

In [116]:
# Pull the parameters out of the RNN defined above and verify that the
# hand-written forward pass produces the same outputs.
for k, v in single_rnn.named_parameters():
    print(k, v)
# named_parameters() yields (name, parameter) pairs, so each name below is a
# 2-tuple and index [1] selects the parameter tensor itself.
weight_ih,  weight_hh,bias_ih,  bias_hh = single_rnn.named_parameters()
manual_out, manual_h_n = rnn_forward(
    input, weight_ih[1], weight_hh[1], bias_ih[1], bias_hh[1], h_prev
)
# Check the claim instead of comparing printed tensors by eye: `out` and `h_n`
# are the results nn.RNN returned for the same input and initial state.
torch.testing.assert_close(manual_out, out)
torch.testing.assert_close(manual_h_n, h_n)
manual_out, manual_h_n

weight_ih_l0 Parameter containing:
tensor([[0.4097, 0.1169],
        [0.0927, 0.0973],
        [0.2856, 0.4599]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[-0.5181, -0.0384,  0.2663],
        [ 0.4099,  0.5517,  0.0204],
        [-0.2913,  0.4488,  0.2106]], requires_grad=True)
bias_ih_l0 Parameter containing:
tensor([ 0.4424,  0.1487, -0.2498], requires_grad=True)
bias_hh_l0 Parameter containing:
tensor([0.2082, 0.2400, 0.3785], requires_grad=True)


(tensor([[[ 0.8473,  0.5355,  0.7361],
          [ 0.2560,  0.8046,  0.5974],
          [ 0.1232,  0.6261, -0.3266]],
 
         [[ 0.2388,  0.2577, -0.3195],
          [ 0.6503,  0.6086,  0.3659],
          [ 0.7836,  0.8212,  0.6717]]], grad_fn=<CopySlices>),
 tensor([[[ 0.1232,  0.6261, -0.3266],
          [ 0.7836,  0.8212,  0.6717]]], grad_fn=<UnsqueezeBackward0>))