# RNN模型

In [4]:
import torch
from torch.nn import RNN

In [2]:
# Recurrent layer mapping 4 input features to a 5-dimensional hidden state.
# batch_first=True: tensors are laid out as (batch, seq_len, feature).
rnn = RNN(
    input_size=4,
    hidden_size=5,
    batch_first=True,
)

In [5]:
# Input data: batch size 2, sequence length 3, and an input of size 4 at every time step.
input_shape = (2, 3, 4)
inputs = torch.rand(input_shape)
del input_shape  # keep the notebook namespace identical to the original cell
inputs

tensor([[[0.0717, 0.2483, 0.6369, 0.2375],
         [0.8713, 0.3939, 0.7648, 0.0323],
         [0.4518, 0.3574, 0.7898, 0.1921]],

        [[0.2594, 0.9358, 0.6137, 0.2060],
         [0.2949, 0.7747, 0.4832, 0.5256],
         [0.9427, 0.8908, 0.9020, 0.6380]]])

In [10]:
# The RNN returns two values: the hidden-state sequence (one hidden state per
# time step) and the hidden state at the last time step.
outputs, hn = rnn(inputs)

In [11]:
# Hidden-state sequence, shape (batch, seq_len, hidden_size).
outputs

tensor([[[-0.5506,  0.0849, -0.2268,  0.6773,  0.2703],
         [-0.7068,  0.1191, -0.1913,  0.7029,  0.3580],
         [-0.7670, -0.0334, -0.3941,  0.6315,  0.3857]],

        [[-0.3613,  0.2121,  0.1582,  0.6424,  0.1324],
         [-0.6332, -0.1522, -0.0602,  0.3005,  0.2094],
         [-0.4925, -0.0199,  0.0659,  0.6009,  0.2160]]],
       grad_fn=<TransposeBackward1>)

In [13]:
# Show the shape of the hidden-state sequence.
outputs.shape

torch.Size([2, 3, 5])

In [12]:
# Hidden state at the last time step (a single state per batch item, not a
# sequence), shape (1, batch, hidden_size).
hn

tensor([[[-0.7670, -0.0334, -0.3941,  0.6315,  0.3857],
         [-0.4925, -0.0199,  0.0659,  0.6009,  0.2160]]],
       grad_fn=<StackBackward0>)

In [14]:
# Show the shape of the last-time-step hidden state.
hn.shape

torch.Size([1, 2, 5])

# LSTM模型

In [15]:
from torch.nn import LSTM

In [16]:
# LSTM layer mapping 4 input features to a 5-dimensional hidden state.
# batch_first=True: tensors are laid out as (batch, seq_len, feature).
lstm = LSTM(
    input_size=4,
    hidden_size=5,
    batch_first=True,
)

In [17]:
# Fresh random input for the LSTM: a (2, 3, 4) tensor of uniform samples.
input_shape = (2, 3, 4)
inputs = torch.rand(input_shape)
del input_shape  # keep the notebook namespace identical to the original cell
inputs

tensor([[[0.1814, 0.3707, 0.1272, 0.1767],
         [0.1770, 0.3851, 0.6919, 0.9588],
         [0.5080, 0.6203, 0.7056, 0.2145]],

        [[0.8486, 0.6113, 0.8123, 0.7885],
         [0.0033, 0.1065, 0.5890, 0.7950],
         [0.5764, 0.7725, 0.7797, 0.2712]]])

In [19]:
# The LSTM returns the hidden-state sequence `outputs`, together with the
# last-time-step hidden state `hn` and the last-time-step memory cell `cn`.
outputs,(hn,cn) = lstm(inputs)

In [20]:
# Display the LSTM hidden-state sequence.
outputs

tensor([[[-0.1494, -0.0833,  0.0473, -0.0264, -0.0407],
         [-0.1550, -0.2388,  0.1560, -0.0593, -0.0718],
         [-0.2402, -0.3003,  0.2603, -0.0226, -0.0219]],

        [[-0.1379, -0.1545,  0.1686,  0.0060,  0.0197],
         [-0.1528, -0.2654,  0.1854, -0.0289, -0.0499],
         [-0.2444, -0.3137,  0.3112, -0.0065, -0.0064]]],
       grad_fn=<TransposeBackward0>)

In [21]:
# Display the LSTM hidden state at the last time step.
hn

tensor([[[-0.2402, -0.3003,  0.2603, -0.0226, -0.0219],
         [-0.2444, -0.3137,  0.3112, -0.0065, -0.0064]]],
       grad_fn=<StackBackward0>)

In [22]:
# Display the LSTM memory cell at the last time step.
cn

tensor([[[-0.3869, -0.4858,  0.4931, -0.0444, -0.0433],
         [-0.3932, -0.5065,  0.5906, -0.0126, -0.0131]]],
       grad_fn=<StackBackward0>)

In [23]:
# Print the shapes of the output sequence, final hidden state and final cell
# state on one line, separated by spaces.
print(*(tensor.shape for tensor in (outputs, hn, cn)))

torch.Size([2, 3, 5]) torch.Size([1, 2, 5]) torch.Size([1, 2, 5])


# Transformer

In [25]:
import torch
import torch.nn as nn

In [26]:
# One Transformer encoder layer with model width d_model=4 and nhead=2
# attention heads.
# NOTE(review): unlike the RNN/LSTM cells, batch_first is not passed here, so
# the layer uses PyTorch's default input layout. Confirm whether `src` is
# meant to be (seq_len, batch, d_model) or (batch, seq_len, d_model).
encoder_layer = nn.TransformerEncoderLayer(
    d_model=4,
    nhead=2,
)

In [27]:
# Random source tensor of shape (2, 3, 4); the last dimension matches the
# encoder layer's d_model=4.
src_shape = (2, 3, 4)
src = torch.rand(src_shape)
del src_shape  # keep the notebook namespace identical to the original cell
src

tensor([[[0.3611, 0.5325, 0.9481, 0.7963],
         [0.9961, 0.7956, 0.4017, 0.0502],
         [0.8829, 0.4834, 0.9679, 0.2027]],

        [[0.4936, 0.8113, 0.0598, 0.9159],
         [0.5789, 0.0197, 0.1378, 0.9244],
         [0.7272, 0.4989, 0.9344, 0.2296]]])

In [28]:
# Run the source tensor through the encoder layer and display the result;
# the displayed output has the same (2, 3, 4) shape as `src`.
out = encoder_layer(src)
out

tensor([[[-1.6869,  0.3180,  0.9256,  0.4433],
         [ 0.4204,  1.2509, -0.1791, -1.4921],
         [-0.2207,  0.3562,  1.3134, -1.4489]],

        [[-0.9238,  0.8286, -1.0604,  1.1556],
         [ 0.1103, -0.7229, -0.9739,  1.5865],
         [-0.6777,  0.6753,  1.2431, -1.2407]]],
       grad_fn=<NativeLayerNormBackward0>)