In [4]:
import torch
import torch.nn as nn

$$
\begin{equation}
\begin{aligned}
& batch\_first=True, 那么shape就是(N, L, H_{in}) \\
& N = batch\_size \\
& L = sequence\ length \\
& D = 2\ if\  bidirectional=True\ otherwise\ 1 \\
& H_{in} = input\_size \\
& H_{out} = hidden\_size \\
\end{aligned}
\end{equation}
$$

## RNN

In [15]:
# Build a 2-layer RNN: input_size=100, hidden_size=20, num_layers=2.
# batch_first=True makes the input/output tensors batch-major: (batch, seq_len, feature).
rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=2, batch_first=True)

In [16]:
# Input: 3 sample sequences (batch=3), each of length 10 (seq_len=10), with
# 100 features per time step (feature_len=100).
x = torch.randn(3, 10, 100)

In [19]:
# Run the RNN with an explicit all-zero initial hidden state h_0.
# h_0 has shape (num_layers * num_directions, batch, hidden_size); unlike the
# input, its layout is NOT affected by batch_first. The shape is derived from
# the module and the input instead of being hard-coded, so it stays correct if
# the RNN configuration or the batch size changes.

# out shape: (batch_size, sequence_len, num_directions * hidden_size)
# h shape:   (num_layers * num_directions, batch_size, hidden_size)
out, h = rnn(
    x,
    torch.zeros(
        rnn.num_layers * (2 if rnn.bidirectional else 1),
        x.size(0),  # batch dimension comes first because batch_first=True
        rnn.hidden_size,
    ),
)

输出是每一个时刻在空间上最后一层的输出[batch, seq, hidden_size]

ht是最后一个时刻上所有层的记忆单元 [num_layers, batch, hidden_size]（注意：batch_first=True 只影响输入和out的维度顺序，不影响ht）

In [18]:
# Print the shapes of the returned output and of the final hidden state.
print('out shape is: ', out.shape)  # torch.Size([3, 10, 20])
print('h shape is: ', h.shape)  # torch.Size([2, 3, 20]) -- the layer dimension (num_layers=2) comes first

out shape is:  torch.Size([3, 10, 20])
h shape is:  torch.Size([2, 3, 20])


## LSTM

In [21]:
# 2-layer LSTM: 100 input features, hidden size 20, batch-major tensors (batch_first=True).
lstm = nn.LSTM(input_size=100, hidden_size=20, num_layers=2, batch_first=True)

In [23]:
# Input shape is [batch_size, seq_len, input_size] because batch_first=True.
x = torch.rand(3, 10, 100)

In [26]:
# Initialise the two LSTM states: the hidden state h_0 and the cell state c_0.
# Both have shape (num_layers * num_directions, batch_size, hidden_size).
#   - input: (batch, seq_len, input_size), because the LSTM uses batch_first=True
#   - h_0:   (num_layers * num_directions, batch, hidden_size)
#   - c_0:   (num_layers * num_directions, batch, hidden_size)
h0 = torch.rand(2, 3, 20)

In [27]:
# Initial cell state c_0; same shape as h0: (num_layers, batch, hidden_size).
c0 = torch.rand(2, 3, 20)

In [30]:
# Forward pass with explicit initial states. output holds the last layer's
# hidden state at every time step; hn / cn are the final hidden and cell states.
output, (hn, cn) = lstm(x, (h0, c0))

In [32]:
# Expected: (batch_size, seq_len, hidden_size) = (3, 10, 20)
output.shape

torch.Size([3, 10, 20])

In [33]:
# Expected: (num_layers, batch_size, hidden_size) = (2, 3, 20)
hn.shape

torch.Size([2, 3, 20])

In [40]:
# One-hot encode the indices [1, 5, 0, 4, 3] by selecting the matching rows
# of a 7x7 identity matrix.
torch.eye(7)[[1,5,0,4,3]]

tensor([[0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.]])

In [49]:
# Sliding-window demo: for each position i >= step, print the window of the
# `step` preceding elements followed by the element at i (the "target").
a = list(range(10))
step = 2
for i, t in enumerate(a[step:], start=step):
    b = a[i - step:i]
    # One print call with a newline separator emits the window and the target
    # on two separate lines.
    print(b, t, sep="\n")

[0, 1]
2
[1, 2]
3
[2, 3]
4
[3, 4]
5
[4, 5]
6
[5, 6]
7
[6, 7]
8
[7, 8]
9


In [50]:
# torch.div performs true division by default, so two integer tensors
# produce a floating-point result (0.4 here).
torch.div(torch.tensor(4), torch.tensor(10))

tensor(0.4000)

In [69]:
# a: 9 random values; b: the positions that the following index_fill call overwrites.
a = torch.rand(9)
print(a)
b = torch.tensor([2,4,5,6])

tensor([0.8269, 0.3457, 0.3123, 0.3515, 0.7753, 0.0552, 0.5964, 0.8608, 0.2647])


In [70]:
# Out-of-place fill: returns a copy of a with the entries at indices b (along
# dim 0) set to 0.0; a itself is left unchanged.
a.index_fill(0, b, 0.0)

tensor([0.8269, 0.3457, 0.0000, 0.3515, 0.0000, 0.0000, 0.0000, 0.8608, 0.2647])

In [71]:
# Display a again to confirm that index_fill did not modify it in place.
a

tensor([0.8269, 0.3457, 0.3123, 0.3515, 0.7753, 0.0552, 0.5964, 0.8608, 0.2647])

In [72]:
# Element-wise negation; returns a new tensor.
a.neg()

tensor([-0.8269, -0.3457, -0.3123, -0.3515, -0.7753, -0.0552, -0.5964, -0.8608,
        -0.2647])

ValueError: not enough values to unpack (expected 2, got 1)