## hx形状分析

\begin{align}
\mathbf{z}_t &= W_{hh} \mathbf{h}_{t-1} + W_{ih} \mathbf{x}_t + \mathbf{b}_{ih} +  \mathbf{b}_{hh}  \\
\mathbf{h}_{t} &= f(\mathbf{z}_t)
\end{align}

In [111]:
import torch.nn as nn
import torch
import numpy as np

# Single-layer, unidirectional RNN: 100 input features -> 10 hidden units.
rnn = nn.RNN(input_size=100, hidden_size=10)
x = torch.randn(20, 3, 100)  # [seq len, batch, feature len]
h_0 = torch.ones((1, 3, 10))  # [num_layers * num_directions, batch, hidden size]
out, h = rnn(x, hx=h_0)

'''
由上计算公式可得:
由x.shape=[20, 3, 100] ---> 输入序列长度为20,批次为3,特征维度为100 ---> x_{t}.shape=[100]
由input_size=100, hidden_size=10 --> W_{ih}.shape=[10, 100], W_{hh}.shape=[10, 10]
故(W_{ih}x_t).shape=[10] ---> (W_{hh}h_{t-1}).shape=[10] ---> h_{t-1}.shape=[10]
由批次为3,层次为1,且为单向循环神经网络,故hx.shape=(1, 3, 10)
'''
# Check the shapes derived in the note above instead of only printing them,
# so a wrong derivation fails loudly on Restart & Run All.
assert rnn.weight_ih_l0.shape == (10, 100)
assert rnn.weight_hh_l0.shape == (10, 10)
assert out.shape == (20, 3, 10)
assert h.shape == (1, 3, 10)

for name, param in rnn.named_parameters():
    print(name, '  shape=', param.shape)

print('*****************************************')

print(out.shape, h.shape)

weight_ih_l0   shape= torch.Size([10, 100])
weight_hh_l0   shape= torch.Size([10, 10])
bias_ih_l0   shape= torch.Size([10])
bias_hh_l0   shape= torch.Size([10])
*****************************************
torch.Size([20, 3, 10]) torch.Size([1, 3, 10])


## out,hx形状及其关系分析(单向多层)

<img src='../../../../Other/img/循环神经网络理解.png'>

In [112]:
# Three stacked unidirectional LSTM layers: 100 input features -> 4 hidden units.
lstm_unidir = nn.LSTM(100, 4, num_layers=3, bidirectional=False)

x = torch.randn(7, 3, 100)  # sequence length is 7
# One initial hidden state and one initial cell state per layer (3 layers).
h_0 = torch.ones(3, 3, 4)
c_0 = torch.ones(3, 3, 4)

# h stacks the final hidden state of every layer: h_{-1}^{(0)}, ..., h_{-1}^{(-1)}
out, (h, c) = lstm_unidir(x, (h_0, c_0))

'''
由上图可知:
num_layers=3, input_size=100, hidden_size=4, bidirectional=False
故可得h_{-1}^{(1)}.shape=[1, 3, 4],h_{-1}^{(2)}.shape=[1, 3, 4],h_{-1}^{(3)}.shape=[1, 3, 4] ---> h_{-1}.shape=[3, 3, 4],c_0类似
输出层为每个时刻最后一层的h_{*}^{(-1)},又有序列长度为7,故out.shape=[7, 3, 4]
'''
# List every learnable tensor of the stacked LSTM together with its shape.
for param_name, weights in lstm_unidir.named_parameters():
    print(param_name, '  shape=', weights.shape)

print('*****************************************')

print(out.shape, h.shape, c.shape)

weight_ih_l0   shape= torch.Size([16, 100])
weight_hh_l0   shape= torch.Size([16, 4])
bias_ih_l0   shape= torch.Size([16])
bias_hh_l0   shape= torch.Size([16])
weight_ih_l1   shape= torch.Size([16, 4])
weight_hh_l1   shape= torch.Size([16, 4])
bias_ih_l1   shape= torch.Size([16])
bias_hh_l1   shape= torch.Size([16])
weight_ih_l2   shape= torch.Size([16, 4])
weight_hh_l2   shape= torch.Size([16, 4])
bias_ih_l2   shape= torch.Size([16])
bias_hh_l2   shape= torch.Size([16])
*****************************************
torch.Size([7, 3, 4]) torch.Size([3, 3, 4]) torch.Size([3, 3, 4])


In [113]:
# Positional check: the last time step of `out` must equal the final hidden
# state of the top layer, h[-1]. (`h` also stacks the lower layers' final
# states, which never appear in `out`.)
assert torch.allclose(out[-1], h[-1])

# np.isin is only a value-membership test; it is kept to visualise where the
# top layer's final hidden state occurs inside `out`: only the last time step
# is expected to be True.
np.isin(out.detach().numpy(), h[-1].detach().numpy())

array([[[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]],

       [[ True,  True,  True,  True],
        [ True,  True,  True,  True],
        [ True,  True,  True,  True]]])

##  out,hx形状及其关系分析(双向多层)

<img src='../../../../Other/img/双向循环神经网络.png'>

上图模型可形式化描述为:

\begin{aligned}
\overrightarrow{\mathbf{h}}_{t} &= {\rm RNN_{FW}}\left(\overrightarrow{\mathbf{h}}_{t-1}, \mathbf{x}_{t}\right) \\
\overleftarrow{\mathbf{h}}_{t} &= {\rm RNN_{BW}}\left(\overleftarrow{\mathbf{h}}_{t+1}, \mathbf{x}_{t}\right) \\
\mathbf{h}_{t} &= \left[\overrightarrow{\mathbf{h}}_{t}; \overleftarrow{\mathbf{h}}_{t}\right]
\end{aligned}

In [117]:
lstm_bidirec = nn.LSTM(input_size=100,
               hidden_size=4,
               num_layers=3,
               bidirectional=True)  # bidirectional recurrent network

x = torch.randn(5, 3, 100)  # sequence length is 5
# Initial states: 3 layers x 2 directions = 6 entries along the first axis.
h_0 = torch.ones((6, 3, 4))
c_0 = torch.ones((6, 3, 4))

out, (h, c) = lstm_bidirec(x, hx=(h_0, c_0))

'''
num_layers=3, input_size=100, hidden_size=4, bidirectional=True
故可得h_{-1}^{(1)}.shape=[2, 3, 4] <-->一正一反 ,h_{-1}^{(2)}.shape=[2, 3, 4] <-->一正一反 ,h_{-1}^{(3)}.shape=[2, 3, 4] <-->一正一反 ---> h_{-1}.shape=[6, 3, 4],c_0类似
'''
# Check the derived shapes instead of only printing them. `out` holds both
# directions side by side on the feature axis, hence 2 * hidden_size = 8.
assert out.shape == (5, 3, 8)
assert h.shape == (6, 3, 4)
assert c.shape == (6, 3, 4)

for name, param in lstm_bidirec.named_parameters():
    print(name, '  shape=', param.shape)

print('*****************************************')

print(out.shape, h.shape, c.shape)

weight_ih_l0   shape= torch.Size([16, 100])
weight_hh_l0   shape= torch.Size([16, 4])
bias_ih_l0   shape= torch.Size([16])
bias_hh_l0   shape= torch.Size([16])
weight_ih_l0_reverse   shape= torch.Size([16, 100])
weight_hh_l0_reverse   shape= torch.Size([16, 4])
bias_ih_l0_reverse   shape= torch.Size([16])
bias_hh_l0_reverse   shape= torch.Size([16])
weight_ih_l1   shape= torch.Size([16, 8])
weight_hh_l1   shape= torch.Size([16, 4])
bias_ih_l1   shape= torch.Size([16])
bias_hh_l1   shape= torch.Size([16])
weight_ih_l1_reverse   shape= torch.Size([16, 8])
weight_hh_l1_reverse   shape= torch.Size([16, 4])
bias_ih_l1_reverse   shape= torch.Size([16])
bias_hh_l1_reverse   shape= torch.Size([16])
weight_ih_l2   shape= torch.Size([16, 8])
weight_hh_l2   shape= torch.Size([16, 4])
bias_ih_l2   shape= torch.Size([16])
bias_hh_l2   shape= torch.Size([16])
weight_ih_l2_reverse   shape= torch.Size([16, 8])
weight_hh_l2_reverse   shape= torch.Size([16, 4])
bias_ih_l2_reverse   shape= torch.Size([16

In [118]:
# h[4] is the forward-direction part of h_{-1}^{(-1)} (final hidden state of
# the last layer); mark where its values occur inside `out`.
top_forward_final = h[4:5].detach().numpy()
np.isin(out.detach().numpy(), top_forward_final)

array([[[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[ True,  True,  True,  True, False, False, False, False],
        [ True,  True,  True,  True, False, False, False, False],
        [ True,  True,  True,  True, False, False, False, False]]])

In [133]:
# First 4 features (forward half) of the last time step minus h[4];
# the recorded result is all zeros, i.e. the two tensors are identical.
out[-1].split(4, dim=1)[0] - h[4:5]

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]], grad_fn=<SubBackward0>)

In [119]:
# h[[5], :, :] is the backward (reverse-direction) part of h_{-1}^{(-1)}, not
# the forward part: its values show up in the last 4 features of the FIRST
# time step of `out`.
np.isin(out.detach().numpy(), h[[5], :, :].detach().numpy())

array([[[False, False, False, False,  True,  True,  True,  True],
        [False, False, False, False,  True,  True,  True,  True],
        [False, False, False, False,  True,  True,  True,  True]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]],

       [[False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False]]])

In [130]:
# Last 4 features (backward half) of the first time step minus h[5];
# the recorded result is all zeros, i.e. the two tensors are identical.
out[0].split(4, dim=1)[1] - h[5:6]

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]], grad_fn=<SubBackward0>)

### <font color='red'>总结得出:双向多层时,out,hx的关系为(由上面的图易看出):</font>

假设序列长度为5(下标表示该方向自身的计算步序号:正向从 $x_0$ 开始,反向从 $x_4$ 开始,因此 $h_{4反}$ 是反向读完整个序列后的最终状态,对应输入 $x_0$;上标 $-1$ 表示最后一层):

\begin{equation}
out = 
\begin{bmatrix}
h_{0正}^{-1}	   & h_{4反}^{-1}     \\ 
h_{1正}^{-1}	   & h_{3反}^{-1}     \\ 
h_{2正}^{-1}	   & h_{2反}^{-1}     \\ 
h_{3正}^{-1}	   & h_{1反}^{-1}     \\ 
h_{4正}^{-1}	   & h_{0反}^{-1}     \\ 
\end{bmatrix}
\end{equation}

