In [2]:
import torch

#### Recurrent layers

##### LSTM
  
multi-layer long short-term memory (LSTM)  


torch.nn.LSTM(  
    *args, **kwargs)
  
* **input_size** – The number of expected features in the input x
* **hidden_size** – The number of features in the hidden state h
* **num_layers** – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1 스태킹할 LSTM layer의 수
  
  
* **bias** – If False, then the layer does not use bias weights b_ih and b_hh. Default: True
  
  
* **batch_first** – If True, then the input and output tensors are provided as (batch, seq, feature). Default: False
  
  
* **dropout** – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0
* **bidirectional** – If True, becomes a bidirectional LSTM. Default: False


$i_t=\sigma(W_{ii}x_t+b_{ii}+W_{hi}h_{(t-1)}+b_{hi})$  
$f_t=\sigma(W_{if}x_t+b_{if}+W_{hf}h_{(t-1)}+b_{hf})$  
$g_t=\tanh(W_{ig}x_t+b_{ig}+W_{hg}h_{(t-1)}+b_{hg})$  
$o_t=\sigma(W_{io}x_t+b_{io}+W_{ho}h_{(t-1)}+b_{ho})$  
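$c_t=f_t\odot c_{t-1}+i_t\odot g_t$  
$h_t=o_t\odot\tanh(c_t)$  
(here $\odot$ denotes the element-wise (Hadamard) product and $\sigma$ the sigmoid function)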


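Input: $(L, N, H_{in})$, where $L$ is the sequence length, $N$ the batch size, and $H_{in}$ the `input_size` (this layout assumes `batch_first=False`)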

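$H_0$: $(S, N, H_{out})$, where $S$ = `num_layers` × (2 if bidirectional, otherwise 1) and $H_{out}$ = `hidden_size` (with the default `proj_size=0`). 각 lstm layer의 hidden state 중에서 첫번째 것들의 초기화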

$C_0$: $(S, N, H_{out})$ 각 lstm layer의 cell state 중에서 첫번째 것들의 초기화

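Output: $(L, N, H_{all})$, where $H_{all} = D \times H_{out}$ with $D = 2$ if bidirectional and $D = 1$ otherwise; it contains the last layer's hidden state $h_t$ for every time step $t$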

$H_{n}$: $(S, N, H_{out})$ 각 lstm layer의 hidden state 중에서 마지막 것들

$C_{n}$: $(S, N, H_{out})$ 각 lstm layer의 cell state 중에서 마지막 것들

In [1]:
import torch.nn as nn

1. LSTM

In [3]:
# Print the built-in documentation of the nn.RNN class.
# NOTE(review): this section is titled "LSTM" but the call inspects nn.RNN;
# help(nn.LSTM) may have been intended — confirm before changing, since the
# captured output below is the RNN docstring.
help(nn.RNN)

Help on class RNN in module torch.nn.modules.rnn:

class RNN(RNNBase)
 |  RNN(*args, **kwargs)
 |  
 |  Applies a multi-layer Elman RNN with :math:`tanh` or :math:`ReLU` non-linearity to an
 |  input sequence.
 |  
 |  
 |  For each element in the input sequence, each layer computes the following
 |  function:
 |  
 |  .. math::
 |      h_t = \text{tanh}(W_{ih} x_t + b_{ih} + W_{hh} h_{(t-1)} + b_{hh})
 |  
 |  where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is
 |  the input at time `t`, and :math:`h_{(t-1)}` is the hidden state of the
 |  previous layer at time `t-1` or the initial hidden state at time `0`.
 |  If :attr:`nonlinearity` is ``'relu'``, then `ReLU` is used instead of `tanh`.
 |  
 |  Args:
 |      input_size: The number of expected features in the input `x`
 |      hidden_size: The number of features in the hidden state `h`
 |      num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
 |          would mean stacking two RNNs together to for

In [8]:
# Build a stacked LSTM made of two recurrent layers, configured as follows.
lstm = nn.LSTM(
    input_size=10,   # number of features in each input vector
    hidden_size=20,  # number of features in the hidden state
    num_layers=2,    # number of stacked recurrent layers
)

In [5]:
# Random input sequence laid out as (seq_len, batch, input_size):
#   seq_len    = 5
#   batch      = 3
#   input_size = 10
ipt = torch.randn((5, 3, 10))

In [6]:
# Initial values of the hidden state, laid out as (num_layers, batch, hidden_size):
#   num_layers  = 2
#   batch       = 3
#   hidden_size = 20
h0 = torch.randn((2, 3, 20))

In [7]:
# Initial values of the cell state, laid out as (num_layers, batch, hidden_size):
#   num_layers  = 2
#   batch       = 3
#   hidden_size = 20
c0 = torch.randn((2, 3, 20))

In [9]:
# Feed the whole input sequence through the LSTM, starting from the supplied
# initial (hidden, cell) states. The call returns the per-time-step outputs
# together with the final (hidden, cell) state pair.
output, (hn, cn) = lstm(ipt, hx=(h0, c0))

In [10]:
# Per-time-step outputs: the displayed shape is (seq_len=5, batch=3, hidden_size=20).
output.shape

torch.Size([5, 3, 20])

In [11]:
# Final hidden state: the displayed shape is (num_layers=2, batch=3, hidden_size=20).
hn.shape

torch.Size([2, 3, 20])

In [12]:
# Final cell state: the displayed shape is (num_layers=2, batch=3, hidden_size=20).
cn.shape

torch.Size([2, 3, 20])