## LSTM
A tiny LSTM network sample to understand the architecture of LSTM networks.

In [None]:
import numpy as np
import tensorflow as tf

In [None]:
LSTM_CELL_SIZE = 4  # output size (dimension), which is the same as the hidden size in the cell
SEED = 42  # arbitrary fixed value; any integer works

# The LSTM weights are randomly initialised the first time a cell/layer is
# called. Fixing the seeds up front is meant to make the numbers printed
# further down repeatable after "Restart Kernel -> Run All".
# NOTE(review): tf.keras.utils.set_random_seed requires TensorFlow >= 2.7.
tf.keras.utils.set_random_seed(SEED)

lstm_cell = tf.keras.layers.LSTMCell(units=LSTM_CELL_SIZE)

# Initial state for a batch of one sample: two all-zero tensors of shape
# (1, LSTM_CELL_SIZE) -- the hidden state and the cell (carry) state.
state = [tf.zeros([1, LSTM_CELL_SIZE]), tf.zeros([1, LSTM_CELL_SIZE])]



In [35]:
# Display the initial LSTM state: a list of two zero tensors, each of shape (1, LSTM_CELL_SIZE).
# NOTE: `state` is reassigned later in the Stacked LSTM section, so this cell only
# shows the zero state when the notebook is executed top to bottom.
state

[<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0., 0., 0., 0.]], dtype=float32)>,
 <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0., 0., 0., 0.]], dtype=float32)>]

In [6]:
# A single sample (batch of 1) holding 6 float features -> shape (1, 6).
sample_input = tf.constant(
    [[3, 2, 2, 2, 2, 2]],
    dtype=tf.float32,
)
print(sample_input)

tf.Tensor([[3. 2. 2. 2. 2. 2.]], shape=(1, 6), dtype=float32)


In [12]:
# Run the LSTM cell for one step on `sample_input`, starting from `state`.
# The call returns the step output and the updated state list.
output, state_new = lstm_cell(sample_input, state)

# `state_new` holds two tensors of shape (1, LSTM_CELL_SIZE); the first one has the
# same values as `output` (compare with the printed output of the following cell).
print(state_new)

[<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[-0.03210569,  0.03040035,  0.23020034,  0.02028216]],
      dtype=float32)>, <tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[-0.14249499,  0.3107106 ,  0.25259233,  0.1065513 ]],
      dtype=float32)>]


In [13]:
# Print the step output as a NumPy array: one row (batch of 1) with LSTM_CELL_SIZE values.
print(output.numpy())

[[-0.03210569  0.03040035  0.23020034  0.02028216]]


## Stacked LSTM
An RNN built from stacked LSTM cells. The output of the first layer becomes the input of the second.

In [15]:
# Number of features per time step fed into the first LSTM layer
input_dim = 6

# Empty list that collects the per-layer LSTM cells; the following cells append to it
cells = []

Creating the first layer LSTM cell

In [16]:
from tensorflow.keras.layers import LSTMCell

LSTM_CELL_SIZE_1 = 4 # 4 hidden nodes
cell1 = LSTMCell(units=LSTM_CELL_SIZE_1)
# NOTE: append() mutates the shared `cells` list in place, so re-running this cell
# adds another entry to `cells`; re-run the `cells = []` cell first to start clean.
cells.append(cell1)

Creating the second layer LSTM cell

In [17]:
LSTM_CELL_SIZE_2 = 5 # 5 hidden nodes
cell2 = LSTMCell(units=LSTM_CELL_SIZE_2)
# NOTE: append() mutates the shared `cells` list in place, so re-running this cell
# adds another entry to `cells`; re-run the `cells = []` cell first to start clean.
cells.append(cell2)

In [19]:
from tensorflow.keras.layers import StackedRNNCells

# Stack the two LSTM layers into one multi-layer cell: cell1 (first layer) feeds cell2.
# The list is built explicitly from cell1 and cell2 instead of the `cells` list:
# `cells` is grown with append() in separate notebook cells, so re-running any of
# those cells would silently add duplicate layers to the stack.
stacked_lstm = StackedRNNCells([cell1, cell2])

In [20]:
from tensorflow.keras.layers import RNN

# Create the RNN from stacked_lstm
# Symbolic input laid out as batch size x time steps x features; the number of
# time steps is left unspecified (None) and each step carries input_dim features.
data = tf.keras.Input(shape=(None, input_dim), dtype=tf.float32)

# With return_state=True the layer returns the output followed by the final states.
# NOTE(review): for StackedRNNCells Keras is expected to return one state entry per
# stacked cell, so a fixed two-name unpacking breaks with two cells. Starred
# unpacking keeps `output` as the first element and gathers all state entries
# into the list `state`, whatever the number of stacked cells.
output, *state = RNN(stacked_lstm, return_state=True)(data)


In [25]:
# Batch of 2 sequences, each with 3 time steps of 6 features, i.e. a tensor of
# shape [batch_size, max_time, dimension] = (2, 3, 6).
sample_input = tf.constant(
    [
        # first sequence
        [[1, 2, 3, 4, 3, 2], [1, 2, 1, 1, 1, 2], [1, 2, 2, 2, 2, 2]],
        # second sequence
        [[1, 2, 3, 4, 3, 2], [3, 2, 2, 1, 1, 2], [0, 0, 0, 0, 3, 2]],
    ]
)
sample_input

<tf.Tensor: shape=(2, 3, 6), dtype=int32, numpy=
array([[[1, 2, 3, 4, 3, 2],
        [1, 2, 1, 1, 1, 2],
        [1, 2, 2, 2, 2, 2]],

       [[1, 2, 3, 4, 3, 2],
        [3, 2, 2, 1, 1, 2],
        [0, 0, 0, 0, 3, 2]]], dtype=int32)>

In [22]:
# Display the symbolic output of the RNN layer: the batch dimension is unknown (None)
# and the feature dimension is 5, matching LSTM_CELL_SIZE_2 of the last stacked cell.
output

<KerasTensor: shape=(None, 5) dtype=float32 (created by layer 'rnn')>

In [33]:
# Wrap the symbolic input `data` and the RNN `output` into a Keras model.
# NOTE: `output` must still be the symbolic tensor produced by the RNN layer; the
# single-cell example earlier in the notebook assigns the same name, so run the
# notebook top to bottom before executing this cell.
model = tf.keras.Model(inputs=data, outputs=output)
# Print the layer overview and parameter counts
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 6)]         0         
                                                                 
 rnn (RNN)                   (None, 5)                 376       
                                                                 
Total params: 376
Trainable params: 376
Non-trainable params: 0
_________________________________________________________________


In [34]:

# Run the stacked-LSTM model on the (2, 3, 6) sample batch; the result has one row
# per sequence and 5 values per row.
# NOTE(review): `sample_input` is displayed as int32 while the model Input is declared
# float32 -- this presumably relies on Keras converting the input; confirm, or create
# the constant with dtype=tf.float32.
output_value = model.predict(sample_input)
print(output_value)

[[-0.03110039  0.14632788  0.05268353  0.03511842 -0.01406977]
 [-0.00522834  0.13486136  0.03316358  0.02676085  0.00975636]]
