In [1]:
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Input
from tensorflow.keras import Model
import numpy as np

In [8]:
input_t = Input((5, 1))
units = 1 #dimensionality of the output space (Hidden and Cell states)
output_t = LSTM(units)(input_t)
model = Model(inputs=input_t, outputs=output_t)
print(model.summary())


Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 5, 1)]            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 1)                 12        
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________
None


---
LSTM expects input data to be a 3D tensor such that:
[batch_size, timesteps, feature]
batch_size how many samples in each batch during training and testing
timesteps means how many values exist in a sequence. For example in [4, 7, 8, 4] there are 4 timesteps
features: how many dimensions are used to represent a data in one time step. For example, if each value in the sequence is one hot encoded with 9 zero and 1 one then feature is 10

Example:

In raw format:
X=[4, 7, 8, 4]

In one hot encoded format with 10 dimensions (feature = 10):

X=[[0 0 0 0 1 0 0 0 0 0]
[0 0 0 0 0 0 0 1 0 0]
[0 0 0 0 0 0 0 0 1 0]
[0 0 0 0 1 0 0 0 0 0]]
---

---
By default, the output of a RNN layer contains a single vector per sample. 
This vector is the RNN cell output corresponding to the last timestep, 
containing information about the entire input sequence. 
The shape of this output is (batch_size, units) where units corresponds 
to the units argument passed to the layer's constructor.

A RNN layer can also return the entire sequence of outputs for each sample 
(one vector per timestep per sample), if you set return_sequences=True. 
The shape of this output is (batch_size, timesteps, units).
---

# To calculate number of parameters

x = the number of features

h = size of the hidden layer (number of neurons in hidden layer)


To calculate, we use the equation

W param = h * x

U param = h * h

Biases Param = h

number of parameters = W param + U param + Biases Param

In [9]:
input_t.shape

TensorShape([None, 5, 1])

In [12]:
# x = input_t.shape[2]
# h = units

x=2308
h=32

parameters = 4 * (((x + h) * h) + h)

In [14]:
print(parameters + 33)

299681


In [60]:
example_input = np.array([[[1],[2],[3],[4]]])
model.predict(example_input)

array([[-0.29205453]], dtype=float32)

Each LSTM gate follows an equation of the form,

$i_t = \sigma(W_i x_t + U_i h_{t-i} + b_i)$

References:

https://web.archive.org/web/20201109035427/http://deeplearning.net/tutorial/lstm.html
https://stackoverflow.com/questions/42861460/how-to-interpret-weights-in-a-lstm-layer-in-keras

https://medium.com/deep-learning-with-keras/lstm-understanding-the-number-of-parameters-c4e087575756

In [61]:
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]

In [63]:
print("W", W.size, ' calculated as 4*features*LSTMoutputDimension ', 4*x*h)
print("U", U.size, ' calculated as 4*LSTMoutputDimension*LSTMoutputDimension ', 4*h*h)
print("b", b.size , ' calculated as 4*LSTMoutputDimension ', 4*h)
print("Total Parameter Number: W+ U + b " , W.size+ U.size + b.size)
print("Total Parameter Number: 4 × ((x + h) × h +h) " , 4* ((x+h)*h+h))

W 4  calculated as 4*features*LSTMoutputDimension  4
U 4  calculated as 4*LSTMoutputDimension*LSTMoutputDimension  4
b 4  calculated as 4*LSTMoutputDimension  4
Total Parameter Number: W+ U + b  12
Total Parameter Number: 4 × ((x + h) × h +h)  12


In [64]:
W_i = W[:, :units]
W_f = W[:, units: units * 2]
W_c = W[:, units * 2: units * 3]
W_o = W[:, units * 3:]

U_i = U[:, :units]
U_f = U[:, units: units * 2]
U_c = U[:, units * 2: units * 3]
U_o = U[:, units * 3:]

b_i = b[:units]
b_f = b[units: units * 2]
b_c = b[units * 2: units * 3]
b_o = b[units * 3:]