<h3>Dimensions to Consider</h3>
<ul>
<li>N = number of samples</li>
<li>T = sequence length</li>
<li>D = number of input features</li>
<li>M = number of hidden units</li>
<li>K = number of output units</li>
</ul>


In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, SimpleRNN
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from IPython.display import display, Latex

# Seeded legacy NumPy generator: every draw taken from `rng` is reproducible
# from a fresh kernel.
rng = np.random.RandomState(seed=42)

In [22]:
# Toy problem dimensions, kept tiny so the weights can be inspected by hand
N = 1   # number of samples
T = 10  # sequence length (time steps)
D = 3   # input features per time step
M = 5   # hidden units in the recurrent layer
K = 2   # output units

# One random input batch of shape (N, T, D), drawn from a standard normal
X = rng.standard_normal(size=(N, T, D))

In [28]:
# building the model: Input (T, D) -> SimpleRNN(M) -> Dense(K)
i = Input(shape=(T, D))
x = SimpleRNN(M)(i)  # yields one M-dimensional hidden state per sample: (None, M)
x = Dense(K)(x)  # no activation is passed, so the output layer is linear
model = Model(i, x)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# is rejected by newer Keras releases.
model.compile(loss='mse', optimizer=Adam(learning_rate=0.1))
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [30]:
# sanity check: forward pass of the model (no fit has been run in the cells above)
# on the random input; with N = 1 sample and K = 2 outputs the result is (1, 2)
yhat = model.predict(X)
print(yhat)

[[0.61979914 0.50029945]]


In [34]:
# model layers, in order: index 0 = InputLayer, 1 = SimpleRNN, 2 = Dense
model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7ff95957fa50>,
 <tensorflow.python.keras.layers.recurrent.SimpleRNN at 0x7ff95969c9d0>,
 <tensorflow.python.keras.layers.core.Dense at 0x7ff95a149f90>]

In [38]:
# weights of the SimpleRNN layer (index 1); get_weights() returns three arrays:
# the input-to-hidden matrix, the hidden-to-hidden matrix, and the bias vector
model.layers[1].get_weights()

[array([[ 0.13274437,  0.14516205, -0.32298732, -0.07638413, -0.83695805],
        [-0.25998598, -0.10737938,  0.5534337 ,  0.66708213,  0.55854446],
        [-0.12088317,  0.58395463, -0.07787549, -0.73673743, -0.22523081]],
       dtype=float32),
 array([[ 0.54498386,  0.74629337, -0.18764403, -0.15247121, -0.2959407 ],
        [ 0.27172825, -0.07464047,  0.66661906,  0.5622108 , -0.40016252],
        [-0.5835363 ,  0.57272464, -0.08232725,  0.5531293 ,  0.13690114],
        [ 0.38914013,  0.14762528,  0.29540795,  0.08997385,  0.85522836],
        [ 0.37042084, -0.2960881 , -0.65296614,  0.58874774,  0.0461678 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [43]:
# Unpack the SimpleRNN layer's three weight arrays and report the shape of each
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights
print(*(w.shape for w in rnn_weights))
print(f"first array corresponds to input-to-hidden weight D X M {a.shape}")
print(f'second array corresponds to hidden-to-hidden weights M xM {b.shape}')
print(f'3rd array is a vector corresponding to bias term weights {c.shape}')

(3, 5) (5, 5) (5,)
first array corresponds to input-to-hidden weight D X M (3, 5)
second array corresponds to hidden-to-hidden weights M xM (5, 5)
3rd array is a vector corresponding to bias term weights (5,)


In [44]:
# corresponding to RNN formula:
# Wx: input-to-hidden (D, M), Wh: hidden-to-hidden (M, M), bh: hidden bias (M,)
Wx, Wh, bh = model.layers[1].get_weights()
# Wo: hidden-to-output (M, K), bo: output bias (K,)
Wo, bo = model.layers[2].get_weights()

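$$h_t = \sigma(W_{xh}^{T}x_t + W_{hh}^{T}h_{t-1}+b_{h})$$
$$\hat{y}_{t} = \sigma(W_{o}^{T}h_{t}+b_{o})$$

Here $\sigma$ denotes a generic activation: in the code below the hidden state uses $\tanh$ and the output layer is linear (no activation).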

In [51]:
# Re-implement the forward pass by hand, one time step at a time:
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh),   y_t = h_t . Wo + bo
x = X[0]  # the single sample, shape (T, D)
h_last = np.zeros(M)  # h_0: the initial hidden state is all zeros
Yhats = []  # output computed at every time step
for t in range(T):
    pre_activation = x[t].dot(Wx) + h_last.dot(Wh) + bh
    h = np.tanh(pre_activation)
    y = h.dot(Wo) + bo
    Yhats.append(y)
    # this step's hidden state becomes the previous state of the next step
    h_last = h

# the final y: the output at the last time step, to compare with model.predict(X)
print(Yhats[-1])

[0.61979921 0.50029954]
