<a href="https://colab.research.google.com/github/boemer00/deep_learning/blob/main/rnn_shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.models import Sequential, Model
from keras.layers import Input, SimpleRNN, Dense, Flatten
from keras.optimizers import SGD, Adam

Things you should automatically know:
- **N** = number of samples
- **T** = sequence length
- **D** = number of input features
- **M** = number of hidden units
- **K** = number of output units

In [2]:
# Problem dimensions
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
M = 5   # number of hidden units
K = 2   # number of output units

# Seed a generator so the same input is produced on every Restart & Run All.
# This pins X only; the Keras layers still draw their own initial weights.
SEED = 42
rng = np.random.default_rng(SEED)

# Make some data: N sequences of length T with D standard-normal features each
X = rng.standard_normal((N, T, D))

In [3]:
def initialize_model():
  """Build an untrained SimpleRNN -> Dense network.

  Reads the notebook-level sizes T, D, M and K: the input is a sequence of
  shape (T, D), the recurrent layer has M hidden units, and the Dense layer
  emits K values. The model is returned without being compiled.
  """
  layers = [
    Input(shape=(T, D)),
    SimpleRNN(M),
    Dense(K),
  ]
  return Sequential(layers)

In [4]:
# Build a fresh, untrained model; its weights are whatever the layers were initialised with
model = initialize_model()

# Get output: forward pass over X, giving one row per sample and K columns
y_hat = model.predict(X)
y_hat



array([[ 0.6309167, -0.4028176]], dtype=float32)

In [5]:
# Parameter counts to check against the summary:
#   SimpleRNN: D*M + M*M + M = 3*5 + 5*5 + 5 = 45
#   Dense:     M*K + K       = 5*2 + 2       = 12
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Get weights from the SimpleRNN layer
Wx, Wh, bh = model.layers[0].get_weights()

# Get weights from the Dense layer
W0, b0 = model.layers[1].get_weights()

In [7]:
# Expect (D, M), (M, M) and (M,)
print(*(w.shape for w in (Wx, Wh, bh)))

(3, 5) (5, 5) (5,)


In [8]:
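# Alternative layer indices, kept for reference -- presumably for a model whose
# layers[0] is an explicit input layer, which shifts the RNN and Dense layers by one:
# Wx, Wh, bh = model.layers[1].get_weights()
# W0, b0 = model.layers[2].get_weights()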

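Now we reproduce the forward pass of the SimpleRNN by hand. At every time step the hidden state is updated with

$$
h_t = \tanh(W_{xh}x_t + W_{hh}h_{t-1} + b_h)
$$

starting from $h_0 = 0$, and the Dense layer then maps a hidden state to an output, $\hat{y}_t = W_o h_t + b_o$. In the code below, `Wx`, `Wh`, `bh` play the roles of $W_{xh}$, $W_{hh}$, $b_h$, and `W0`, `b0` those of $W_o$, $b_o$. Because Keras stores its kernels with shape (inputs, units), the products are written `x[t].dot(Wx)` and `h_last.dot(Wh)` rather than $W_{xh}x_t$ and $W_{hh}h_{t-1}$.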

In [9]:
h_last = np.zeros(M)  # initial hidden state h_0 is all zeros
x = X[0]  # the one and only sample, shape (T, D)
y_hats = []  # Dense output computed from the hidden state at every time step

for t in range(T):
  # Recurrent update: h_t = tanh(x_t Wx + h_{t-1} Wh + bh)
  h_t = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)

  # Apply the Dense layer to the current hidden state
  y = h_t.dot(W0) + b0
  y_hats.append(y)

  # important: carry h_t forward as the previous state for the next step
  h_last = h_t

# The model only returns the output for the final time step, so that is the
# one that must agree with model.predict. Check it programmatically instead of
# by eye; tolerances are loose because Keras returned float32 while this loop
# runs in float64.
np.testing.assert_allclose(y_hats[-1], y_hat[0], rtol=1e-4, atol=1e-5)

y_hats[-1]

array([ 0.63091662, -0.40281767])

In [10]:
y_hats

[array([ 0.03153414, -0.68965957]),
 array([-0.21473937,  0.57706282]),
 array([-0.12497138,  0.2288551 ]),
 array([-0.58892546,  0.78484788]),
 array([ 0.41536293, -0.6829869 ]),
 array([ 0.66542818, -0.16764368]),
 array([-0.79782581,  0.14309699]),
 array([-0.79588051,  0.59549681]),
 array([-0.03186292,  0.01524419]),
 array([ 0.63091662, -0.40281767])]