In [0]:
%tensorflow_version 2.x
import tensorflow as tf
# layers
from tensorflow.keras.layers import Input, Dense, SimpleRNN, Flatten
# model
from tensorflow.keras.models import Model
# optimizers
from tensorflow.keras.optimizers import Adam, SGD

import numpy as np # linear algebra
import pandas as pd # data manipulation
import matplotlib.pyplot as plt # data visualisation

In [0]:
# Things you should automatically know
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [0]:
# Make some data
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Draw the input from a seeded, local generator so the same X is produced on
# every Restart & Run All (and NumPy's global random state is left untouched).
SEED = 42
X = np.random.RandomState(SEED).randn(N, T, D)  # standard-normal, shape (N, T, D)

In [0]:
# Make an RNN
# Fix TensorFlow's global seed before any layer is created so the randomly
# initialised weights -- and therefore the prediction that is later reproduced
# by hand -- are the same on every fresh run.
tf.random.set_seed(42)

M = 5 # number of hidden units
i = Input(shape = (T, D)) # one sequence: T time steps with D features each
x = SimpleRNN(M)(i) # output is (None, M): only the last hidden state; i'll assume we're doing regression
x = Dense(K)(x) # linear read-out of that hidden state to K outputs

model = Model(i,x)

In [9]:
# Get the output
# Run the sample batch X through the model; Yhat is kept as the reference value
# that the manual NumPy calculation further down is compared against.
Yhat = model.predict(X)
# Show the prediction together with its shape (printed below as (1, 2), i.e. (N, K)).
print(Yhat, ' | Yhat shape: ', Yhat.shape)

[[-0.49152413  1.4754773 ]]  | Yhat shape:  (1, 2)


In [10]:
# Print the layer table with output shapes and parameter counts.
# The counts follow from the sizes chosen above (D=3, M=5, K=2):
#   simple_rnn: D*M + M*M + M = 15 + 25 + 5 = 45
#   dense:      M*K + K       = 10 + 2      = 12
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 5)                 45        
_________________________________________________________________
dense (Dense)                (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [13]:
# See what's returned from the simple_rnn layer's weights.
# model.layers[0] is the InputLayer, so index 1 is the SimpleRNN layer.
# get_weights() gives back a list of three NumPy arrays (shown in the output).
model.layers[1].get_weights()

[array([[ 0.48157376,  0.6239118 ,  0.77620476, -0.36523885,  0.7046985 ],
        [ 0.62407786, -0.62856376, -0.07965887, -0.05223566,  0.73875886],
        [ 0.53971547, -0.32652634,  0.25231475, -0.09429473, -0.36331016]],
       dtype=float32),
 array([[-0.25510156, -0.4307544 ,  0.83131003,  0.13111149, -0.20274924],
        [ 0.7697904 , -0.29691958,  0.03825119,  0.5383575 ,  0.16724113],
        [-0.094816  ,  0.7261353 ,  0.38723323,  0.3809731 ,  0.41066712],
        [ 0.3003923 , -0.09217978,  0.2957366 , -0.6840711 ,  0.58809346],
        [ 0.49307224,  0.43648577,  0.2646713 , -0.28264344, -0.6453075 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [15]:
# Check the shapes of the three SimpleRNN weight arrays -- they should make sense:
#   a: input  > hidden weights,  shape (D, M)
#   b: hidden > hidden weights,  shape (M, M)
#   c: bias term, a vector of length M
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [0]:
# Assign the weight variables
# model.layers[1] is the SimpleRNN layer and model.layers[2] is the Dense output layer.
rnn_layer, dense_layer = model.layers[1], model.layers[2]

# Recurrent part: input > hidden weights, hidden > hidden weights, hidden bias.
Wx, Wh, bh = rnn_layer.get_weights()

# Output part: the Dense layer maps the hidden state h to the prediction with
#   y = h.dot(Wo) + bo      (i.e. transpose(Wo) * h + bo for a column vector h)
# so its weights and bias are what we need for the final step.
Wo, bo = dense_layer.get_weights()

In [18]:
# Now let's calculate the output manually!
# Recurrence being implemented (the pseudocode from my notes):
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y_t = h_t . Wo + bo
x = X[0]              # the one and only sample
h_last = np.zeros(M)  # initial hidden state is all zeros
Yhats = []            # output computed at every time step

for t in range(T):
  # combine the current input with the previous hidden state
  pre_activation = np.dot(x[t], Wx) + np.dot(h_last, Wh) + bh
  h = np.tanh(pre_activation)

  # project the hidden state to the output; only the last one matters
  y = np.dot(h, Wo) + bo
  Yhats.append(y)

  # important: carry the new hidden state into the next step
  h_last = h

# print the final output
print(Yhats[-1])

[-0.49152417  1.47547726]


In [19]:
# What the model predicted, checked against the manual result programmatically.
# Keras returns float32 while the NumPy loop worked in float64, so the two agree
# only up to rounding -- compare with a tolerance instead of by eye.
np.testing.assert_allclose(Yhats[-1], Yhat[0], rtol=1e-5, atol=1e-6)
Yhat
# No AssertionError was raised, so the manual calculation reproduces the model's output.

array([[-0.49152413,  1.4754773 ]], dtype=float32)