In [1]:
import tensorflow as tf


In [2]:
from tensorflow.keras.layers import Input, SimpleRNN,Dense,Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [4]:
# Build a toy input batch of standard-normal noise
# One sample (N), ten time steps (T), three input features (D);
# K is the number of output units the network will have

N, T, D, K = 1, 10, 3, 2
X = np.random.standard_normal(size=(N, T, D))

In [5]:
# Build the network: (T, D) input sequence -> SimpleRNN -> Dense

M = 5 # size of the hidden state
i = Input(shape=(T, D))
x = SimpleRNN(units=M)(i) # default settings; the (1, K) prediction implies only the last hidden state is passed on
x = Dense(units=K)(x) # no activation given, so the K outputs are a plain affine map of the hidden state

model = Model(inputs=i, outputs=x)

In [6]:
# Forward pass through the freshly built model; this is the reference
# value the manual NumPy computation has to reproduce
Yhat = model.predict(x=X)
print(Yhat)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 211ms/step
[[-0.6359485  -0.92084384]]


In [7]:
# Goal: replicate the prediction above by hand with NumPy
# Get the weights first; start with the model summary to see the layers
# and their parameter counts
model.summary()

In [8]:
# See what's returned by the layer at index 1 (the SimpleRNN):
# a list of three NumPy arrays, whose shapes are checked in the next cell
model.layers[1].get_weights()

[array([[ 0.11662453,  0.3985526 , -0.6301315 , -0.5182423 ,  0.65124184],
        [-0.14209116, -0.8655239 , -0.308024  , -0.3510849 , -0.03390282],
        [-0.33097696, -0.46438557, -0.48592415,  0.788238  ,  0.7232892 ]],
       dtype=float32),
 array([[ 0.65843534,  0.06228849, -0.27750334,  0.47995925, -0.5051873 ],
        [ 0.18187127,  0.9204235 , -0.00164645, -0.00573027,  0.3459882 ],
        [ 0.5003584 , -0.13532771,  0.846048  , -0.07466243,  0.09978123],
        [-0.09054675, -0.19773108,  0.02155281,  0.7798081 ,  0.58663315],
        [-0.52424103,  0.30253848,  0.45467302,  0.39488113, -0.52056044]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [9]:
# Check their shapes
# Should make sense given D input features and M hidden units
# First array is input > hidden (D x M)
# Second array is hidden > hidden (M x M)
# Third array is the bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(a.shape, b.shape, c.shape)

# Fail loudly if the layout ever differs from what the manual forward pass assumes
assert a.shape == (D, M), "input > hidden weights should be D x M"
assert b.shape == (M, M), "hidden > hidden weights should be M x M"
assert c.shape == (M,), "bias should be a vector of length M"

(3, 5) (5, 5) (5,)


In [10]:
# Pull the trained-or-initial parameters out of the two weighted layers
rnn_layer = model.layers[1]
dense_layer = model.layers[2]

# recurrent layer: input > hidden, hidden > hidden, hidden bias
Wx, Wh, bh = rnn_layer.get_weights()
# output layer: hidden > output, output bias
Wo, bo = dense_layer.get_weights()

In [11]:
# Manual forward pass: replay the SimpleRNN + Dense computation with NumPy
x = X[0] # the batch holds a single sequence
h_last = np.zeros(M) # hidden state starts at all zeros
Yhats = [] # one output vector per time step

for t, x_t in enumerate(x):
    # recurrent update: combine the current input with the previous hidden state
    pre_activation = np.dot(x_t, Wx) + np.dot(h_last, Wh) + bh
    h = np.tanh(pre_activation)

    # dense read-out; only the value from the last step is compared to the model
    y = np.dot(h, Wo) + bo
    Yhats.append(y)

    # important: carry the state forward to the next time step
    h_last = h

# print the final output
print(Yhats[-1])

[-0.63594844 -0.9208438 ]
