# A Vanilla RNN Implementation with `tf.Tensor`

*Prepared by Sebastian C. Ibañez*

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Seed NumPy's global RNG (no NumPy sampling is visible in this notebook)
np.random.seed(1)

# Toy sequential data: the integers 1..T, one scalar feature per timestep
T = 6  # Sequence length
d = 1  # Number of features

data = np.arange(1, T + 1)

print(f'Sequence = {data}')

Sequence = [1 2 3 4 5 6]


First, let's convert the data into a format that TensorFlow can work with.

In [3]:
# Build the RNN input as a tf.Tensor
batch_size = 1
# Cast to float32 up front so the inputs share a dtype with the float32 weights and hidden state
X = tf.reshape(
    tf.convert_to_tensor(data, dtype=tf.float32),
    shape=(batch_size, T, d),  # (batch, timesteps, features)
)
X

<tf.Tensor: shape=(1, 6, 1), dtype=float32, numpy=
array([[[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.]]], dtype=float32)>

Next, let's initialize the hidden state.

In [4]:
# Start the recurrence from an all-zero hidden state
hidden_units = 4
h0 = tf.zeros(shape=[batch_size, hidden_units])  # (batch, hidden units)
h0

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0., 0., 0., 0.]], dtype=float32)>

Now let's initialize the weights of the RNN.

In [5]:
# Initialize random weights
tf.random.set_seed(1) # Must be run in the same cell as the sampling below
# The input-to-hidden weights map the d input features onto the hidden units,
# so the leading dimension is the feature count d, NOT the batch size
# (the two only coincide here because batch_size == d == 1).
W_xh = tf.random.normal((d, hidden_units), 0, 1) # shape=(features, hidden units)
# The hidden-to-hidden weights map the previous hidden state onto the new one.
W_hh = tf.random.normal((hidden_units, hidden_units), 0, 1) # shape=(hidden units, hidden units)
# One bias per hidden unit, broadcast across the batch dimension when added.
b_h = tf.zeros((hidden_units,)) # shape=(hidden units,)
print(W_xh)
print(W_hh)
print(b_h)

tf.Tensor([[-1.1012203   1.5457517   0.383644   -0.87965786]], shape=(1, 4), dtype=float32)
tf.Tensor(
[[ 0.40308788 -1.0880209  -0.06309535  1.3365567 ]
 [ 0.7117601  -0.4892865  -0.7642213  -1.0372486 ]
 [-1.2519338   0.02122428 -0.5513758  -1.7431698 ]
 [-0.33536094 -1.0426675   1.0091382   1.2362539 ]], shape=(4, 4), dtype=float32)
tf.Tensor([0. 0. 0. 0.], shape=(4,), dtype=float32)


Now for forward prop!

In [6]:
# Forward prop (w/ tanh activation): h_t = tanh(x_t W_xh + h_{t-1} W_hh + b_h)
h = h0
print(f'h0 = {h}')
for t in range(T):
    x = X[:, t, :]  # Input at timestep t, shape=(batch, features)
    input_term = tf.linalg.matmul(x, W_xh)      # Contribution of the current input
    recurrent_term = tf.linalg.matmul(h, W_hh)  # Contribution of the previous hidden state
    h = tf.tanh(input_term + recurrent_term + b_h)
    print(f'h{t+1} = {h}')

h0 = [[0. 0. 0. 0.]]
h1 = [[-0.80105966  0.9131062   0.36578208 -0.7063606 ]]
h2 = [[-0.9702719  0.9996024 -0.6609227 -0.9999491]]
h3 = [[-0.9489414   0.9999923  -0.19376709 -0.99991894]]
h4 = [[-0.99817336  0.99999964 -0.07197513 -0.9999971 ]]
h5 = [[-0.9998569   1.          0.24250329 -0.99999964]]
h6 = [[-0.99999285  1.          0.42822266 -1.        ]]


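Here's a more efficient implementation of the forward prop that uses only one matrix multiplication per timestep: concatenating `x` with `h` and stacking `W_xh` on top of `W_hh` gives the same result as summing the two separate products.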

In [7]:
# Forward prop (more efficient)
# Stack the two weight matrices ONCE, outside the loop: they do not change
# between timesteps, so rebuilding the concatenation at every step is wasted work.
W_stacked = tf.concat((W_xh, W_hh), axis=0) # W_xh stacked on top of W_hh
h = h0
print(f'h0 = {h}')
for t in range(T):
    x = X[:, t, :]
    # [x, h] @ [W_xh; W_hh] == x @ W_xh + h @ W_hh, so one matmul replaces two
    xh = tf.concat((x, h), axis=1) # Input and previous hidden state side by side
    h = tf.math.tanh(tf.matmul(xh, W_stacked) + b_h) # More efficient
    print(f'h{t+1} = {h}')

h0 = [[0. 0. 0. 0.]]
h1 = [[-0.80105966  0.9131062   0.36578208 -0.7063606 ]]
h2 = [[-0.9702719   0.9996024  -0.66092277 -0.9999491 ]]
h3 = [[-0.94894147  0.99999225 -0.19376715 -0.99991894]]
h4 = [[-0.9981734   0.99999964 -0.07197513 -0.9999971 ]]
h5 = [[-0.9998569   1.          0.24250317 -0.99999976]]
h6 = [[-0.9999928  1.         0.4282226 -1.       ]]


Finally, we can verify that our model specification is correct using `tf.keras`.

In [8]:
# Reference Keras layer with the same number of hidden units as the manual RNN
# (the layer's default activation and initial state are left untouched here)
simple_rnn = tf.keras.layers.SimpleRNN(hidden_units, return_sequences=True, return_state=True)
simple_rnn(X) # Need to pass sample data to the layer in order to create the actual weight tensors
# set_weights is documented to take a list of NumPy arrays, so convert the tensors explicitly
simple_rnn.set_weights([w.numpy() for w in (W_xh, W_hh, b_h)]) # Set weights manually

# output holds the hidden state at every timestep; final_state is the last one
output, final_state = simple_rnn(X)

# Check the match programmatically instead of by eye: `h` still holds the last
# hidden state from the manual forward prop (tolerance covers float32 round-off)
np.testing.assert_allclose(final_state.numpy(), h.numpy(), atol=1e-5)

print(output)
print(final_state)

tf.Tensor(
[[[-0.80105966  0.9131062   0.36578208 -0.7063606 ]
  [-0.9702719   0.9996024  -0.6609227  -0.9999491 ]
  [-0.9489414   0.9999923  -0.19376709 -0.99991894]
  [-0.99817336  0.99999964 -0.07197513 -0.9999971 ]
  [-0.9998569   1.          0.24250329 -0.99999964]
  [-0.99999285  1.          0.42822266 -1.        ]]], shape=(1, 6, 4), dtype=float32)
tf.Tensor([[-0.99999285  1.          0.42822266 -1.        ]], shape=(1, 4), dtype=float32)


To actually train this RNN, we'd need to specify a loss function, calculate gradients, and update the weights (just like in MDS, when we trained a linear regression).

## References

---

[1] Zhang, A., Lipton, Z. C., Li, M., & Smola, A. J. (2021). [Dive into deep learning](https://d2l.ai/index.html). arXiv preprint arXiv:2106.11342.