<a href="https://colab.research.google.com/github/dsogden/NLP-Specialization/blob/main/Chap3_W1_RNN_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from numpy import random
from time import perf_counter
import tensorflow as tf

In [2]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
    large-magnitude negative inputs do not overflow inside ``exp`` (the naive
    form raises a RuntimeWarning there). Results agree with the naive formula
    up to floating-point rounding.

    Args:
        x: NumPy scalar or array.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, -x))

In [3]:
# Seed NumPy's legacy global generator so the random draws below are repeatable.
random.seed(10)

# Sizes used throughout the notebook.
emb = 128    # length of each input vector
T = 256      # number of input vectors in X (first axis)
h_dim = 16   # length of the hidden state

# Initial hidden state: an all-zero column vector.
h_0 = np.zeros(shape=(h_dim, 1))

# Draw order matters for reproducibility: the three weight matrices first,
# then the three bias vectors, then the inputs.
w1, w2, w3 = [random.standard_normal(size=(h_dim, emb + h_dim)) for _ in range(3)]
b1, b2, b3 = [random.standard_normal(size=(h_dim, 1)) for _ in range(3)]

# Random input X; the trailing axis of size 1 is added to achieve the batch
# representation.
X = random.standard_normal(size=(T, emb, 1))

# Weight lists for the two layer types. The GRU list receives copies of w1/b1
# so it does not share those arrays with the vanilla RNN list.
weights_vanilla = [w1, b1]
weights_GRU = [w1.copy(), w2, w3] + [b1.copy(), b2, b3]

In [5]:
def forward_V_RNN(inputs, weights):
    """Compute one step of the vanilla RNN.

    Args:
        inputs: pair ``(x, h_t)`` holding the current input and the previous
            hidden state; they are concatenated along axis 0 as ``[h_t, x]``.
        weights: pair ``(wh, bh)`` holding the weight matrix and the bias.

    Returns:
        ``(y, h_t)`` where both entries are the new hidden state
        ``sigmoid(wh . [h_t, x] + bh)``.
    """
    x, h_prev = inputs
    wh, bh = weights

    # Hidden state goes first in the concatenation, then the input.
    stacked = np.concatenate([h_prev, x])
    h_next = sigmoid(np.dot(wh, stacked) + bh)

    # The step output is the hidden state itself.
    return h_next, h_next

def forward_GRU(inputs, weights):
    """Compute one step of a GRU cell.

    Args:
        inputs: pair ``(x, h_t)`` holding the current input and the previous
            hidden state; they are concatenated along axis 0 as ``[h_t, x]``.
        weights: six-element sequence ``(wu, wr, wc, bu, br, bc)``: the weight
            matrices and biases of the update gate, the reset gate and the
            candidate state, in that order.

    Returns:
        ``(y, h_t)`` where both entries are the new hidden state
        ``u * c + (1 - u) * h_t``.
    """
    x, h_t = inputs

    # weights.
    wu, wr, wc, bu, br, bc = weights

    # Both gates read the same [h_t, x] concatenation.
    gate_input = np.concatenate([h_t, x])

    # Update gate.
    u = sigmoid(np.dot(wu, gate_input) + bu)

    # Reset gate. It must use its own matrix ``wr``; the previous code applied
    # ``wc`` here, which left ``wr`` unused.
    r = sigmoid(np.dot(wr, gate_input) + br)

    # Candidate state, computed from the reset-scaled hidden state and the input.
    c = np.tanh(np.dot(wc, np.concatenate([r * h_t, x])) + bc)

    # Blend the candidate and the previous hidden state with the update gate.
    h_t = u * c + (1 - u) * h_t
    y = h_t
    return y, h_t

In [6]:
# Run a single GRU step on X[1] starting from the zero hidden state h_0 and
# display the step output (first element of the returned pair).
forward_GRU(inputs=[X[1], h_0], weights=weights_GRU)[0]

array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]])

In [7]:
def scan(fn, elems, weights, h_0):
    """Apply a step function across a sequence while threading a hidden state.

    Args:
        fn: step function called as ``fn([x, h], weights)`` and returning a
            pair ``(y, h_next)``.
        elems: iterable of per-step inputs.
        weights: passed unchanged to every call of ``fn``.
        h_0: hidden state fed to the first step.

    Returns:
        ``(ys, h_t)``: the list of per-step outputs and the hidden state after
        the last step (``h_0`` itself when ``elems`` is empty).
    """
    outputs = []
    state = h_0
    for step_input in elems:
        # Each step consumes the state produced by the previous one.
        step_output, state = fn([step_input, state], weights)
        outputs.append(step_output)
    return outputs, state

In [10]:
# Run the vanilla RNN over all of X, then report how many outputs were
# collected and the shapes of one output and of the final hidden state.
ys, h_t = scan(fn=forward_V_RNN, elems=X, weights=weights_vanilla, h_0=h_0)
print(
    f'Length of ys: {len(ys)}',
    f'Shape of each y within ys: {ys[0].shape}',
    f'Shape of h_T: {h_t.shape}',
    sep='\n',
)

Length of ys: 256
Shape of each y within ys: (16, 1)
Shape of h_T: (16, 1)


In [11]:
# Wall-clock time (in milliseconds) of one full pass of the vanilla RNN over X.
tic = perf_counter()
ys, h_T = scan(forward_V_RNN, X, weights_vanilla, h_0)
toc = perf_counter()
RNN_time = 1000 * (toc - tic)
print(f"It took {RNN_time:.2f}ms to run the forward method for the vanilla RNN.")

# Same measurement for the GRU; ys and h_T are overwritten with the GRU results.
tic = perf_counter()
ys, h_T = scan(forward_GRU, X, weights_GRU, h_0)
toc = perf_counter()
GRU_time = 1000 * (toc - tic)
print(f"It took {GRU_time:.2f}ms to run the forward method for the GRU.")

It took 9.77ms to run the forward method for the vanilla RNN.
It took 32.42ms to run the forward method for the GRU.
