<a href="https://colab.research.google.com/github/jvpalhares/deep-learning-rnn-udemy/blob/main/RNNShapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install TensorFlow
!pip install tensorflow-gpu
import tensorflow as tf
print(tf.__version__)

Collecting tensorflow-gpu
  Downloading tensorflow_gpu-2.7.0-cp37-cp37m-manylinux2010_x86_64.whl (489.6 MB)
[K     |████████████████████████████████| 489.6 MB 24 kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-2.7.0
2.7.0


In [2]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [5]:
# Make some data
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Seed the generator so the same input is produced on every
# Restart & Run All (the model weights are still randomly initialised).
SEED = 0
rng = np.random.default_rng(SEED)

# Uniform samples in [0, 1) with shape (N, T, D)
X = rng.random((N, T, D))

In [6]:
# Build a minimal RNN: a (T, D) sequence goes through a SimpleRNN with
# M hidden units, then a Dense layer maps the RNN output to K values.
M = 5 # number of hidden units

i = Input(shape=(T, D))

# Recurrent layer applied to the input sequence
rnn_layer = SimpleRNN(M)
x = rnn_layer(i)

# Linear read-out (no activation) on top of the RNN output
output_layer = Dense(K)
x = output_layer(x)

model = Model(inputs=i, outputs=x)

In [7]:
# Get the output
# Run the (untrained) model on the single sample in X.
# This is the reference value the manual NumPy computation further down
# is meant to reproduce.
Yhat = model.predict(X)
print(Yhat)

[[0.19731775 1.1221784 ]]


In [8]:
# See if we can replicate this output
# Start by inspecting the layers and their parameter counts:
#   SimpleRNN: D*M + M*M + M = 3*5 + 5*5 + 5 = 45
#   Dense:     M*K + K       = 5*2 + 2       = 12
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [9]:
# See what's returned
# model.layers[1] is the SimpleRNN layer (index 0 is the InputLayer);
# get_weights() gives back its parameters as a list of NumPy arrays.
model.layers[1].get_weights()

[array([[-0.7383409 , -0.20092523, -0.7323593 ,  0.15078491, -0.84255356],
        [-0.2857594 ,  0.8501758 , -0.6055523 ,  0.40790623,  0.02184069],
        [-0.6820763 ,  0.7016558 ,  0.37577516,  0.7172584 ,  0.08997691]],
       dtype=float32),
 array([[-0.06931889, -0.6329891 , -0.38850334, -0.1578282 , -0.64705074],
        [-0.15029436,  0.1823746 , -0.39380425,  0.8772207 , -0.13983268],
        [-0.6611715 , -0.49674866,  0.46054524,  0.23238471,  0.22358066],
        [-0.72952724,  0.44563296, -0.34634942, -0.3821478 , -0.05662471],
        [-0.05699657,  0.3474422 ,  0.60160065,  0.07439511, -0.713146  ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [10]:
# Check the shapes of the SimpleRNN weights - they should make sense:
#   a: input > hidden   -> (D, M)
#   b: hidden > hidden  -> (M, M)
#   c: bias term        -> (M,)
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [11]:
# SimpleRNN parameters: input > hidden, hidden > hidden, hidden bias
rnn_weights = model.layers[1].get_weights()
Wx, Wh, bh = rnn_weights

# Dense parameters: hidden > output weights and output bias
dense_weights = model.layers[2].get_weights()
Wo, bo = dense_weights

In [12]:
# Manually unroll the RNN over time with NumPy, using the weights
# extracted from the Keras layers.
x = X[0] # the one and only sample
h_last = np.zeros(M) # hidden state before the first time step
Yhats = [] # one output per time step

for t in range(T):
  # recurrence: h_t = tanh(x_t Wx + h_{t-1} Wh + bh)
  input_term = np.dot(x[t], Wx)
  recurrent_term = np.dot(h_last, Wh)
  h = np.tanh(input_term + recurrent_term + bh)

  # read-out: only the value from the final step is printed below
  y = np.dot(h, Wo) + bo
  Yhats.append(y)

  # important: carry the new hidden state into the next step
  h_last = h

# print the final output
print(Yhats[-1])

[0.19731777 1.12217834]
