<a href="https://colab.research.google.com/github/abrehamgezahegn/ml-notebooks/blob/master/TF2_0_RNN_Shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow
# (install line kept commented out; enable it only if TF 2.x is not already available)
# !pip install -q tensorflow-gpu==2.0.0-beta1

# Ask Colab for TensorFlow 2.x. The magic is Colab-specific, so any failure
# it raises is deliberately ignored by the except clause below.
try:
  %tensorflow_version 2.x  # Colab only.
except Exception:
  pass

import tensorflow as tf
# Show which TensorFlow version was actually loaded
print(tf.__version__)

[K     |████████████████████████████████| 348.9MB 45kB/s 
[K     |████████████████████████████████| 501kB 42.9MB/s 
[K     |████████████████████████████████| 3.1MB 43.5MB/s 
[?25h2.0.0-beta1


In [1]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [2]:
# Build a toy input tensor: N samples, each a length-T sequence of D features.
# K (number of output units) is defined here and used when the model is built.
N, T, D, K = 1, 10, 3, 2
X = np.random.randn(N, T, D)

In [3]:
# A single sample is a (T, D) matrix: one row per time step, one column per feature
X[0].shape

(10, 3)

In [4]:
# Display the whole input tensor, shape (N, T, D)
X

array([[[ 1.07201865,  1.17343129, -0.17355935],
        [-0.60753126,  0.21396168, -0.51129369],
        [ 1.01802125,  0.53501479, -1.3430979 ],
        [ 1.71135956,  0.3507869 , -0.88316857],
        [-0.07667067,  0.37413702, -0.90690355],
        [-0.65641321,  1.20911047, -0.32320027],
        [ 1.62606981, -0.69930773, -1.75792388],
        [ 0.11052122, -0.8837098 ,  0.80994552],
        [-0.6876525 ,  1.07436007,  0.16391022],
        [ 0.13889164, -0.86498137,  0.08777094]]])

In [6]:
# Build a many-to-one RNN with the functional API:
# (T, D) input sequence -> SimpleRNN with M hidden units -> Dense with K outputs
M = 5  # number of hidden units
i = Input(shape=(T, D))
x = SimpleRNN(units=M)(i)  # only the final hidden state is passed on
x = Dense(units=K)(x)      # linear read-out of the final hidden state

model = Model(inputs=i, outputs=x)

In [7]:
# Get the output
# Forward pass through the model (no training has been done in this notebook);
# Yhat has one row per sample and K columns.
Yhat = model.predict(X)
print(Yhat)

[[0.35161665 0.08591088]]


In [8]:
# See if we can replicate this output
# Get the weights first
# The summary lists each layer's output shape and parameter count:
#   SimpleRNN: D*M + M*M + M = 15 + 25 + 5 = 45
#   Dense:     M*K + K       = 10 + 2      = 12
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 5)                 45        
_________________________________________________________________
dense (Dense)                (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Input-to-hidden weight matrix of the SimpleRNN layer: shape (D, M)
model.layers[1].get_weights()[0].shape

(3, 5)

In [28]:
# Hidden-to-hidden (recurrent) weight matrix of the SimpleRNN layer: shape (M, M)
model.layers[1].get_weights()[1].shape

(5, 5)

In [29]:
# Bias vector of the SimpleRNN layer: shape (M,)
model.layers[1].get_weights()[2].shape


(5,)

In [31]:
# Dense layer parameters: [weight matrix of shape (M, K), bias of shape (K,)]
model.layers[2].get_weights()

[array([[-0.30454606, -0.32411462],
        [-0.73438346,  0.68329024],
        [ 0.299497  ,  0.54123557],
        [-0.67322814,  0.25883365],
        [-0.2507323 ,  0.06196213]], dtype=float32),
 array([0., 0.], dtype=float32)]

In [8]:
# See what's returned
# For the SimpleRNN layer, get_weights() gives a list of three NumPy arrays
model.layers[1].get_weights()

[array([[-0.20329249,  0.47673887, -0.31710976,  0.45261496,  0.12736279],
        [-0.27829814, -0.63242257, -0.72135246,  0.568363  ,  0.24417084],
        [-0.24732071,  0.02224457, -0.28318483,  0.40702945, -0.6741329 ]],
       dtype=float32),
 array([[-0.46295953, -0.4901098 ,  0.40598527,  0.6060096 ,  0.11571218],
        [-0.06085806, -0.44619498, -0.73997635, -0.00701087,  0.49959213],
        [-0.3421387 ,  0.74448276, -0.2279582 ,  0.4377365 ,  0.29173338],
        [ 0.62830657,  0.0518047 ,  0.36657837,  0.14878654,  0.6678548 ],
        [-0.51974696,  0.06135365,  0.3182236 , -0.64726543,  0.4537401 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [None]:
# Unpack the SimpleRNN parameters and confirm their shapes:
#   a -> input-to-hidden weights,   expected (D, M)
#   b -> hidden-to-hidden weights,  expected (M, M)
#   c -> bias term,                 expected (M,)
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [None]:
# Name the parameters the way they appear in the RNN equations.
# Wx: (D, M) input-to-hidden, Wh: (M, M) hidden-to-hidden, bh: (M,) hidden bias
Wx, Wh, bh = model.layers[1].get_weights()
# Wo: (M, K) hidden-to-output, bo: (K,) output bias
Wo, bo = model.layers[2].get_weights()

In [None]:
# Manually replay the forward pass of SimpleRNN + Dense with the extracted weights:
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y_t = h_t . Wo + bo
# The last y should agree with model.predict(X)[0] up to float32 rounding,
# provided the weights were read from the same model instance.
h_last = np.zeros(M) # initial hidden state
x = X[0] # the one and only sample
Yhats = [] # where we store the outputs

for t in range(T):
  # the bias is added inside the tanh, together with both matrix products
  h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
  y = h.dot(Wo) + bo # we only care about this value on the last iteration
  Yhats.append(y)

  # important: assign h to h_last
  h_last = h

# print the final output
print(Yhats[-1])

[-0.70623848  0.45167215]


In [None]:
# Bonus exercise: calculate the output for multiple samples at once (N > 1)