In [9]:
import numpy as np

import tensorflow as tf
import keras

from keras.models import Sequential, Model
from keras.layers import SimpleRNN, Dense, Input

# RNN Basics

In [8]:
# Shape conventions for a Keras recurrent layer (timesteps == input_length):
#   output shape      = (batch_size, timesteps, units) when return_sequences=True (as here),
#                       (batch_size, units) otherwise
#   batch_input_shape = (batch_size, timesteps, input_dim)
#   input_shape       = (timesteps, input_dim)
#
# Equivalent layer spec:
#   SimpleRNN(units=3, batch_size=8, input_shape=(2, 10), return_sequences=True)
model = Sequential(
    [
        SimpleRNN(units=3, batch_input_shape=(8, 2, 10), return_sequences=True),
    ]
)
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_6 (SimpleRNN)    (8, 2, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Implementing RNN from scratch

## pseudo code for RNN

```python
hidden_state_t = 0 # initial hidden state

for input_t in inputs: # iterate over the input sequence, one time step at a time
    output_t = tanh(input_t, hidden_state_t) # the output is computed by applying tanh to the current input and the hidden state from the previous time step
    hidden_state_t = output_t # the output becomes the hidden state that is carried into the next time step
```

In [16]:
# Seed NumPy's global RNG so the random inputs below (and any later
# np.random.* draws in this notebook) are identical after Restart & Run All.
SEED = 0
np.random.seed(SEED)

timesteps = 10  # input sequence length (= input_length)
input_dim = 4  # size of the input vector at each time step
hidden_units = 8  # number of neurons in the hidden layer

inputs = np.random.random((timesteps, input_dim))  # input data, one row per time step
hidden_state_t = np.zeros((hidden_units,))  # initial hidden state: all zeros

print(inputs)
print(hidden_state_t)
print(inputs.shape, hidden_state_t.shape)

[[4.98705738e-01 3.43579551e-01 7.29424948e-01 8.83242390e-01]
 [6.57167566e-01 2.75695171e-01 4.18105849e-01 2.87613807e-01]
 [5.04763833e-01 8.07398860e-01 2.44745919e-01 7.93149621e-01]
 [5.17434268e-01 5.93458272e-01 5.39100197e-01 6.17021694e-01]
 [8.98304903e-04 3.89044120e-01 8.07803813e-01 7.09065966e-01]
 [9.63687096e-01 7.73384134e-01 9.84945676e-01 3.96307645e-01]
 [4.44714669e-01 6.57832453e-01 1.38266969e-01 4.39878920e-01]
 [7.79074827e-01 8.72103339e-03 7.89651492e-01 1.76798585e-01]
 [2.88806605e-01 3.78584281e-01 2.04598350e-01 8.16329346e-01]
 [7.73349873e-01 4.73593833e-01 7.84976774e-01 1.81558486e-02]]
[0. 0. 0. 0. 0. 0. 0. 0.]
(10, 4) (8,)


In [17]:
# RNN parameters, each filled with np.random.random draws.
# The draws happen in the order Wx, Wh, b.
Wx, Wh, b = (
    np.random.random(shape)
    for shape in (
        (hidden_units, input_dim),     # Wx: input-to-hidden weights
        (hidden_units, hidden_units),  # Wh: hidden-to-hidden weights
        (hidden_units,),               # b: bias
    )
)

print(*(param.shape for param in (Wx, Wh, b)))

(8, 4) (8, 8) (8,)


In [18]:
# Forward pass of a vanilla RNN over the whole input sequence:
#     h_t = tanh(Wx @ x_t + Wh @ h_{t-1} + b)
#
# Start from the zero state every time this cell runs, so re-running it
# reproduces the same result instead of continuing from the final hidden
# state left behind by a previous run.
hidden_state_t = np.zeros((hidden_units,))

# One row per time step: shape (timesteps, hidden_units).
total_hidden_states = np.empty((len(inputs), hidden_units))

for t, input_t in enumerate(inputs):
    # (hidden_units, input_dim) @ (input_dim,) + (hidden_units, hidden_units) @ (hidden_units,)
    # + (hidden_units,)  ->  shape (hidden_units,) = (8,)
    output_t = np.tanh(np.dot(Wx, input_t) + np.dot(Wh, hidden_state_t) + b)
    total_hidden_states[t] = output_t
    hidden_state_t = output_t  # this step's output is the next step's hidden state

In [21]:
# Show all hidden states as a single 2-D array, one row per time step
# (np.asarray is a no-op if the states are already an ndarray).
print(np.asarray(total_hidden_states))

[[0.99049901 0.92493837 0.89850866 0.98886213 0.89177211 0.97707599
  0.92730828 0.9001823 ]
 [0.99999449 0.99989076 0.99989179 0.99999587 0.99993034 0.99946534
  0.9998369  0.99981889]
 [0.99999827 0.99997633 0.99996929 0.99999851 0.99998195 0.99987508
  0.9999071  0.9999501 ]
 [0.99999855 0.9999768  0.99996781 0.99999871 0.99997613 0.99981816
  0.99992724 0.99993875]
 [0.99999847 0.99998269 0.99995566 0.99999811 0.99997488 0.99957686
  0.99989596 0.99989746]
 [0.99999953 0.99998902 0.99998851 0.99999965 0.99997298 0.99992919
  0.99997732 0.99997598]
 [0.99999512 0.99994458 0.99993582 0.99999664 0.99996805 0.99966904
  0.99982996 0.99992139]
 [0.99999819 0.9999462  0.99994156 0.99999856 0.99995337 0.99953928
  0.99994303 0.99986071]
 [0.9999972  0.9999576  0.99993262 0.99999708 0.99997961 0.99967604
  0.99985401 0.99986625]
 [0.99999788 0.99996182 0.99996172 0.9999987  0.99994641 0.9996791
  0.99993868 0.99994085]]
