<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow_Eager_Execution_Tutorials/blob/master/tutorials/02_intermediate/Bidrectional_Recurrent_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# Turn on eager execution for the rest of the notebook.
tf.enable_eager_execution()
# Short alias for the Keras layers namespace; the model class below uses it.
L = tf.keras.layers
# Alias for the contrib eager module (not referenced in the cells shown here).
tfe = tf.contrib.eager

In [43]:
# Hyper parameters
# num_epochs    -> passed to model.fit(epochs=...)
# num_classes   -> number of label classes (the labels are one-hot encoded to width 10)
# batch_size    -> used by the training Dataset and by model.fit(batch_size=...)
# learning_rate -> passed to the Adam optimizer
num_epochs = 25
num_classes = 10
batch_size = 512
learning_rate = 0.001

# Load MNIST as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Sanity-check the array shapes before any preprocessing.
print("training_data: ", x_train.shape)
print("test_data: ", x_test.shape)
print("training_label: ", y_train.shape)
print("test_label: ", y_test.shape)

training_data:  (60000, 28, 28)
test_data:  (10000, 28, 28)
training_label:  (60000,)
test_label:  (10000,)


In [44]:
# Convert the image arrays to float32 eager tensors; the shape stays
# (num_samples, 28, 28), so each image is later read as a sequence of 28 rows.
# NOTE(review): pixel values are not rescaled here — presumably still in
# [0, 255]; consider dividing by 255.0 if training is unstable.
x_train_eager = tf.convert_to_tensor(x_train, dtype=tf.float32)
x_test_eager = tf.convert_to_tensor(x_test, dtype=tf.float32)

# One-hot encode the integer labels. The depth comes from the `num_classes`
# hyper parameter instead of a hard-coded 10, and no reshape is needed because
# tf.one_hot on a 1-D label vector already yields (num_samples, num_classes).
y_train_eager = tf.one_hot(y_train, num_classes)
y_test_eager = tf.one_hot(y_test, num_classes)

print("training_data: ", x_train_eager.shape)
print("test_data: ", x_test_eager.shape)
print("training_label: ", y_train_eager.shape)
print("test_label: ", y_test_eager.shape)

training_data:  (60000, 28, 28)
test_data:  (10000, 28, 28)
training_label:  (60000, 10)
test_label:  (10000, 10)


### DataSet
You can build a dataset with the `tf.data.Dataset` class, but the Keras API doesn't need it. The `Dataset` class is very useful when you write the training loop manually. When you use the Keras API, you need `numpy.array` inputs instead of `tf.Tensor` (I don't know why...), so you only need NumPy preprocessing (or you can preprocess with TensorFlow functions and then get a `numpy.array` from the `tf.Tensor` with its `numpy()` method).

### NOTE
In this notebook we don't need `tf.data.Dataset`. The code below is only for reference.

In [0]:
# Reference-only input pipeline for a hand-written training loop.
# Shuffle BEFORE batching: shuffling after `.batch()` only reorders whole
# batches, so every batch would contain the same examples in every epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_eager, y_train_eager))
    .shuffle(10000)      # shuffle individual examples (buffer of 10000)
    .batch(batch_size)   # then group them into mini-batches
    .repeat()            # iterate indefinitely; the training loop decides when to stop
)

In [0]:
# Reference-only evaluation pipeline, built the same way as the training one.
# Shuffle BEFORE batching so that individual examples (not whole batches of
# 1000) are shuffled.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test_eager, y_test_eager))
    .shuffle(10000)   # shuffle individual examples (buffer of 10000)
    .batch(1000)      # evaluation batches of 1000 examples
    .repeat()         # iterate indefinitely; the caller decides when to stop
)

### RNN using LSTM
In the Keras API, an LSTM receives an input tensor whose shape is (batch_size, sequence_length, feature_dim) and outputs a tensor whose shape is (batch_size, feature_dim). When you need the output at every time step, you have to pass `return_sequences=True` to the LSTM's constructor. Generally, when you stack LSTMs, you need the full sequence output.

To get a bidirectional LSTM, we simply wrap each LSTM layer in `tf.keras.layers.Bidirectional`.

In [0]:
class RNN(tf.keras.Model):
    """Stacked bidirectional LSTM followed by a softmax classifier.

    Args:
        hidden_size: Number of units in each LSTM direction.
        num_layers: Total number of bidirectional LSTM layers in the stack.
        num_classes: Width of the final softmax ``Dense`` layer.
    """

    def __init__(self, hidden_size=10, num_layers=2, num_classes=10):
        super(RNN, self).__init__(name='mnist_rnn')
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent feature extractor, then the classification head.
        self.lstm = self.get_lstm_layers(hidden_size, num_layers)
        self.fc = L.Dense(num_classes, activation="softmax")

    @staticmethod
    def get_lstm_layers(hidden_size, num_layers):
        """Build the bidirectional LSTM stack as a ``tf.keras.Sequential``.

        The first ``num_layers - 1`` layers use ``return_sequences=True`` so the
        next LSTM receives the whole sequence; the last layer uses the default
        and returns only its final output.
        """
        # NOTE(review): CuDNNLSTM is the cuDNN-backed LSTM layer; presumably this
        # requires a GPU runtime - confirm before running on CPU.
        stack = [
            L.Bidirectional(
                L.CuDNNLSTM(units=hidden_size, return_sequences=True)
            )
            for _ in range(num_layers - 1)
        ]
        # Final layer: emit only the last step. Pass return_sequences=True here
        # as well if the full sequence is needed downstream.
        stack.append(L.Bidirectional(L.CuDNNLSTM(units=hidden_size)))
        return tf.keras.Sequential(stack)

    def call(self, x):
        """Forward pass: LSTM stack, then the softmax dense layer."""
        return self.fc(self.lstm(x))

In [0]:
# Build the classifier with the default LSTM sizes; the output width is taken
# from the `num_classes` hyper parameter so it stays in sync with the labels.
model = RNN(num_classes=num_classes)

In [49]:
# Adam optimizer using the learning rate from the hyper-parameter cell.
optimizer = tf.train.AdamOptimizer(learning_rate)
# Labels are one-hot vectors, hence categorical cross-entropy; accuracy is
# reported as an additional metric.
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=["accuracy"])

# In eager execution the layer parameters are created on the first forward
# pass, so run the model once on a small batch (50 samples) before summary().
model(x_train_eager[:50])

# Print the layer list and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_4 (Sequential)    multiple                  5760      
_________________________________________________________________
dense_4 (Dense)              multiple                  210       
Total params: 5,970
Trainable params: 5,970
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Train with the Keras loop. Keras is fed NumPy arrays here, so the eager
# tensors are converted back with .numpy(). 20% of the training arrays are
# held out for validation.
# The returned History object is kept (rather than discarded and echoed as a
# bare repr) so the per-epoch metrics in `history.history` can be inspected
# or plotted afterwards.
history = model.fit(
    x=x_train_eager.numpy(),
    y=y_train_eager.numpy(),
    validation_split=0.2,
    epochs=num_epochs,
    batch_size=batch_size,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7f9e57fb88d0>

In [51]:
# Evaluate on the held-out test set. The model was compiled with one loss and
# one metric, so evaluate() returns the pair (loss, accuracy).
test_loss, test_acc = model.evaluate(x=x_test_eager.numpy(), 
                                     y=y_test_eager.numpy())

# Report the test accuracy.
print("test_accracy: ", test_acc)

test_accracy:  0.95
