In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.python.keras.datasets import mnist
from tensorflow.contrib.eager.python import tfe

  from ._conv import register_converters as _register_converters


In [2]:
# Switch TensorFlow to eager execution for the rest of the notebook.
tf.enable_eager_execution()

# Seed NumPy and TensorFlow with the same value so runs are repeatable.
seed = 0
np.random.seed(seed)
tf.set_random_seed(seed)

In [3]:
# Directory that will hold the saved weights. `exist_ok=True` makes the call
# idempotent (safe on re-runs) and avoids the check-then-create race of
# `os.path.exists` followed by `os.makedirs`.
os.makedirs('weights/', exist_ok=True)

# constants
units = 64  # number of units in every LSTM cell
batch_size = 256  # samples per step for training and evaluation
epochs = 2  # passes over the training set
num_classes = 10  # size of the one-hot labels and of the model output

In [4]:
# dataset loading
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert to float32, divide by 255 and view every image as a sequence:
# 28 timesteps, 28 inputs / timestep.
x_train = (x_train.astype('float32') / 255.).reshape((-1, 28, 28))
x_test = (x_test.astype('float32') / 255.).reshape((-1, 28, 28))

# One-hot encode the labels, then go back to numpy: Keras cannot take a mix of
# numpy arrays and tensors as input.
y_train_ohe = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_ohe = tf.one_hot(y_test, depth=num_classes).numpy()

# Report the shape of every array that is fed to the model.
for label, array in (('x train', x_train),
                     ('y train', y_train_ohe),
                     ('x test', x_test),
                     ('y test', y_test_ohe)):
    print(label, array.shape)

x train (60000, 28, 28)
y train (60000, 10)
x test (10000, 28, 28)
y test (10000, 10)


# Bi-Directional LSTM

Writing a bi-directional LSTM in Keras is super simple with the `Bidirectional` wrapper. However, such a model runs slower than expected.

Some fixes for it are to use the GPU implementation for all the cells, and to unroll the entire RNN beforehand. In normal Keras and TensorFlow, unrolling the RNN yields significant speed improvements since the symbolic loop is replaced with the unrolled graph representation of the RNN.

In Eager mode, I don't believe unrolling does much to help with the speed.

In [5]:
class BiRNN(tf.keras.Model):
    """Bidirectional stacked-LSTM classifier.

    A stack of ``num_layers`` LSTM cells is wrapped in an unrolled RNN, run in
    both directions through ``tf.keras.layers.Bidirectional`` and followed by a
    dense layer with a softmax over ``num_classes`` outputs.

    Args:
        units: number of units in every LSTM cell.
        num_classes: number of outputs of the final dense layer.
        merge_mode: how the forward and backward outputs are combined; passed
            unchanged to ``tf.keras.layers.Bidirectional``.
        num_layers: number of LSTM cells stacked inside the wrapped RNN.
    """

    def __init__(self, units, num_classes, merge_mode='concat', num_layers=1):
        super(BiRNN, self).__init__()
        # LSTMCell implementation: 1 when no GPU is visible, 2 otherwise.
        self.impl = 1 if tfe.num_gpus() == 0 else 2
        self.cells = [tf.keras.layers.LSTMCell(units, implementation=self.impl) for _ in range(num_layers)]
        self.rnn = tf.keras.layers.RNN(self.cells, unroll=True)  # slower if not unrolled - probably because it is using K.rnn() internally.
        self.bidirectional = tf.keras.layers.Bidirectional(self.rnn, merge_mode=merge_mode)
        self.classifier = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass and return the softmax of the dense output.

        Args:
            inputs: input batch; assumed to be (batch, timesteps, features) -
                the notebook feeds (N, 28, 28) arrays.
            training: Keras training flag, forwarded to the bidirectional layer.
            mask: optional mask, forwarded to the bidirectional layer.

        Returns:
            The softmax of the classifier output.
        """
        # Forward the flags Keras hands us instead of silently dropping them;
        # with the default None values this is the same call as before.
        x = self.bidirectional(inputs, training=training, mask=mask)
        output = self.classifier(x)

        # softmax op does not exist on the gpu, so always use cpu
        with tf.device('/cpu:0'):
            output = tf.nn.softmax(output)

        return output

In [6]:
# Use the first GPU when one is visible, otherwise fall back to the CPU.
device = '/cpu:0' if tfe.num_gpus() == 0 else '/gpu:0'

with tf.device(device):
    # build model and optimizer
    model = BiRNN(units, num_classes, num_layers=2)
    model.compile(optimizer=tf.train.AdamOptimizer(0.01), loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # TF Keras tries to use entire dataset to determine shape without this step when using .fit()
    # Fix = Use exactly one sample from the provided input dataset to determine input/output shape/s for the model
    dummy_x = tf.zeros((1, 28, 28))
    model._set_inputs(dummy_x)

    # train
    model.fit(x_train, y_train_ohe, batch_size=batch_size, epochs=epochs,
              validation_data=(x_test, y_test_ohe), verbose=1)

    # evaluate on test set (batch_size passed by keyword so it cannot be
    # mistaken for another positional argument)
    scores = model.evaluate(x_test, y_test_ohe, batch_size=batch_size, verbose=1)
    print("Final test loss and accuracy :", scores)

    # Create the checkpoint directory explicitly rather than relying on the
    # saver to do it; only 'weights/' is created elsewhere in the notebook.
    checkpoint_path = 'weights/07_01_bi_rnn/weights.ckpt'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    saver = tfe.Saver(model.variables)
    saver.save(checkpoint_path)

Train on 60000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Final test loss and accuracy : [0.07469581732600927, 0.9748]
