# RNNs

Using TensorFlow Keras RNN layers (e.g. LSTM, GRU).

In [1]:
import os
import re
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras import backend as K

Load a time-series dataset. I'll use the "Plane" dataset from http://www.cs.ucr.edu/~eamonn/time_series_data/

In [2]:
def load_data(fn):
    """
    Load CSV files in UCR time-series data format
    
    Returns:
        data - numpy array with data of shape (num_examples, num_features)
        labels - numpy array with labels of shape: (num_examples, 1)
    """
    df = pd.read_csv(fn, header=None)
    df_data = df.drop(0, axis=1).values.astype(np.float32)
    df_labels = df.loc[:, df.columns == 0].values.astype(np.uint8)
    return df_data, df_labels

train_data, train_labels = load_data("Plane/Plane_TRAIN")
test_data, test_labels = load_data("Plane/Plane_TEST")

# Information about dataset
num_features = 1
time_steps = train_data.shape[1]
num_classes = len(np.unique(train_labels))
data_info = (time_steps, num_features, num_classes)

Implementing a basic RNN cell. Based on TensorFlow [Keras RNN example](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN).

In [3]:
class MinimalRNNCell(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]

Train and test.

In [6]:
def get_dataset(features, labels, num_classes, batch_size, evaluation=False, buffer_size=5000):
    """
    Get the dataset object for feeding into the model
    
    If batch_size==None, then one-hot encode but don't batch (evaluation)
    If batch_size!=None, then repeat, shuffle, and batch (training)
    """
    def map_func(x, y):
        """ One-hot encode y, convert to appropriate data types """
        x_out = tf.cast(tf.expand_dims(x,axis=1), tf.float32)
        y_out = tf.one_hot(tf.squeeze(tf.cast(y, tf.uint8)), depth=num_classes)
        return [x_out, y_out]
    
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.map(map_func)
    
    if evaluation:
        dataset = dataset.batch(batch_size)
    else:
        dataset = dataset.repeat().shuffle(buffer_size).batch(batch_size)
    
    return dataset

def get_model(time_steps, num_features, num_classes, layer_type):
    """ Define RNN model """
    if layer_type == 'lstm':
        layer1 = tf.keras.layers.LSTM(128, return_sequences=True)
        layer2 = tf.keras.layers.LSTM(128, return_sequences=False)
    elif layer_type == 'rnn':
        layer1 = tf.keras.layers.RNN(MinimalRNNCell(128), return_sequences=True)
        layer2 = tf.keras.layers.RNN(MinimalRNNCell(128), return_sequences=False)
    elif layer_type == 'gru':
        layer1 = tf.keras.layers.GRU(128, return_sequences=True)
        layer2 = tf.keras.layers.GRU(128, return_sequences=False)
    
    x = tf.keras.Input((time_steps,1), dtype=tf.float32)
    n = layer1(x)
    n = tf.keras.layers.Dropout(0.5)(n)
    n = layer2(n)
    n = tf.keras.layers.Dropout(0.5)(n)
    n = tf.keras.layers.Dense(num_classes)(n)
    y = tf.keras.layers.Activation('softmax')(n)
    model = tf.keras.Model(x, y)
    
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    
    return model

def latest_checkpoint(model_file):
    """ Find latest checkpoint -- https://www.tensorflow.org/tutorials/keras/save_and_restore_models """
    model_path = os.path.dirname(model_file)
    #checkpoints = pathlib.Path(model_path).glob("*.index")
    checkpoints = pathlib.Path(model_path).glob("*.hdf5")
    checkpoints = sorted(checkpoints, key=lambda cp:cp.stat().st_mtime)
    #checkpoints = [cp.with_suffix('') for cp in checkpoints]
    checkpoints = [cp.with_suffix('.hdf5') for cp in checkpoints]
    
    if len(checkpoints) > 0:
        # Get epoch number from filename
        regex = re.compile(r'\d\d+')
        numbers = [int(x) for x in regex.findall(str(checkpoints[-1]))]
        assert len(numbers) == 1, "Could not determine epoch number from filename since multiple numbers"
        epoch = numbers[0]
        
        return str(checkpoints[-1]), epoch
    
    return None, None

def train(data_info, features, labels,
          batch_size=64,
          num_epochs=10,
          model_file="models/{epoch:04d}.hdf5",
          log_dir="logs",
          layer_type="lstm"):
    
    model_path = os.path.dirname(model_file)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    latest, epoch = latest_checkpoint(model_file)

    # Data stats
    time_steps, num_features, num_classes = data_info

    # Get dataset / model
    dataset = get_dataset(features, labels, num_classes, batch_size)
    
    # Load previous weights if found, if not we'll start at epoch 0
    if latest is not None:
        model = tf.keras.models.load_model(latest)
    else:
        model = get_model(time_steps, num_features, num_classes, layer_type)
        epoch = 0
    
    # Train
    model.fit(dataset, initial_epoch=epoch, epochs=num_epochs, steps_per_epoch=30, callbacks=[
        # save_weights_only doesn't work for LSTM apparently, model.get_weights() before saving
        # and after loading differs for LSTM weights but not dense weights, i.e. dense are loaded
        # and LSTM are not -- i.e. saving only weights is useless
        tf.keras.callbacks.ModelCheckpoint(model_file, period=1, verbose=0),
        tf.keras.callbacks.TensorBoard(log_dir),
        tf.keras.callbacks.TerminateOnNaN()
    ])
    
    return model

def evaluate(data_info, features, labels, model=None,
             model_file="models/{epoch:04d}.hdf5",
             useTensorFlowDataset=True):
    
    latest, epoch = latest_checkpoint(model_file)
    
    # Data stats
    time_steps, num_features, num_classes = data_info
    
    # Get dataset
    if useTensorFlowDataset:
        dataset = get_dataset(features, labels, num_classes, 1, evaluation=True)
    else:
        x = np.expand_dims(features,axis=2).astype(np.float32)
        y = np.eye(num_classes)[np.squeeze(labels).astype(np.uint8) - 1] # one-hot encode
    
    # Load weights from last checkpoint if model is not given
    if model is None:
        assert latest is not None, "No latest checkpoint to use for evaluation"
        print("Loading model from", latest, "at epoch", epoch)
        model = tf.keras.models.load_model(latest)
    
    # Evaluate
    if useTensorFlowDataset:
        loss, acc = model.evaluate(dataset, steps=len(labels))
    else:
        loss, acc = model.evaluate(x, y)
    
    return acc

Run training and evaluation.

In [9]:
for layer_type in ['lstm', 'gru']:
    print("Training model:", layer_type)
    tf.reset_default_graph()
    K.clear_session()
    model = train(data_info, train_data, train_labels,
                  model_file=layer_type+"-models/{epoch:04d}.hdf5",
                  log_dir=layer_type+"-logs", layer_type=layer_type)

Training model: lstm
Training model: gru


In [10]:
for layer_type in ['lstm', 'gru']:
    print("Evaluating model:", layer_type)
    print("  Train:", evaluate(data_info, train_data, train_labels,
                               model_file=layer_type+"-models/{epoch:04d}.hdf5"))
    print("  Test:", evaluate(data_info, test_data, test_labels,
                              model_file=layer_type+"-models/{epoch:04d}.hdf5"))

Evaluating model: lstm
Loading model from lstm-models/0010.hdf5 at epoch 10
  Train: 0.8095238095238095
Loading model from lstm-models/0010.hdf5 at epoch 10
  Test: 0.8380952380952381
Evaluating model: gru
Loading model from gru-models/0010.hdf5 at epoch 10
  Train: 0.7238095238095238
Loading model from gru-models/0010.hdf5 at epoch 10
  Test: 0.7238095238095238
