TensorFlow implementation of a Recurrent Neural Network (LSTM) that performs dynamic computation over variable-length sequences. This example uses a toy dataset to classify each sequence as either linear or random. The generated sequences have variable length and are zero-padded to a common maximum length before being fed to the network.


References:

*   Long Short-Term Memory, Sepp Hochreiter & Jürgen Schmidhuber, Neural Computation 9(8): 1735-1780, 1997.



In [0]:
from __future__ import print_function

import tensorflow as tf
import random

In [0]:
# toy data generator

class toysequencedata(object):
  """Generate a toy dataset of zero-padded, variable-length sequences.

  Each sample is drawn from one of two classes with equal probability:
  - Class 0 (label [1.0, 0.0]): linear sequences of consecutive integers
    (i.e. [0,1,2,3...])
  - Class 1 (label [0.0, 1.0]): random sequences of independent integers
    (i.e. [1,3,10,7...])
  Every value is divided by 'max_value' and stored as a one-element list,
  so a sample is a list of 'max_seq_len' steps of the form [value].

  NOTICE:
  We have to pad each sequence to reach 'max_seq_len' for Tensorflow
  consistency (we cannot feed a numpy array with inconsistent dimensions).
  The dynamic calculation will then be performed thanks to 'seqlen' attribute
  that records every actual sequence length.

  Attributes:
    data: list of padded sequences, one per sample.
    labels: list of one-hot labels, aligned with 'data'.
    seqlen: list of the real (unpadded) length of each sequence.
    batch_id: index of the first sample of the next batch.
  """

  def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3,
               max_value=1000):
    """Build the whole dataset in memory.

    Args:
      n_samples: number of sequences to generate.
      max_seq_len: longest real sequence length; also the padded length.
      min_seq_len: shortest real sequence length.
      max_value: largest integer used before normalisation. It should be at
        least 'max_seq_len' so that a linear run always has a valid start.
    """
    self.data = []
    self.labels = []
    self.seqlen = []

    for _ in range(n_samples):
      # real length of this sample; recorded so the model can ignore padding
      seq_len = random.randint(min_seq_len, max_seq_len)
      self.seqlen.append(seq_len)

      # add a linear or a random int sequence (50% prob each)
      if random.random() < 0.5:
        # linear: consecutive integers starting at a random offset chosen so
        # that the last value does not exceed 'max_value'
        start = random.randint(0, max_value - seq_len)
        seq = [[float(v) / max_value] for v in range(start, start + seq_len)]
        label = [1.0, 0.0]
      else:
        # random: every step is drawn independently
        seq = [[float(random.randint(0, max_value)) / max_value]
               for _ in range(seq_len)]
        label = [0.0, 1.0]

      # pad sequence for dimension consistency (fresh list per padded step)
      seq += [[0.] for _ in range(max_seq_len - seq_len)]
      self.data.append(seq)
      self.labels.append(label)

    self.batch_id = 0

  def next(self, batch_size):
    """Return a batch of data. When data end is reached, start over.

    The last batch before wrapping around may hold fewer than 'batch_size'
    samples.

    Args:
      batch_size: maximum number of samples to return.

    Returns:
      A (batch_data, batch_labels, batch_seqlen) tuple of aligned lists.
    """
    n_total = len(self.data)
    if self.batch_id == n_total:
      self.batch_id = 0

    start = self.batch_id
    end = min(start + batch_size, n_total)
    self.batch_id = end
    return self.data[start:end], self.labels[start:end], self.seqlen[start:end]
      
  

In [0]:
# model configuration: hyperparameters, datasets, graph inputs and the
# output-layer parameters shared by the cells below

# training hyperparameters
learning_rate = 0.01 # step size handed to the gradient descent optimizer
training_steps = 10000 # number of optimization steps (one batch per step)
batch_size = 128 # maximum number of samples requested per training batch
display_step = 200 # print batch loss/accuracy every 'display_step' steps

# network hyperparameters
seq_max_len = 20 # sequence max length (also the padded length of every sample)
n_hidden = 64 # hidden layer number of features (LSTM cell size)
n_classes = 2 # linear sequence or not (labels are two-element one-hot lists)

# the training set is generated before the test set; both draw from the
# shared 'random' module state
trainset = toysequencedata(n_samples=1000, max_seq_len=seq_max_len)
testset = toysequencedata(n_samples=500, max_seq_len=seq_max_len)

# graph input: padded sequences of shape [batch, seq_max_len, 1] and their
# one-hot labels of shape [batch, n_classes]; the batch dimension is left open
x = tf.placeholder("float", [None, seq_max_len, 1] )
y = tf.placeholder("float", [None, n_classes])

# placeholder for indicating each sequence length (one int per sample)
seqlen = tf.placeholder(tf.int32, [None])

# define weights of the linear output layer applied to the selected LSTM
# output, randomly initialised from a normal distribution
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}

# bias of the linear output layer, one value per class
biases = {
    'out':tf.Variable(tf.random_normal([n_classes]))
}

In [0]:
def dynamicRNN(x, seqlen, weights, biases):
  """Build the LSTM classifier graph and return its class logits.

  The input is unrolled for 'seq_max_len' steps, but only the output at each
  sample's own last real step (seqlen - 1) is fed to the linear output layer.

  Args:
    x: input tensor of shape (batch, n_steps, n_input).
    seqlen: int tensor with the real length of every sequence in the batch.
    weights: dict whose 'out' entry is the output-layer weight matrix.
    biases: dict whose 'out' entry is the output-layer bias.

  Returns:
    The result of matmul(selected_output, weights['out']) + biases['out'].
  """
  # the 'rnn' function wants a list of 'n_steps' tensors of shape
  # (batch, n_input), so split the input along the time axis
  step_inputs = tf.unstack(x, seq_max_len, 1)

  # LSTM cell with 'n_hidden' units
  cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)

  # providing 'sequence_length' makes the rnn perform dynamic calculation;
  # the final states are not needed here
  step_outputs, _ = tf.contrib.rnn.static_rnn(
      cell, step_inputs, dtype=tf.float32, sequence_length=seqlen)

  # with dynamic calculation the output we want is the one at each sample's
  # last real step, e.g. the 10th output for a sequence of length 10.
  # tensorflow doesn't support advanced indexing here, so the selection is
  # built by hand below.

  # 'step_outputs' is a list with one output per timestep; pack it into a
  # single tensor and move the batch axis first: [batch, n_step, n_hidden]
  stacked = tf.transpose(tf.stack(step_outputs), [1, 0, 2])

  # number of samples actually present in this batch
  n_samples = tf.shape(stacked)[0]

  # once flattened to rows of 'n_hidden' values, sample b occupies rows
  # b * seq_max_len .. b * seq_max_len + seq_max_len - 1, so its last real
  # output sits at offset (seqlen - 1) within that range
  last_rows = tf.range(0, n_samples) * seq_max_len + (seqlen - 1)

  # pick one row per sample
  last_outputs = tf.gather(tf.reshape(stacked, [-1, n_hidden]), last_rows)

  # linear activation on the selected outputs
  return tf.matmul(last_outputs, weights['out']) + biases['out']

In [5]:
# class logits for every sample in the batch (fed to the loss as 'logits')
pred = dynamicRNN(x, seqlen, weights, biases)

# define loss and optimizer: softmax cross-entropy between the logits and the
# one-hot labels, averaged over the batch and minimized by plain gradient
# descent with the configured learning rate
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,
                                                                   labels = y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# evaluate model: a sample is correct when the index of its largest logit
# equals the index of the largest label entry; accuracy is the mean of those
# matches cast to float
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# op that initializes the variables; it is run once the session starts
init=tf.global_variables_initializer()

W0820 19:06:33.883038 140571583027072 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0820 19:06:33.885004 140571583027072 deprecation.py:323] From <ipython-input-4-8d8fa44125d5>:10: __init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
W0820 19:06:33.890672 140571583027072 deprecation.py:323] From <ipython-input-4-8d8fa44125d5>:14: static_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.la

In [7]:
with tf.Session() as sess:
  # variables have to be initialized before any other op is run
  sess.run(init)

  for step in range(1, training_steps + 1):
    # fetch the next training batch (wraps around at the end of the data)
    batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
    train_feed = {x: batch_x, y: batch_y, seqlen: batch_seqlen}

    # one optimization step (backprop) on this batch
    sess.run(optimizer, feed_dict=train_feed)

    # only report on the first step and on every 'display_step'-th step
    if step % display_step != 0 and step != 1:
      continue

    # batch accuracy and loss, measured on the batch just trained on
    acc, loss = sess.run([accuracy, cost], feed_dict=train_feed)
    print("Step", step, "Loss", loss, "Accuracy", acc)

  print("Optimization finished")

  # accuracy over the whole test set, fed in a single run
  test_data = testset.data
  test_label = testset.labels
  test_seqlen = testset.seqlen
  test_feed = {x: test_data, y: test_label, seqlen: test_seqlen}
  test_accuracy = sess.run(accuracy, feed_dict=test_feed)

  print("Testing accuracy", test_accuracy)

Step 1 Loss 0.9344217 Accuracy 0.46875
Step 200 Loss 0.70305663 Accuracy 0.5234375
Step 400 Loss 0.7011467 Accuracy 0.5390625
Step 600 Loss 0.6976291 Accuracy 0.5390625
Step 800 Loss 0.6918507 Accuracy 0.546875
Step 1000 Loss 0.6798897 Accuracy 0.546875
Step 1200 Loss 0.6482816 Accuracy 0.6328125
Step 1400 Loss 0.58280903 Accuracy 0.6953125
Step 1600 Loss 0.53303623 Accuracy 0.75
Step 1800 Loss 0.52078336 Accuracy 0.765625
Step 2000 Loss 0.5156523 Accuracy 0.7578125
Step 2200 Loss 0.5120056 Accuracy 0.75
Step 2400 Loss 0.5088698 Accuracy 0.75
Step 2600 Loss 0.50592697 Accuracy 0.75
Step 2800 Loss 0.5030202 Accuracy 0.75
Step 3000 Loss 0.5000622 Accuracy 0.75
Step 3200 Loss 0.49700344 Accuracy 0.75
Step 3400 Loss 0.49380025 Accuracy 0.75
Step 3600 Loss 0.4903794 Accuracy 0.7421875
Step 3800 Loss 0.48660076 Accuracy 0.7421875
Step 4000 Loss 0.48220748 Accuracy 0.7421875
Step 4200 Loss 0.47673368 Accuracy 0.7421875
Step 4400 Loss 0.469294 Accuracy 0.7421875
Step 4600 Loss 0.45815504 Accur