In [1]:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# Hyper-parameters shared by every cell below.
num_layers = 3 # number of stacked LSTM layers (newly added in this tutorial)
# Number of passes of the training loop; a fresh random series is generated
# at the start of each one.
num_epochs = 100
# Length of the flat binary series produced by generateData().
total_series_length = 50000
truncated_backprop_length = 15 # timesteps per training window (width of the batch placeholders)
# Number of units in each LSTM layer.
state_size = 4
# The series is binary, so every timestep is classified as 0 or 1.
num_classes = 2
echo_step = 3 # the target series is the input series delayed by this many steps
batch_size = 5 # number of sub-series (rows) trained in parallel in one iteration
# Training windows per epoch: each of the batch_size rows holds
# total_series_length // batch_size steps and is cut into windows of
# truncated_backprop_length steps.
num_batches = total_series_length//batch_size//truncated_backprop_length

In [2]:
def generateData(series_length=None, shift=None, num_rows=None):
    """Create a random binary series and its delayed "echo" as training data.

    The input ``x`` is a sequence of 0/1 values drawn with equal probability.
    The target ``y`` is ``x`` shifted right by ``shift`` steps, with the first
    ``shift`` positions (which have no source value) set to 0. Both flat
    series are then reshaped to ``num_rows`` rows, the first index changing
    slowest, so each row is a contiguous sub-series.

    Args:
        series_length: total number of steps to generate. Defaults to the
            module-level ``total_series_length``.
        shift: non-negative delay between input and target. Defaults to the
            module-level ``echo_step``.
        num_rows: number of rows in the result. Defaults to the module-level
            ``batch_size``. Must divide ``series_length`` evenly.

    Returns:
        Tuple ``(x, y)`` of integer arrays, each of shape
        ``(num_rows, series_length // num_rows)``.

    Raises:
        ValueError: raised by ``reshape`` if ``series_length`` is not a
            multiple of ``num_rows``.
    """
    # Resolve defaults at call time so that later edits to the notebook's
    # configuration cell are picked up without redefining this function.
    if series_length is None:
        series_length = total_series_length
    if shift is None:
        shift = echo_step
    if num_rows is None:
        num_rows = batch_size

    x = np.random.choice(2, series_length, p=[0.5, 0.5])
    # np.roll returns a new array, so zeroing the wrapped-around head of y
    # leaves x untouched.
    y = np.roll(x, shift)
    y[0:shift] = 0

    x = x.reshape((num_rows, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((num_rows, -1))

    return (x, y)


In [3]:
# One truncated-backprop window of inputs (as floats) and of integer class labels.
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])


# For a single LSTM layer the initial state could be fed as one LSTMStateTuple, see
# https://www.tensorflow.org/versions/r1.0/api_docs/python/tf/contrib/rnn/LSTMStateTuple
#   cell_state = tf.placeholder(tf.float32, [batch_size, state_size])
#   hidden_state = tf.placeholder(tf.float32, [batch_size, state_size])
#   init_state = tf.contrib.rnn.LSTMStateTuple(cell_state, hidden_state)
#   print("init_state created by LSTMStateTuple:", init_state)
#
# With several layers the whole state is fed through one placeholder instead:
# axis 0 selects the layer, axis 1 selects cell state (0) or hidden state (1).
init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])

# The multi-layer LSTM API expects a tuple of LSTMStateTuples, one per layer.
# Unstacking along axis 0 gives num_layers tensors of shape
# [2, batch_size, state_size]; each is then split into its (c, h) pair.
state_per_layer_list = tf.unstack(init_state, axis=0)
rnn_tuple_state = tuple(
    tf.contrib.rnn.LSTMStateTuple(layer_state[0], layer_state[1])
    for layer_state in state_per_layer_list
)

rnn_tuple_state

(LSTMStateTuple(c=<tf.Tensor 'strided_slice:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'strided_slice_1:0' shape=(5, 4) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'strided_slice_2:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'strided_slice_3:0' shape=(5, 4) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'strided_slice_4:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'strided_slice_5:0' shape=(5, 4) dtype=float32>))

In [4]:
# W and b are not used by the active code in this example: they only appear in
# the disabled ("if False") manual forward pass, because TensorFlow creates the
# weights of the LSTM cells itself. W has state_size + 1 rows so that it can
# multiply a [input, state] concatenation in that reference loop.
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)


# W2 and b2 are needed: they form the output layer that maps each hidden state
# (state_size values) to the logits of the num_classes classes.
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

In [5]:
# batchX_placeholder has shape (batch_size, truncated_backprop_length),
# e.g. (5, 15) with batch_size=5 and truncated_backprop_length=15.
# The RNN is trained on these 5 rows with 15 timesteps each.
#
# Unstacking along axis 1 would give 15 tensors of shape (5,), one per timestep:
# inputs_series = tf.unstack(batchX_placeholder, axis=1)
#
# tf.split(value, n, axis=1) instead cuts axis 1 into n equal pieces and keeps
# that axis, e.g. a (5, 30) tensor split into 3 pieces gives 3 tensors of
# shape (5, 10). Splitting into truncated_backprop_length pieces therefore
# gives one (batch_size, 1) tensor per timestep.

# Reference only: not used in this example.
if False:
    # Use the configured window length rather than a literal 15 so the split
    # stays correct when truncated_backprop_length is changed.
    inputs_series = tf.split(batchX_placeholder, truncated_backprop_length, axis=1)
    labels_series = tf.unstack(batchY_placeholder, axis=1)

    print(inputs_series)

In [6]:
# This is basically the forward pass of a plain (single-layer, tanh) RNN
# written out by hand.
# For each input in inputs_series, we concatenate it with the current state,
# then apply the same weights W and bias b to obtain the next state, which is
# both recorded and carried over to the next timestep.
#
# NOTE(review): this loop is disabled and presumably dates from a single-layer
# version of the notebook in which the state was [batch_size, state_size]
# (that is the width W's state_size + 1 rows are built for). Here init_state
# is the 4-D [num_layers, 2, batch_size, state_size] placeholder and
# inputs_series is only defined inside another disabled cell, so the loop
# would need adapting before it could be enabled.

if False: # for reference only
    current_state = init_state
    states_series = []

    for current_input in inputs_series:
        # (5,) -> (5,1) so as to concatenate with the state
        current_input = tf.reshape(current_input, [batch_size, 1])

        # Calculate the sum of affine transform
        # By concatenating those two tensors you will only use one matrix 
        # multiplication.
        input_and_state_concatenated = tf.concat([current_input, current_state], 1)

        next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)
        states_series.append(next_state)
        current_state = next_state

In [7]:
# Preview of the tensor handed to dynamic_rnn: a trailing axis of size 1 is
# appended to batchX_placeholder, turning (5, 15) into (5, 15, 1) as the
# displayed result shows.
tf.expand_dims(batchX_placeholder, -1)

<tf.Tensor 'ExpandDims:0' shape=(5, 15, 1) dtype=float32>

In [8]:
# Forward pass implemented with TensorFlow's RNN API; it plays the role of the
# hand-written loop shown above for reference.
#
# Alternatives kept for reference:
#   plain RNN cell
#     cell = tf.contrib.rnn.BasicRNNCell(num_units=state_size)
#   single LSTM layer, statically unrolled
#     cell = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
#     states_series, current_state = tf.contrib.rnn.static_rnn(cell, inputs_series, init_state)
#   multiple LSTM layers, statically unrolled
#     cell = tf.contrib.rnn.MultiRNNCell(
#         [tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
#          for layer_idx in range(num_layers)],
#         state_is_tuple=True)
#     states_series, current_state = tf.contrib.rnn.static_rnn(
#         cell, inputs_series, initial_state=rnn_tuple_state)
#
# dynamic_rnn takes the batch inputs in shape
# [batch_size, truncated_backprop_length, input_size],
# therefore a single trailing dimension is added to the placeholder.
# It returns:
#   states_series - tensor of shape
#                   [batch_size, truncated_backprop_length, state_size] holding
#                   the hidden state of the last layer at every timestep
#   current_state - the final state of every layer, as a tuple of
#                   LSTMStateTuple

# Build one BasicLSTMCell object per layer. Repeating a single object with
# [cell] * num_layers makes every layer refer to the same cell; later
# TensorFlow 1.x releases either reject that reuse or share one set of weights
# across layers, which cannot work here because the first layer sees inputs of
# width 1 while the upper layers see inputs of width state_size.
cell = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
     for layer_idx in range(num_layers)],
    state_is_tuple=True)
states_series, current_state = tf.nn.dynamic_rnn(
    cell,
    tf.expand_dims(batchX_placeholder, -1),
    initial_state=rnn_tuple_state,
    time_major=False)  # inputs are batch-major: time is axis 1, not axis 0

print (states_series)
print (current_state)

Tensor("rnn/transpose:0", shape=(5, 15, 4), dtype=float32)
(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(5, 4) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(5, 4) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_6:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_7:0' shape=(5, 4) dtype=float32>))


In [9]:
# Flatten states_series from [batch_size, truncated_backprop_length, state_size]
# to [batch_size*truncated_backprop_length, state_size] so that one matmul can
# compute the logits of every timestep of every row.
states_series = tf.reshape(states_series, [-1, state_size])

# current_state holds the final state of the window only (not one per
# timestep): one LSTMStateTuple per layer, i.e. 3 cell states (c) and
# 3 hidden states (h), each of shape (batch_size, state_size).
print(current_state)

# states_series only contains the hidden state of the last LSTM layer.
print (states_series)

(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(5, 4) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(5, 4) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_6:0' shape=(5, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_7:0' shape=(5, 4) dtype=float32>))
Tensor("Reshape:0", shape=(75, 4), dtype=float32)


In [10]:
# Output layer applied to every flattened hidden state:
# (75, 4) x (4, 2) + (1, 2) -> logits of shape (75, 2),
# i.e. [batch_size*truncated_backprop_length, num_classes].
logits = tf.matmul(states_series, W2) + b2
# Flatten the labels the same way (batch-major) so that entry i of labels
# belongs to row i of logits.
labels = tf.reshape(batchY_placeholder, [-1])

In [13]:
# Loss, per-timestep predictions and the training op.

# logits arrives flattened as [batch_size*truncated_bp, num_classes].
# For plotting it is viewed as [batch_size, truncated_bp, num_classes] and
# unstacked along axis 1, which yields 'truncated_bp' tensors of shape
# [batch_size, num_classes]; softmax turns each into class probabilities.
logits_series = tf.unstack(
    tf.reshape(logits, (batch_size, truncated_backprop_length, num_classes)),
    axis=1,
)
predictions_series = list(map(tf.nn.softmax, logits_series))

# The loss itself is computed on the flattened tensors: one cross-entropy
# value per (row, timestep) pair, averaged into a scalar.
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(learning_rate=0.3).minimize(total_loss)

In [14]:
# Reference only (disabled): the loss as it is computed when the RNN outputs
# are kept as Python lists with one tensor per timestep. It relies on
# states_series being such a list and on labels_series, which is only defined
# inside another disabled cell, so it does not apply to the flattened tensors
# used by the active cells.
if False:
    # Calculating loss

    # This calculates the logits of the series X fed into the rnn by multiplying
    # the hidden state neurons with weights and adding the biases. If the truncated
    # length is 15, then the length of the logits series is also 15.
    logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
    predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

    # One cross-entropy tensor per timestep, pairing each step's logits with
    # that step's labels.
    losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)\
             for logits, labels in zip(logits_series, labels_series)]

    total_loss = tf.reduce_mean(losses)

    train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

In [15]:
# visualizing the training

def plot(loss_list, predictions_series, batchX, batchY):
    """Redraw the training monitor: the loss curve plus up to five batch rows.

    Panel 1 of a 2x3 grid shows the loss history. Each following panel shows
    one row of the current batch as three overlaid bar series: the input
    (blue, height 1), the target (red, height 0.5) and the predicted class
    (green, height 0.3).

    Args:
        loss_list: sequence of loss values collected so far.
        predictions_series: per-timestep class probabilities, convertible to
            an array of shape (timesteps, batch rows, classes).
        batchX: 2-D array of inputs, one row per sub-series.
        batchY: 2-D array of targets with the same shape as batchX.
    """
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    # Stack the per-timestep predictions once instead of once per row.
    predictions = np.array(predictions_series)
    # Take the panel count and the x-range from the data itself so that the
    # function also works for batches with fewer than five rows or with a
    # window length other than the configured one.
    num_rows, num_steps = np.shape(batchX)[0], np.shape(batchX)[1]
    left_offset = np.arange(num_steps)

    # The grid has six panels and the first one is taken by the loss curve,
    # so at most five rows can be shown.
    for batch_series_idx in range(min(5, num_rows)):
        one_hot_output_series = predictions[:, batch_series_idx, :]
        # Predict class 1 whenever the probability of class 0 is below 0.5.
        single_output_series = (one_hot_output_series[:, 0] < 0.5).astype(int)

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, num_steps, 0, 2])
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)

In [None]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Interactive figure that plot() redraws while training is running.
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = generateData()

        # Every epoch starts from an all-zero LSTM state. Its layout mirrors
        # the init_state placeholder:
        #   axis 0 - one entry per layer of the RNN model
        #   axis 1 - the two states of a layer: cell_state and hidden_state
        #   axis 2 - one state per row of the batch
        #   axis 3 - the state_size units of that state
        # (single-layer equivalent: separate (batch_size, state_size) zero
        #  arrays for the cell state and for the hidden state)
        _current_state = np.zeros((num_layers, 2, batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            # Column range of the current truncated-backprop window.
            start_idx = truncated_backprop_length * batch_idx
            end_idx = start_idx + truncated_backprop_length

            batchX, batchY = x[:, start_idx:end_idx], y[:, start_idx:end_idx]

            # The state returned for this window is fed back as the initial
            # state of the next one, so it is carried across window borders.
            feed = {
                batchX_placeholder: batchX,
                batchY_placeholder: batchY,
                init_state: _current_state,
            }
            fetches = [total_loss, train_step, current_state, predictions_series]
            _total_loss, _train_step, _current_state, _predictions_series = \
                sess.run(fetches, feed_dict=feed)

            loss_list.append(_total_loss)

            # Report and redraw on every 100th window.
            if not batch_idx % 100:
                print("Step",batch_idx,"Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

# Leave interactive mode and keep the final figure on screen.
plt.ioff()
plt.show()

<matplotlib.figure.Figure at 0x1e07fca7da0>

New data, epoch 0
Step 0 Loss 0.693214
Step 100 Loss 0.688469
Step 200 Loss 0.678645
Step 300 Loss 0.55753
Step 400 Loss 0.562501
Step 500 Loss 0.478465
Step 600 Loss 0.367533
New data, epoch 1
Step 0 Loss 0.310649
Step 100 Loss 0.00877084
Step 200 Loss 0.00408865
Step 300 Loss 0.00293484
Step 400 Loss 0.00204762
Step 500 Loss 0.00154591
Step 600 Loss 0.00136973
New data, epoch 2
Step 0 Loss 0.302162
Step 100 Loss 0.00103956
Step 200 Loss 0.000960993
Step 300 Loss 0.000909116
Step 400 Loss 0.000748888
Step 500 Loss 0.000705278
Step 600 Loss 0.000609384
New data, epoch 3
Step 0 Loss 0.314657
Step 100 Loss 0.000615921
Step 200 Loss 0.000520673
Step 300 Loss 0.000434954
Step 400 Loss 0.000432143
Step 500 Loss 0.000393571
Step 600 Loss 0.000393609
New data, epoch 4
Step 0 Loss 0.417513
Step 100 Loss 0.000363807
Step 200 Loss 0.000408322
Step 300 Loss 0.000368793
Step 400 Loss 0.000338281
Step 500 Loss 0.0003256
Step 600 Loss 0.000340364
New data, epoch 5
Step 0 Loss 0.352023
Step 100 Loss 

In [None]:
# Scratch cell kept for reference only. It is disabled, like the other
# reference cells of this notebook, because on a fresh kernel it fails:
# inputs_series is only defined inside a disabled cell. It would also overwrite
# current_state (the dynamic_rnn output) with the init_state placeholder.
# NOTE(review): the concat presumably targets a single-layer state of shape
# [batch_size, state_size]; with the 4-D init_state used here the ranks of the
# two operands differ.
if False:
    current_input = inputs_series[0]
    current_input = tf.reshape(current_input, [batch_size, 1])
    current_state = init_state
    tf.concat([current_input, current_state], 1)