### This is an example implementation of a Recurrent Neural Network (LSTM) using the TensorFlow Library.

Example adapted from [TensorFlow Examples](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb)


Chen Chen

12/7/2016

In [2]:
# Import TensorFlow Module
import tensorflow as tf

# Import RNN modules
from tensorflow.python.ops import rnn, rnn_cell

# and numpy for math
import numpy as np

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets from /tmp/data/.
# one_hot=True asks for the labels as one-hot vectors rather than integer
# class ids, which is the form the label placeholder in this notebook expects.
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# ---- Training-loop controls ----
maxIter = 1e6      # training stops once step * batchSize reaches this value
batchSize = 128    # number of examples drawn per training step
printStep = 10     # report loss/accuracy every printStep steps

# ---- Network settings ----
learnRate = 0.001  # learning rate handed to the optimizer
nInput = 28        # values fed to the network per sequence step (one row of pixels)
nRows = 28         # sequence steps per image (number of pixel rows)
nHidden = 128      # number of hidden units in the LSTM cell
nClasses = 10      # single digits 0-9

# ---- Graph inputs ----
# x holds a batch of images as (batch, nRows, nInput); y holds the matching
# label vectors as (batch, nClasses). The batch dimension is left open (None).
x = tf.placeholder(tf.float32, shape=[None, nRows, nInput], name="netInput")
y = tf.placeholder(tf.float32, shape=[None, nClasses], name="netOutput")

# ---- Output-layer parameters ----
# Randomly initialised projection from the nHidden-sized LSTM output to nClasses scores.
weight = dict(out=tf.Variable(tf.random_normal(shape=[nHidden, nClasses])))
bias = dict(out=tf.Variable(tf.random_normal(shape=[nClasses])))

In [None]:
def RNN(x, weight, bias):
    """Build the LSTM classifier graph and return its output scores.

    Args:
        x: input tensor laid out as (batchSize, nRows, nInput).
        weight: dict whose 'out' entry is the output projection matrix.
        bias: dict whose 'out' entry is the output bias vector.

    Returns:
        The LSTM output at the final sequence step, multiplied by
        weight['out'] and offset by bias['out'] (no non-linearity applied).

    Reads the notebook-level constants nInput, nRows and nHidden.
    """
    # The recurrent op consumes a Python list with one (batchSize, nInput)
    # tensor per sequence step, so the batch has to be rearranged first.

    # (batchSize, nRows, nInput) -> (nRows, batchSize, nInput)
    timeMajor = tf.transpose(x, [1, 0, 2])
    # Flatten to 2D: (nRows * batchSize, nInput)
    flatRows = tf.reshape(timeMajor, [-1, nInput])
    # Cut along dimension 0 into nRows equal pieces
    rowSequence = tf.split(0, nRows, flatRows)

    # A single basic LSTM cell with nHidden units
    cell = rnn_cell.BasicLSTMCell(nHidden, forget_bias=1.0)

    # Run the cell over the sequence; the returned state is not needed here
    stepOutputs, _ = rnn.rnn(cell, rowSequence, dtype=tf.float32)

    # Linear read-out of the last step's output
    lastOutput = stepOutputs[-1]
    projected = tf.matmul(lastOutput, weight['out'])
    return projected + bias['out']

# Build the prediction op: RNN() rearranges the input batch into a list of
# per-row tensors, runs the LSTM over it and projects the last output.
predY = RNN(x, weight, bias)

# Define cost function and optimizer.
# logits/labels are passed by keyword rather than by position: the positional
# order of this function is not the same across TensorFlow releases (later
# releases reject positional calls outright), whereas the keyword form is
# unambiguous everywhere.
costFun = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=predY, labels=y))
optimizerFun = tf.train.AdamOptimizer(learning_rate=learnRate).minimize(costFun)

# Evaluate Model: a prediction is correct when the highest-scoring class
# matches the position of the 1 in the label vector; accuracy is the mean
# of those per-example matches.
predResult = tf.equal(tf.argmax(predY, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(predResult, tf.float32))

# Op that initializes all variables; it must be run in a session before training
init = tf.global_variables_initializer()

In [None]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until the number of examples processed reaches maxIter
    while step * batchSize < maxIter:
        batch_x, batch_y = mnist.train.next_batch(batchSize)
        # Reshape each flat image into nRows sequence steps of nInput values.
        # -1 lets the batch dimension follow the data, as the test reshape does.
        batch_x = batch_x.reshape((-1, nRows, nInput))
        batchFeed = {x: batch_x, y: batch_y}
        # Run optimization op (backprop)
        sess.run(optimizerFun, feed_dict=batchFeed)
        if step % printStep == 0:
            # Fetch batch accuracy and batch loss in a single run so the
            # forward pass is evaluated once instead of once per metric
            acc, loss = sess.run([accuracy, costFun], feed_dict=batchFeed)
            print("Iter ", str(step*batchSize), ", Minibatch Loss= ",
                  "{:.6f}".format(loss), ", Training Accuracy= ",
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy for the first test_len MNIST test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, nRows, nInput))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

Iter  1280 , Minibatch Loss=  1.590937 , Training Accuracy=  0.45312
Iter  2560 , Minibatch Loss=  1.362887 , Training Accuracy=  0.53125
Iter  3840 , Minibatch Loss=  1.085207 , Training Accuracy=  0.62500
Iter  5120 , Minibatch Loss=  0.857014 , Training Accuracy=  0.70312
Iter  6400 , Minibatch Loss=  0.764124 , Training Accuracy=  0.75781
Iter  7680 , Minibatch Loss=  1.044456 , Training Accuracy=  0.61719
Iter  8960 , Minibatch Loss=  0.754498 , Training Accuracy=  0.78906
Iter  10240 , Minibatch Loss=  0.576162 , Training Accuracy=  0.83594
Iter  11520 , Minibatch Loss=  0.361728 , Training Accuracy=  0.88281
Iter  12800 , Minibatch Loss=  0.632104 , Training Accuracy=  0.78906
Iter  14080 , Minibatch Loss=  0.436334 , Training Accuracy=  0.85938
Iter  15360 , Minibatch Loss=  0.353112 , Training Accuracy=  0.88281
Iter  16640 , Minibatch Loss=  0.394575 , Training Accuracy=  0.89062
Iter  17920 , Minibatch Loss=  0.263487 , Training Accuracy=  0.88281
Iter  19200 , Minibatch Los