# A Bidirectional Recurrent Neural Network (LSTM) implementation using Tensorflow Library

[paper](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf)

In [4]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import numpy as np

## Import MNIST Dataset

In [5]:
# Load MNIST through the tutorial helper, storing the files under /tmp/data/;
# one_hot=True requests labels as one-hot vectors instead of digit indices.
mnist = input_data.read_data_sets(
    "/tmp/data/",
    one_hot=True
)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


To classify images using a bidirectional recurrent neural network, we consider every image row as a sequence of pixels.

Because each MNIST image is 28*28px, every sample is fed to the network as 28 time steps (one per image row) of 28 pixel values each.

## Parameters

In [6]:
# Optimisation settings consumed by the optimizer and the training loop.
learning_rate = 1e-3     # step size handed to the Adam optimizer
training_iters = 100000  # training stops once step * batch_size reaches this value
batch_size = 128         # number of samples drawn per training step
display_step = 10        # batch loss/accuracy are printed every `display_step` steps

## Network Parameters

In [7]:
# Geometry of the network: images are consumed one row per time step.
n_input = 28    # values fed at each time step (MNIST img shape: 28*28)
n_steps = 28    # number of time steps per sample
n_hidden = 128  # number of features in each LSTM cell
n_classes = 10  # MNIST total classes (digits 0-9)

## tf Graph input

In [8]:
# Image batch laid out as (batch, time step, values per step); the batch
# dimension is left open (None) so any number of samples can be fed.
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_input])
# Initial states of the forward and backward LSTM cells. The TensorFlow LSTM
# cell requires a state of length 2 * n_hidden (state & cell) per sample.
istate_fw = tf.placeholder(tf.float32, shape=[None, 2*n_hidden])
istate_bw = tf.placeholder(tf.float32, shape=[None, 2*n_hidden])
# Target labels, one vector of n_classes entries per sample.
y = tf.placeholder(tf.float32, shape=[None, n_classes])

## Define weights and biases

In [10]:
# Trainable projection matrices, randomly initialised with tf.random_normal.
weights = dict(
    # maps one input row (n_input values) to the 2*n_hidden-wide hidden projection
    hidden=tf.Variable(tf.random_normal([n_input, 2*n_hidden])),
    # maps a 2*n_hidden-wide RNN output to one score per class
    out=tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
)

In [11]:
# Trainable bias vectors, randomly initialised with tf.random_normal; the keys
# pair up with the entries of `weights`.
biases = dict(
    # added to the hidden projection (2*n_hidden values)
    hidden=tf.Variable(tf.random_normal([2*n_hidden])),
    # added to the class scores (n_classes values)
    out=tf.Variable(tf.random_normal([n_classes]))
)

## Build model

In [15]:
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    """Build the bidirectional LSTM graph and return the class scores (logits).

    Every tensor the model depends on is taken from the arguments; only the
    size constants ``n_input``, ``n_steps`` and ``n_hidden`` are read from
    module scope.

    Args:
        _X: input tensor of shape (batch_size, n_steps, n_input).
        _istate_fw: initial state handed to the forward LSTM cell.
        _istate_bw: initial state handed to the backward LSTM cell.
        _weights: mapping with the projection matrices under the keys
            'hidden' (applied to the inputs) and 'out' (applied to the last
            RNN output).
        _biases: mapping with the bias vectors under the keys 'hidden' and
            'out', added after the matching matrix product.

    Returns:
        The result of ``matmul(last_output, _weights['out']) + _biases['out']``,
        i.e. one row of unnormalised class scores per sample.
    """
    # input shape: (batch_size, n_steps, n_input)
    # Permute n_steps and batch_size so that time becomes the leading axis.
    time_major = tf.transpose(_X, [1, 0, 2])
    # Flatten to (n_steps*batch_size, n_input) to apply one shared projection.
    flat_inputs = tf.reshape(time_major, [-1, n_input])
    # Linear activation. Uses the `_biases` argument, not the module-level
    # `biases`, so the function honours whatever the caller passes in.
    hidden = tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.)
    # Backward direction cell
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.)

    # The RNN inner loop needs a list of inputs, one per time step: split the
    # leading axis into n_steps tensors of shape (batch_size, hidden_width),
    # where hidden_width is the column count of _weights['hidden'].
    step_inputs = tf.split(0, n_steps, hidden)

    # Get lstm cell outputs, seeding both directions with the states that were
    # passed in as arguments (not the module-level placeholders).
    # NOTE(review): indexing `outputs[-1]` below assumes this TensorFlow release
    # returns the plain list of per-step outputs from bidirectional_rnn.
    outputs = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, step_inputs,
                                      initial_state_fw=_istate_fw,
                                      initial_state_bw=_istate_bw)

    # Linear activation on the inner loop's last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']

In [16]:
# Build the model graph once from the placeholders and variables defined above;
# `pred` is what the loss and accuracy cells below treat as the class scores (logits).
pred = BiRNN(x, istate_fw, istate_bw, weights, biases)

## Define loss and optimizer

In [17]:
# Softmax loss: cross-entropy between the predicted scores and the labels,
# averaged over the samples of the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
)

In [18]:
# Adam optimizer; `optimizer` is the op that the training loop runs to minimize `cost`.
optimizer = (
    tf.train.AdamOptimizer(learning_rate=learning_rate)
    .minimize(cost)
)

## Evaluate the model

In [20]:
# A sample is correct when the index of its highest score equals the index of
# the largest entry of its label vector.
correct_pred = tf.equal(
    tf.argmax(pred, 1),
    tf.argmax(y, 1)
)
# Accuracy: cast the per-sample booleans to float32 and take their mean.
accuracy = tf.reduce_mean(
    tf.cast(correct_pred, tf.float32)
)

## Initialize the variables

In [21]:
# Op that initializes every tf.Variable defined above; it is run first inside the session.
init = tf.initialize_all_variables()

## Launch the graph

In [27]:
def _zero_state_feed(images, labels):
    """Return a feed dict for `images`/`labels` with all-zero initial LSTM states.

    The state arrays get one row per image, so the same helper serves both the
    training batches and the test subset.
    """
    n_samples = len(images)
    return {x: images, y: labels,
            istate_fw: np.zeros((n_samples, 2*n_hidden)),
            istate_bw: np.zeros((n_samples, 2*n_hidden))}


with tf.Session() as sess:
    sess.run(init)
    step = 1

    # Keep training until step * batch_size reaches training_iters
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape each flat image into n_steps rows of n_input values
        batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        # The feed is built once and reused for the update and the metrics
        batch_feed = _zero_state_feed(batch_xs, batch_ys)
        # Fit training using batch data
        sess.run(optimizer, feed_dict=batch_feed)

        if step % display_step == 0:
            # Fetch batch accuracy and batch loss in a single graph execution
            # instead of running the forward pass twice on the same inputs
            acc, loss = sess.run([accuracy, cost], feed_dict=batch_feed)
            print("Iter {}, Minibatch Loss={:.6f}, Training Accuracy= {:.5f}".format(step*batch_size, loss, acc))
        step += 1

    print("Optimization Finished!")
    # Calculate accuracy on the first `test_len` mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    test_acc = sess.run(accuracy, feed_dict=_zero_state_feed(test_data, test_label))
    print("Testing Accuracy: {}".format(test_acc))

Iter 1280, Minibatch Loss=10.846947, Training Accuracy= 0.28906
Iter 2560, Minibatch Loss=3.773214, Training Accuracy= 0.28125
Iter 3840, Minibatch Loss=1.720959, Training Accuracy= 0.46094
Iter 5120, Minibatch Loss=1.330209, Training Accuracy= 0.61719
Iter 6400, Minibatch Loss=1.154310, Training Accuracy= 0.61719
Iter 7680, Minibatch Loss=1.157965, Training Accuracy= 0.61719
Iter 8960, Minibatch Loss=0.950154, Training Accuracy= 0.66406
Iter 10240, Minibatch Loss=0.857125, Training Accuracy= 0.71094
Iter 11520, Minibatch Loss=0.769878, Training Accuracy= 0.75000
Iter 12800, Minibatch Loss=0.801022, Training Accuracy= 0.74219
Iter 14080, Minibatch Loss=0.811657, Training Accuracy= 0.70312
Iter 15360, Minibatch Loss=0.870333, Training Accuracy= 0.71094
Iter 16640, Minibatch Loss=0.731207, Training Accuracy= 0.73438
Iter 17920, Minibatch Loss=0.652006, Training Accuracy= 0.80469
Iter 19200, Minibatch Loss=0.645484, Training Accuracy= 0.77344
Iter 20480, Minibatch Loss=0.521848, Training 