# TensorFlow code for a Recurrent Neural Network

In this section we will go through the code for Recurrent Neural Network in TensorFlow.

Built around the implementation by [Aymeric Damien](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/convolutional_network.py)

First of all we set up the required imports and define the location of the mnist data.

In [None]:
from __future__ import division, print_function, absolute_import
import os
from time import time
from datetime import datetime
import tensorflow as tf
from tensorflow.contrib import rnn

# Set TensorFlow's logging verbosity threshold to ERROR
tf.logging.set_verbosity(tf.logging.ERROR)

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Read the data sets from "../scratch/"; one_hot=True requests one-hot encoded labels
mnist = input_data.read_data_sets("../scratch/", one_hot=True)

Here are the relevant training parameters, network parameters and graph inputs. To classify images using a recurrent neural network, we treat every row of the image as one step of a sequence. The MNIST images are 28*28px, so each sample is processed as a sequence of 28 timesteps with 28 input values per step.

In [None]:
# Training Parameters
learning_rate = 0.001 # Learning rate passed to the optimizer
# NOTE: despite its name this is a number of training steps, not epochs;
# the training loop draws one mini-batch of `batch_size` images per step.
training_epochs = 10000 # Number of training steps to run
batch_size = 128 # Number of images per batch
display_step = 200 # How often (in steps) to output model metrics during training

# Network Parameters
num_input = 28 # data input length (img shape: 28*..)
timesteps = 28 # timesteps length (img shape: ..*28)
num_hidden = 128 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
# X: batch of inputs shaped (batch, timesteps, num_input); None leaves the batch size open
X = tf.placeholder("float", [None, timesteps, num_input], name='X')
# Y: matching labels shaped (batch, num_classes); None leaves the batch size open
Y = tf.placeholder("float", [None, num_classes], name='Y')

Initialise weights and biases for the network.

In [None]:
# Output-layer parameters, both initialised from tf.random_normal.
# 'out' weights have shape [num_hidden, num_classes]
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]), name='Weights')}
# 'out' biases have shape [num_classes]
biases = {'out': tf.Variable(tf.random_normal([num_classes]), name='Biases')}

### Model Creation

`tf.unstack` - Re-shapes the input for the RNN. Input data shape = (batch_size, timesteps, num_input). Shape required by the RNN = a list of `timesteps` tensors, each of shape (batch_size, num_input).

`rnn.BasicLSTMCell` - Default LSTM recurrent network cell.

`rnn.static_rnn` - Defines a recurrent neural network with the specified cell.

In [None]:
def RNN(x, weights, biases):
    """Build a single-layer LSTM classifier and return its output logits.

    Reads the module-level `timesteps` and `num_hidden` settings.

    Args:
        x: Input tensor; it is unstacked along axis 1 into `timesteps`
            tensors (the notebook feeds shape (batch_size, timesteps, num_input)).
        weights: Dict whose 'out' entry is the output projection matrix.
        biases: Dict whose 'out' entry is the output bias.

    Returns:
        The last LSTM output multiplied by weights['out'], plus biases['out'].
    """
    # static_rnn takes a Python list with one tensor per timestep
    step_inputs = tf.unstack(x, timesteps, 1)

    # Basic LSTM cell with num_hidden units
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, name='lstm_cell')

    # Run the cell over the sequence; the final state is not needed here
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear projection of the output produced at the last timestep
    final_output = step_outputs[-1]
    projected = tf.matmul(final_output, weights['out'], name='out')
    return projected + biases['out']

### Define loss and optimizer

In the following snippet we define our loss operation, optimiser and training operation. (The global variables are initialised in a later cell.)

`tf.reduce_mean` - Computes the mean of elements across dimensions of a tensor.

`tf.train.GradientDescentOptimizer` - Optimizer that implements the gradient descent algorithm.

`optimizer.minimize` - Takes care of both computing the gradients and applying them with respect to `loss_op`.

In [None]:
# Forward pass: raw class scores, and their softmax for evaluation
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Loss: softmax cross-entropy between logits and labels, averaged into one value
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
loss_op = tf.reduce_mean(cross_entropy, name='loss_op')

# Optimizer: plain gradient descent using the configured learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# Running train_op performs one minimisation step on loss_op
train_op = optimizer.minimize(loss_op, name='train_op')

### Define evaluation metrics

In [None]:
# Index of the highest-scoring class in the prediction and in the label
predicted_class = tf.argmax(prediction, 1)
true_class = tf.argmax(Y, 1)
# Element-wise match between predicted and true class
correct_pred = tf.equal(predicted_class, true_class, name='correct_pred')
# Accuracy is the mean of the matches after casting them to float
correct_as_float = tf.cast(correct_pred, tf.float32)
accuracy = tf.reduce_mean(correct_as_float, name='accuracy')

Create the op that initializes the variables (i.e. assigns their initial values). It is run at the start of the training session.

In [None]:
# Op that initialises all global variables; it is executed with sess.run(init) in the training cell
init = tf.global_variables_initializer()

### Setup tensorboard

In [None]:
# Define writer for tensorboard log output.
# Each run logs to its own "rnn-tb-<timestamp>" directory under the current
# working directory.
# datetime.now() is used rather than datetime.fromtimestamp(time()) so this
# cell does not depend on the name `time`: the ngrok cell's `import time`
# rebinds it to the module, after which calling time() raises TypeError.
run_log_dir = os.path.join(os.getcwd(), "rnn-tb-" + str(datetime.now()))
writer = tf.summary.FileWriter(run_log_dir, graph=tf.get_default_graph())

# Define and name tensorboard histograms
tf.summary.histogram("loss", loss_op)
tf.summary.histogram("accuracy", accuracy)

# Alternative (disabled): scalar summaries for the same tensors
# Create a summary to monitor cost tensor
#tf.summary.scalar("loss", loss_op)
# Create a summary to monitor accuracy tensor
#tf.summary.scalar("accuracy", accuracy)

# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

### Train and evaluate the model

In [None]:
from matplotlib import pyplot as plt
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, training_epochs+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc, summary = sess.run([loss_op, accuracy, merged_summary_op], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            writer.add_summary(summary, step)
            
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))
    
    pred = tf.nn.softmax(logits)
    # Build confusion matrix from ground truth labels and model predictions
    conf_mat = tf.confusion_matrix(tf.argmax(Y, 1),tf.argmax(pred, 1)).eval({X: test_data, Y: test_label})
    #print('Confusion matrix:\n', conf_mat)
    %matplotlib inline
    # Plot matrix
    plt.matshow(conf_mat)
    plt.colorbar()
    plt.ylabel('Real Class')
    plt.xlabel('Predicted Class')
    plt.show()

### Setup tensorboard using an ngrok tunnel

In [None]:
import time
import subprocess
import os
import signal

def get_process_pid(pstring):
    """Return the PID of a process whose `ps ax` line matches `pstring`.

    The lookup runs `ps ax | grep <pstring>` in a shell and drops lines
    containing "grep" or "defunct".

    Args:
        pstring: Pattern handed to grep. It is inserted into the shell
            command unquoted, so pass only simple, trusted strings.

    Returns:
        The first whitespace-separated field (the PID, as a string) of the
        last matching line, or None when nothing matches.
    """
    pid = None
    command = "ps ax | grep " + pstring + " | grep -v grep | grep -v defunct"
    # The with-block closes the pipe once the output has been read
    with os.popen(command) as ps_output:
        for line in ps_output:
            fields = line.split()
            # Skip blank lines instead of failing on fields[0]
            if fields:
                pid = fields[0]
    return pid

LOG_DIR = os.getcwd()
NG_DIR = LOG_DIR
# Uncomment if running locally
NG_DIR = os.path.dirname(LOG_DIR)
NG_ZIP = os.path.join(NG_DIR, 'ngrok-stable-linux-amd64.zip')
NG_BIN = os.path.join(NG_DIR, 'ngrok')

# Download ngrok binary
if not os.path.isfile(NG_ZIP):
    !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip \
        -P {NG_DIR}
if not os.path.isfile(NG_BIN):        
    !unzip -o {NG_DIR}/ngrok-stable-linux-amd64.zip -d {NG_DIR}

# If tensorboard is alredy running kill it and restart with the correct logdir
tb_pid = get_process_pid('tensorboard')
if tb_pid:
    print("Killing old tensorboard")
    os.kill(int(tb_pid), signal.SIGKILL)
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)
tb_pid = get_process_pid('tensorboard')
print ("Started tensorboard with pid %s" % tb_pid)

# If ngrok is alredy running do nothing
ng_pid = get_process_pid('ngrok')
if not ng_pid:
    proc = subprocess.Popen(['%s/ngrok' % NG_DIR , 'http', '6006'])
    print ("Started ngrok with pid %s" % proc.pid)
    time.sleep(5)
else:
    print ("ngrok alredy runing")
ng_pid = get_process_pid('ngrok')

# Get ngrok link
try:
    ! curl -s http://localhost:4040/api/tunnels | python3 -c \
        "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
except:
    print("Error getting ngrok link. Retrying...")
    time.sleep(5)
    ! curl -s http://localhost:4040/api/tunnels | python3 -c \
        "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

In [None]:
# Cleanup
#procs = [tb_pid, ng_pid]
#[os.kill(int(x), signal.SIGKILL) for x in procs if x is not None]
#!rm -rf rnn-tb-*

### Experiment
Now try experimenting with the model. What effects do you see when changing the model parameters?
 - learning_rate
 - training_epochs
 - batch_size
 - num_hidden
 
Try adding additional LSTM cells to the model.
 - Hint: Use a stacked cell - ```lstm_cell = rnn.MultiRNNCell( <list_of_lstm_cells> )```

## End of RNN Notebook