In [1]:
#requirements
import tensorflow as tf
print 'TensorFlow version:', tf.__version__
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
from tqdm import tqdm

TensorFlow version: 0.10.0


# Recurrent Neural Networks

Re-implementation of 
https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb following the steps presented in the [TensorFlow Mechanics 101 tutorial](https://www.tensorflow.org/versions/r0.10/tutorials/mnist/tf/index.html).

## Prepare the Data

### Customizable parameters

In [2]:
# Training hyper-parameters
learning_rate = 1e-3  # step size handed to the optimizer
# Number of mini-batch updates to run. 500 suffices for a taste;
# Damien uses 100000 (10000 is a middle ground).
training_iters = 500
batch_size = 128  # examples per mini-batch

### Download

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST archives from ../data/MNIST_data/. one_hot=True gives the
# one-hot label vectors that are inspected further down.
mnist = input_data.read_data_sets("../data/MNIST_data/", one_hot=True)

Extracting ../data/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../data/MNIST_data/t10k-labels-idx1-ubyte.gz


To recap, the MNIST dataset is used for image classification, so the data contains images and a label for each image. Let's take a look at a sample record to see what will be going into the model.

Here we take the first record and break the record into its image and class components:

In [4]:
# Split the first training example into its pixel vector and its label vector.
x0, y0 = mnist.train.images[0], mnist.train.labels[0]

Each input image is 28 pixels by 28 pixels and is encoded as an array of length 28x28=784:

In [5]:
# Shape of a single image as stored in the dataset
x0.shape

(784,)

The class label is an array with length 10 and is one-hot encoded to represent the number in the image:

In [6]:
# Inspect the label that belongs to the first training image
print 'shape: ', y0.shape
print 'contents: ', y0

shape:  (10,)
contents:  [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]


Here we see that this record is labeled as "7" since [0 0 0 0 0 0 0 1 0 0] represents [0 1 2 3 4 5 6 **7** 8 9]

### Inputs and Placeholders

Rather than feed all 784 pixels into the NN at the same time, we will take advantage of the RNN by feeding in 28 pixels per step. In order to read all 784 pixels we will thus need 28 steps.

In [7]:
# Network dimensions
n_input = 28    # pixels fed in per timestep (the image is 28*28 pixels)
n_steps = 28    # timesteps needed to consume a whole image
n_hidden = 250  # number of features in the hidden layer
n_classes = 10  # one class per digit, 0-9

# Graph inputs.
# The leading dimension is None so that the batch size is not fixed:
# we can train on batches of 128 images and still classify a single image
# at a time (which we do at the end of this notebook).
x = tf.placeholder("float", [None, n_steps, n_input])
# Labels: None is used for the batch dimension for the same reason.
y = tf.placeholder("float", [None, n_classes])

# Output layer that maps the hidden features to one score per class
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

## Build the Graph

### Inference

In [8]:
def RNN(x, weights, biases):
    """Build the LSTM classifier and return its unnormalised class scores.

    Args:
        x: input tensor laid out as (batch_size, n_steps, n_input). It is
            handed to `tf.nn.dynamic_rnn` in exactly that layout, so no
            transpose/reshape/split into a per-timestep list is performed.
        weights: dict whose 'out' entry is the (n_hidden, n_classes) output
            projection matrix.
        biases: dict whose 'out' entry is the (n_classes,) output bias.

    Returns:
        `matmul(last_output, weights['out']) + biases['out']`, where
        `last_output` is the LSTM output at the final timestep.

    Note:
        The cell width is read from the module-level `n_hidden`.
    """
    # A single LSTM cell with n_hidden units
    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)

    # Run the cell over the sequence; the returned state is not needed here
    per_step_outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # Swap the first two axes so that the final timestep can be selected
    # with a single gather on the leading axis
    time_major = tf.transpose(per_step_outputs, [1, 0, 2])
    final_step = int(time_major.get_shape()[0]) - 1
    last_output = tf.gather(time_major, final_step)

    # Linear activation, using the last output of the rnn inner loop
    return tf.matmul(last_output, weights['out']) + biases['out']


# Class scores for whatever batch is fed into `x`
pred = RNN(x, weights, biases)

### Loss

In [9]:
# Softmax cross-entropy averaged with reduce_mean: `pred` is passed as the
# logits argument and `y` (the one-hot labels) as the targets.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))

### Training

In [10]:
# Adam update that minimises `cost`. Note that `optimizer` holds the result of
# minimize(), i.e. what is run once per batch in the training loop.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Building this op emits a "may consume a large amount of memory" warning.
# It occurs because we have an input Tensor with a first dimension of None
# (the reason for that is explained where the placeholders are defined).
#
# We can ignore the warning since the batch size is predefined as 128
# by `batch_size`.

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


## Train the Model

In [11]:
# Start our TensorFlow session
sess = tf.Session()
# Initialize the variables we defined above
init = tf.initialize_all_variables()
sess.run(init)

In [12]:
# One optimisation step per iteration; tqdm draws the progress bar.
for step in tqdm(range(training_iters)):
    images, labels = mnist.train.next_batch(batch_size)
    # Reshape each flat image into n_steps sequences of n_input elements
    sequences = images.reshape((batch_size, n_steps, n_input))
    # Backprop on this batch
    sess.run(optimizer, feed_dict={x: sequences, y: labels})

100%|██████████| 500/500 [05:06<00:00,  3.01it/s]


## Evaluate the Model

### Build the Eval Graph

In [13]:
# A prediction counts as correct when the highest-scoring class matches the
# position of the largest entry in the one-hot label.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
# Cast the booleans to floats and average them to get the fraction correct
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

### Eval Output

In [14]:
# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
test_label = mnist.test.labels[:test_len]
print "Testing Accuracy:", \
    sess.run(accuracy, feed_dict={x: test_data, y: test_label})

Testing Accuracy: 0.984375


## Predict a Record

In [15]:
pred_class = tf.argmax(pred, 1)
x_pred, y_pred_actual = mnist.test.next_batch(1)
x_pred = x_pred.reshape((-1, n_steps, n_input))
y_pred_pred = np.zeros([1,len(y_pred_actual[0])])
y_pred_index = pred_class.eval(feed_dict={x: x_pred}, session=sess)
y_pred_pred[0][y_pred_index] = 1

print 'actual   \t', y_pred_actual

print 'predicted\t', y_pred_pred

actual   	[[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]]
predicted	[[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]]
