# Sequential MNIST using RNNs

* To classify images using an **RNN**, we consider **every image row** as a **sequence of pixels**. 
* Because MNIST image shape is 28*28px, we will then handle **28 sequences of 28 timesteps** for every sample.

## System Information

In [1]:
from pathlib import Path
import random 
from datetime import datetime

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
import data

### Load Data

#### Step 1: Read in data

In [2]:
batch_size=64
mnist_folder = '../data/'
data.download_mnist(mnist_folder)
train, val, test = data.read_mnist(mnist_folder, flatten=False)

# Add validatation set into training as we don't need it for this example
new_train_images = np.concatenate([train[0],val[0]],axis=0)
new_train_labels = np.concatenate([train[1],val[1]],axis=0)
train = (new_train_images,new_train_labels)
del val

# reshape
train = (train[0].reshape(-1,784),train[1])
test = (test[0].reshape(-1,784),test[1])

../data/train-images-idx3-ubyte.gz already exists
../data/train-labels-idx1-ubyte.gz already exists
../data/t10k-images-idx3-ubyte.gz already exists
../data/t10k-labels-idx1-ubyte.gz already exists


In [3]:
print("Shape of:")
print("- Training-set:\t\t{}".format(train[0].shape))
print("- Test-set:\t\t{}".format(test[0].shape))
print("- Training-labels:\t\t{}".format(train[1].shape))
print("- Test-set-labels:\t\t{}".format(test[1].shape))

Shape of:
- Training-set:		(60000, 784)
- Test-set:		(10000, 784)
- Training-labels:		(60000, 10)
- Test-set-labels:		(10000, 10)


### Create Batch Iterators

In [4]:
def batch(iterable, batch_size=1):
    num_samples = len(iterable)
    for index in range(0, num_samples, batch_size):
        yield iterable[index:min(index + batch_size, num_samples)]        

def batch_sequences(iterable, batch_size=1,timesteps=28,num_input=28):
    num_samples = len(iterable)
    for index in range(0, num_samples, batch_size):
        batch = iterable[index:min(index + batch_size, num_samples)]
        # Reshape data to get 28 sequences of 28 elements
        yield batch.reshape((-1, timesteps, num_input))

#### Create our iterators

In [5]:
batch_x = batch_sequences(train[0],batch_size=batch_size)
batch_y = batch(train[1],batch_size=batch_size)

### Improved Version of the LSTM Model

1. Use LSTMBlockCell, which should be faster than BasicLSTMCell
2. Replace manual weight definitions with tf.layers.Dense
3. Group graph definition together
4. Replace `static_rnn` with `dynamic_rnn`. (So no need to unstack the tensor.)
5. Add a batch_normalization layer between LSTM and Dense layers.
6. Add gradient clipping to guard against exploding gradients
7. Add a checkpoint saver
8. Replace GradientDescentOptimizer with RMSPropOptimizer
9. Use tf.set_random_seed to control randomness
10. Do a test check at every display epoch (not actually best practice, but nice to see).

In [7]:
# Training Parameters
learning_rate = 0.001
#training_steps = 10000
epochs = 60
batch_size = 128
display_step = batch_size * 3

# Network Parameters
num_input = 28 # MNIST data input (img shape: 28*28)
timesteps = 28 # timesteps
num_hidden = 256 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])

In [8]:
# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}

In [9]:
# Training Parameters
learning_rate = 0.02
epochs = 60
batch_size = 32
display_epoch = 2

# Network Parameters
num_input = 28 # MNIST data input (img shape: 28*28)
timesteps = 28 # timesteps
num_hidden = 64 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

In [10]:
def RNN(x):
    # Define a lstm cell with tensorflow
    lstm_cell = rnn.LSTMBlockCell(
        num_hidden, forget_bias=1.0)

    # Get lstm cell output
    # outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(
        cell=lstm_cell, inputs=x, time_major=False, dtype=tf.float32)
    
    output_layer = tf.layers.Dense(
        num_classes, activation=None, 
        kernel_initializer=tf.orthogonal_initializer()
    )
    return output_layer(tf.layers.batch_normalization(outputs[:, -1, :]))

In [11]:
# Need to clear the default graph before moving forward
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    tf.set_random_seed(1)
    # tf Graph input
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    logits = RNN(X)
    prediction = tf.nn.softmax(logits)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=Y))
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    gvs = optimizer.compute_gradients(loss_op)
    capped_gvs = [
        (tf.clip_by_norm(grad, 2.), var) if not var.name.startswith("dense") else (grad, var)
        for grad, var in gvs]
    for _, var in gvs:
        if var.name.startswith("dense"):
            print(var.name)    
    train_op = optimizer.apply_gradients(capped_gvs)  

    # Evaluate model (with test logits, for dropout to be disabled)
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    
    print("All parameters:", np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]))
    print("Trainable parameters:", np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.trainable_variables()]))    

dense/kernel:0
dense/bias:0
All parameters: 73886
Trainable parameters: 24586


In [12]:
num_batches = len(train[0])//batch_size
print("num batches: ", num_batches)

# Start training
config = tf.ConfigProto()
#config.gpu_options.allow_growth = True
best_val_acc = 0.8
with tf.Session(graph=graph, config=config) as sess:
#with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    for epoch in range(epochs):
        batch_x = batch_sequences(train[0],batch_size=batch_size)
        batch_y = batch(train[1],batch_size=batch_size)        
        
        for step in range(1, num_batches):
            next_image = next(batch_x)
            next_label = next(batch_y)
            sess.run(train_op, feed_dict={X: next_image, Y: next_label})

        if epoch % display_epoch == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: next_image,
                                                                 Y: next_label})
            
            # Calculate accuracy for 128 mnist test images
            test_len = 128
            test_data = test[0][:test_len].reshape((-1, timesteps, num_input))
            test_label = test[1][:test_len]
            val_acc = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
            print("Epoch " + str(epoch) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc) + ", Test Accuracy= " + \
                  "{:.3f}".format(val_acc))

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            save_path = saver.save(sess, "logs/31_TF_Sequential_MNIST_Improved/model.ckpt", global_step=step)
            print("Model saved in path: %s" % save_path)
    print("Optimization Finished!")

num batches:  1875
Step 1874, Minibatch Loss= 0.0159, Training Accuracy= 1.000, Test Accuracy= 0.961
Model saved in path: logs/31_TF_Sequential_MNIST_Improved/model.ckpt-1874
Step 1874, Minibatch Loss= 0.1232, Training Accuracy= 0.969, Test Accuracy= 0.969
Model saved in path: logs/31_TF_Sequential_MNIST_Improved/model.ckpt-1874


KeyboardInterrupt: 