In [1]:
# code adapted from https://github.com/ageron/handson-ml/blob/master/14_recurrent_neural_networks.ipynb

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Input geometry: images are presented to the RNN as sequences of n_steps
# time steps, each step being a vector of n_inputs values.
n_steps, n_inputs = 28, 28

# Network size.
n_layers = 3      # number of stacked recurrent layers
n_neurons = 150   # units in each recurrent layer
n_outputs = 10    # width of the final dense (logits) layer

# Step size handed to the optimizer.
learning_rate = 1e-3

In [4]:
# Load the MNIST dataset from the archives under /tmp/data/ and keep the
# test split at hand for evaluation.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/")

# Test labels, and test images reshaped to (n_examples, n_steps, n_inputs)
# so they match the layout the network input expects.
y_test = mnist.test.labels
X_test = mnist.test.images.reshape(-1, n_steps, n_inputs)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [5]:
# Graph inputs: a batch of input sequences and the matching int32 labels.
# The leading None leaves the batch dimension unspecified.
X = tf.placeholder(dtype=tf.float32, shape=(None, n_steps, n_inputs))
y = tf.placeholder(dtype=tf.int32, shape=(None,))

In [6]:
# create a multi-layer RNN with LSTM memory, ReLU activation, and dropout
cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)
         for layer in range(n_layers)]
cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=0.5)
              for cell in cells]
multi_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)
outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)

In [7]:
top_layer_h_state = states[-1][1]

# softmax output
logits = tf.layers.dense(top_layer_h_state, n_outputs, name="softmax")

# train with cross-entropy loss function and Adam optimizer
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# evaluation (accuracy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# network initialization
init = tf.global_variables_initializer()

In [8]:
n_epochs = 10
batch_size = 150

# train using batch gradient descent
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Epoch", epoch, "Train accuracy =", acc_train, "Test accuracy =", acc_test)

Epoch 0 Train accuracy = 0.9066667 Test accuracy = 0.9185
Epoch 1 Train accuracy = 0.94 Test accuracy = 0.9515
Epoch 2 Train accuracy = 0.96 Test accuracy = 0.9648
Epoch 3 Train accuracy = 0.97333336 Test accuracy = 0.9603
Epoch 4 Train accuracy = 0.98 Test accuracy = 0.9651
Epoch 5 Train accuracy = 0.96 Test accuracy = 0.9705
Epoch 6 Train accuracy = 0.96 Test accuracy = 0.9684
Epoch 7 Train accuracy = 0.99333334 Test accuracy = 0.9689
Epoch 8 Train accuracy = 0.9866667 Test accuracy = 0.9741
Epoch 9 Train accuracy = 0.9866667 Test accuracy = 0.971
