In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# --- optimisation settings ---------------------------------------------
# NOTE(review): learning_rate is not read by the code visible in this
# notebook (the optimizer is built with a literal step size) - confirm intent.
learning_rate = 1e-3
training_iters = 100000  # upper bound on the number of training samples consumed
batch_size = 128         # samples drawn per training step

# --- model dimensions --------------------------------------------------
# Each 28x28 MNIST image is read as a sequence of 28 rows of 28 pixels.
n_steps = 28           # sequence length (rows per image), equivalent to truncated_backprop_length
n_inputs = 28          # features per step (pixels per row)
n_hidden_units = 128   # neurons in hidden layer, equivalent to state_size
n_classes = 10         # MNIST classes (0-9 digits)

# --- data --------------------------------------------------------------
# Read MNIST from the local 'data' directory with one-hot encoded labels.
mnist = input_data.read_data_sets('data', one_hot=True)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


In [3]:
# Graph inputs: a batch of image sequences and their one-hot labels.
# The leading dimension is left as None so the batch size is not fixed here.
x = tf.placeholder(dtype=tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

# Projection matrices, drawn from tf.random_normal with its default parameters.
_w_in = tf.Variable(tf.random_normal(shape=[n_inputs, n_hidden_units]))
_w_out = tf.Variable(tf.random_normal(shape=[n_hidden_units, n_classes]))
weights = {'in': _w_in, 'out': _w_out}

# Bias vectors, every entry initialised to 0.1.
_b_in = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))
_b_out = tf.Variable(tf.constant(0.1, shape=[n_classes]))
biases = {'in': _b_in, 'out': _b_out}

In [4]:
def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return unnormalised class scores.

    The input is projected step by step into the hidden size, run through a
    single BasicLSTMCell, and the output of the last time step is projected
    with the 'out' weights and bias.

    The batch size is taken from the input at run time, so the same graph can
    be fed batches of any size (for example a full evaluation set), not only
    the training ``batch_size``.

    Args:
        X: input tensor. It is flattened to rows of ``n_inputs`` features and
            regrouped into sequences of ``n_steps`` steps, so it is expected
            to hold (batch, n_steps, n_inputs) values.
        weights: dict with 'in' and 'out' projection matrices.
        biases: dict with 'in' and 'out' bias vectors.

    Returns:
        Tensor ``last_output @ weights['out'] + biases['out']`` with one row
        per sequence in the batch.
    """
    # Input projection. matmul works on 2-D operands, so collapse the batch
    # and time axes into one: (batch, n_steps, n_inputs) -> (batch*n_steps, n_inputs).
    flat_inputs = tf.reshape(X, shape=[-1, n_inputs])
    projected = tf.matmul(flat_inputs, weights['in']) + biases['in']

    # Restore the time axis: (batch*n_steps, n_hidden) -> (batch, n_steps, n_hidden).
    X_in = tf.reshape(projected, shape=[-1, n_steps, n_hidden_units])

    # With state_is_tuple=True the LSTM state is a (c_state, h_state) pair.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden_units,
                                             forget_bias=1.0,
                                             state_is_tuple=True)

    # Passing dtype instead of a pre-built initial_state lets dynamic_rnn
    # create the zero state from the runtime batch size of X_in. Building it
    # with zero_state(batch_size, ...) would hard-wire the graph to exactly
    # `batch_size` rows and break any feed with a different batch size.
    states_series, current_state = tf.nn.dynamic_rnn(lstm_cell,
                                                     X_in,
                                                     dtype=tf.float32,
                                                     time_major=False)

    # Output projection from the last time step's output
    # (the same values as current_state[1], the final h_state).
    last_output = states_series[:, -1, :]
    results = tf.matmul(last_output, weights['out']) + biases['out']

    return results

In [5]:
# Forward pass: unnormalised class scores for every sequence in the batch.
logits = RNN(x, weights, biases)

# y is already declared as [None, n_classes], so this reshape keeps its shape.
labels = tf.reshape(y, shape=[-1, n_classes])

# Per-example softmax cross-entropy, averaged into a scalar training loss.
entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
loss = tf.reduce_mean(entropy)

# Adagrad with a literal step size of 0.3.
# NOTE(review): the `learning_rate` hyperparameter is not used here - confirm intent.
_optimizer = tf.train.AdagradOptimizer(0.3)
train_op = _optimizer.minimize(loss)

In [6]:
# A prediction is correct when the highest-scoring class matches the
# position of the 1 in the one-hot label.
_predicted_class = tf.argmax(logits, 1)
_true_class = tf.argmax(y, 1)
correct_pred = tf.equal(_predicted_class, _true_class)

# Fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [7]:
# Session options: enable the allow_growth GPU option.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Train until roughly `training_iters` samples have been consumed.
    step = 0
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Regroup each image's values into n_steps rows of n_inputs values.
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        feed = {x: batch_xs, y: batch_ys}

        sess.run(train_op, feed_dict=feed)

        # Every 20 steps, report accuracy on the batch that was just trained on
        # (this is training-batch accuracy, not held-out accuracy).
        if step % 20 == 0:
            batch_accuracy = sess.run(accuracy, feed_dict=feed)
            print(batch_accuracy)

        step += 1
    

0.203125
0.304688
0.296875
0.476562
0.59375
0.609375
0.703125
0.773438
0.71875
0.625
0.78125
0.75
0.859375
0.84375
0.835938
0.84375
0.914062
0.796875
0.898438
0.890625
0.90625
0.882812
0.945312
0.875
0.804688
0.867188
0.945312
0.9375
0.867188
0.953125
0.9375
0.929688
0.953125
0.921875
0.867188
0.945312
0.945312
0.90625
0.914062
0.953125
