In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Display the installed TensorFlow version; the cells below rely on tf.contrib APIs.
tf.__version__

'1.0.0'

### Using `tf.contrib.rnn.static_rnn`

In [3]:
# Toy batch of 4 sequences, each with 2 time steps of 3 features.
# The second sequence has only one real step; its second step is all zeros.
X_batch = np.array(
    [
        [[0, 1, 2], [9, 8, 7]],  # sequence 0
        [[3, 4, 5], [0, 0, 0]],  # sequence 1 (second step is zero-filled)
        [[6, 7, 8], [6, 5, 4]],  # sequence 2
        [[9, 0, 1], [3, 2, 1]],  # sequence 3
    ],
    dtype=np.float32,
)

# Number of real time steps in each sequence of X_batch.
seqlen_batch = np.array([2, 1, 2, 2])

In [6]:
# Toy dimensions: time steps per sequence, features per step, recurrent units.
n_steps = 2
n_inputs = 3
n_outputs = 2

tf.reset_default_graph()

with tf.device("/gpu:0"):
    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    # static_rnn consumes a Python list holding one tensor per time step, so
    # move the time axis to the front and split along it.
    X_time_major = tf.transpose(X, perm=[1, 0, 2])
    X_seqs = tf.unstack(X_time_major, axis=0)
    basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_outputs)
    output_seqs, states = tf.contrib.rnn.static_rnn(
        basic_cell, X_seqs, dtype=tf.float32)
    # Re-assemble the per-step outputs and put the batch axis first again.
    stacked_outputs = tf.stack(output_seqs)
    outputs = tf.transpose(stacked_outputs, perm=[1, 0, 2])

# Soft placement lets the ops fall back to another device when "/gpu:0" is
# unavailable; device placement is logged.
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    outputs_val = sess.run(outputs, feed_dict={X: X_batch})

outputs_val

array([[[-0.94808394, -0.73269534],
        [-1.        ,  0.32144389]],

       [[-0.99998671, -0.87594861],
        [ 0.49151343,  0.60313314]],

       [[-1.        , -0.94487244],
        [-0.99998677,  0.65276557]],

       [[-0.76261401, -0.97092772],
        [-0.96212488,  0.76977062]]], dtype=float32)

### Using `tf.nn.dynamic_rnn`

In [36]:
tf.reset_default_graph()

# Inputs: a batch of fixed-width sequences plus the real length of each one.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# sequence_length is a vector of step counts, so it must be an integer tensor
# (it was declared as float32 before).
seq_len = tf.placeholder(tf.int32, [None])
basic_cell = tf.contrib.rnn.BasicRNNCell(n_outputs)
# Steps past each sequence's length produce zero outputs (see the second
# sequence in the displayed result).
y, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32, sequence_length=seq_len)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    y_val = y.eval(feed_dict={X: X_batch, seq_len: seqlen_batch})

y_val

array([[[-0.7389456 ,  0.53873068],
        [-1.        ,  0.99990129]],

       [[-0.99997181,  0.98772043],
        [ 0.        ,  0.        ]],

       [[-1.        ,  0.99974543],
        [-0.9999997 ,  0.99434185]],

       [[-0.99998987,  0.98623681],
        [-0.99672085,  0.75780749]]], dtype=float32)

### MNIST as a sequence

In [39]:
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


# Fetch the "MNIST Original" dataset and display the shapes of its data and
# target arrays together with the largest value found in the data array.
mnist = fetch_mldata("MNIST Original")
mnist.data.shape, mnist.target.shape, mnist.data.max()

((70000, 784), (70000,), 255)

In [58]:
def load_mnist(test_size=10000, batch_size=64):
    """Load MNIST as 28x28 arrays and split it into a training stream and a test set.

    Args:
        test_size: forwarded unchanged to ``train_test_split`` as ``test_size``.
        batch_size: maximum number of examples in each yielded training batch;
            the last batch of a pass is shorter when the training-set size is
            not a multiple of ``batch_size``.

    Returns:
        A tuple ``(train_generator, (test_X, test_y))``. ``train_generator`` is
        an endless generator of ``(X_batch, y_batch)`` pairs drawn from the
        training split; ``test_X`` / ``test_y`` are the held-out arrays.
    """
    mnist = fetch_mldata("MNIST Original")
    # Scale the raw values into [0, 1] and store everything as float32.
    X, y = (mnist.data / 255.).astype(np.float32), mnist.target.astype(np.float32)
    # One 28x28 array per example: 28 time steps of 28 features each.
    X = X.reshape([-1, 28, 28])
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=test_size)

    def _train_generator(train_X, train_y):
        """Yield mini-batches forever, reshuffling before every pass over the data."""
        n = train_X.shape[0]
        if n == 0:
            raise ValueError("training split is empty; nothing to iterate over")
        while True:
            # Reshuffle on every pass so consecutive passes do not replay the
            # same batch order, and always restart from index 0.
            train_X, train_y = shuffle(train_X, train_y)
            for i in range(0, n, batch_size):
                yield train_X[i:i+batch_size], train_y[i:i+batch_size]

    return _train_generator(train_X, train_y), (test_X, test_y)

train_generator, (test_X, test_y) = load_mnist()

In [76]:
# Each 28x28 image is read as a sequence of 28 rows (steps) of 28 values.
n_inputs = 28
n_steps = 28
batch_size = 64
n_hiddens = 100
n_outputs = 10

tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

he_initializer = tf.contrib.layers.variance_scaling_initializer()

with tf.name_scope("rnn"):
    cell = tf.contrib.rnn.LSTMCell(n_hiddens, initializer=he_initializer)
    seq_y, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    # Only the output of the last time step is used for classification.
    seq_y_vec = seq_y[:, -1, :]

with tf.name_scope("softmax"):
    # Linear projection to one logit per class; the softmax itself is applied
    # inside the loss below. Uses n_outputs instead of a repeated literal.
    logits = tf.contrib.layers.fully_connected(seq_y_vec, n_outputs, activation_fn=None)

with tf.name_scope("metrics"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    # A prediction counts as correct when the true label is the top-1 logit.
    match = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(match, tf.float32))

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # Predicted class index for each example.
    labels = tf.argmax(logits, axis=1)

with tf.name_scope("summary"):
    loss_summary = tf.summary.scalar("loss", loss)
    accuracy_summary = tf.summary.scalar("accuracy", accuracy)
    all_summary = tf.summary.merge_all()

In [77]:
# Summary writer targeting the "rnn-mnist" directory; it is handed the default graph.
writer = tf.summary.FileWriter("rnn-mnist", graph=tf.get_default_graph())

In [78]:
n_epoches = 10
# Training-set size = whole dataset minus the held-out test examples
# (replaces the hard-coded 60000).
n_train = mnist.data.shape[0] - test_X.shape[0]
n_batches = n_train // batch_size

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for e in range(n_epoches):
        for b in range(n_batches):
            X_batch, y_batch = next(train_generator)
            _, summary = sess.run([train_op, all_summary], feed_dict={X: X_batch, y: y_batch})
            if b % 500 == 0:
                # Printed columns: loss and accuracy on the current training
                # batch, then accuracy on the whole test set.
                train_loss, train_acc = sess.run([loss, accuracy],
                                        feed_dict={X: X_batch, y: y_batch})
                test_acc = sess.run(accuracy,
                                   feed_dict={X: test_X, y: test_y})
                print(train_loss, train_acc, test_acc)
                # The global step must grow monotonically over the whole run;
                # e*b was 0 for all of epoch 0 and repeated across epochs.
                writer.add_summary(summary=summary, global_step=e * n_batches + b)
    # Make sure buffered summary events reach disk once training is done.
    writer.flush()

2.2819 0.171875 0.1098
0.344325 0.90625 0.9286
0.205791 0.9375 0.957
0.0559785 1.0 0.9633
0.0948395 0.9375 0.9708
0.0323619 1.0 0.9727
0.0777591 0.984375 0.9756
0.0205686 1.0 0.9787
0.0664518 0.984375 0.9811
0.0396016 0.984375 0.9806
0.00645392 1.0 0.9815
0.0141305 1.0 0.9803
0.0133835 1.0 0.9795
0.0174231 1.0 0.9806
0.0427782 0.984375 0.9829
0.0147423 1.0 0.9789
0.0508408 0.984375 0.9844
0.00365515 1.0 0.9841
0.0109918 1.0 0.9859
0.00428307 1.0 0.9853
