In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import h5py

import math


  from ._conv import register_converters as _register_converters


In [2]:
def load_mnist_csv(path = "/data/MNIST/", one_hot = False, shape = None):
    df_train = pd.read_csv(path + "mnist_train.csv", header=None)
    df_test = pd.read_csv(path + "mnist_test.csv", header=None)
    
    X_train = df_train.iloc[:, 1:].values/255
    X_test = df_test.iloc[:, 1:].values/255
    y_train = df_train.iloc[:, 0].values
    y_test = df_test.iloc[:, 0].values
    
    if shape == "2D":
        X_train = X_train.reshape(-1, 28, 28)
        X_test = X_test.reshape(-1, 28, 28)
    
    if one_hot:
        eye = np.eye(len(np.unique(y_train)))
        y_train, y_test = eye[y_train], eye[y_test]
        
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = load_mnist_csv(shape = "2D")


In [3]:
tf.reset_default_graph()
tf.set_random_seed(1)

n_steps = 28
n_inputs = 28
state_size = 400
n_outputs = 10

batch_size = 32

epochs = 1

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

X_sequence = tf.unstack(X, axis=-1)

cell = tf.contrib.rnn.BasicRNNCell(num_units=state_size, activation=tf.nn.tanh)

nrows = tf.shape(X)[0]

#initial_state = cell.zero_state(batch_size=nrows, dtype=tf.float32)
initial_state = tf.truncated_normal(shape=(nrows, state_size), dtype=tf.float32, stddev=0.1)


outputs, states = tf.nn.static_rnn(cell, X_sequence, initial_state=initial_state,  dtype=tf.float32)
print("Outputs: ", outputs)
print("States: ", states)

logits = tf.layers.dense(states, n_outputs)

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
cost = tf.reduce_mean(xentropy)
opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)
y_pred = tf.argmax(logits, axis=1, output_type=tf.int32)
correct = tf.equal(y, y_pred)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()


n_epochs = 100
batch_size = 150

m = len(X_train)
num_batches = math.ceil(m/batch_size)



with tf.Session() as sess:
    init.run()
    for i in range(epochs):
        indices = np.arange(m)
        np.random.shuffle(indices)
        X_train = X_train[indices]
        y_train = y_train[indices]
        for j in range(num_batches):
            X_batch = X_train[j * batch_size: (j+1) * batch_size]
            y_batch = y_train[j * batch_size: (j+1) * batch_size]
            _, cost_, acc_train = sess.run([opt, cost, accuracy], feed_dict={X: X_batch, y: y_batch})
            progress = (j+1)*100//num_batches
            print("epoch: %2d, progress: %3d%%, cost: %.5f, train acc: %.4f" 
                  % (i, progress, cost_, acc_train), end="\r")

        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\nTest accuracy: %.4f" % acc_test)


Outputs:  [<tf.Tensor 'rnn/basic_rnn_cell/Tanh:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_1:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_2:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_3:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_4:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_5:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_6:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_7:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_8:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_9:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_10:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_11:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_12:0' shape=(?, 400) dtype=float32>, <tf.Tensor 'rnn/basic_rnn_cell/Tanh_13:0' shape=(?, 

In [13]:
tf.reset_default_graph()
tf.set_random_seed(1)

n_steps = 28
n_inputs = 28
state_size = 400
n_outputs = 10

batch_size = 32

epochs = 1

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

X_sequence = tf.unstack(X, axis=-1)

cell = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, activation=tf.nn.tanh, state_is_tuple=True)
outputs, (states_c, states_h) = tf.nn.static_rnn(cell, X_sequence, dtype=tf.float32)
print("States: ", states_h)

logits = tf.layers.dense(states_h, n_outputs)

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
cost = tf.reduce_mean(xentropy)
opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)
y_pred = tf.argmax(logits, axis=1, output_type=tf.int32)
correct = tf.equal(y, y_pred)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()


n_epochs = 100
batch_size = 150

m = len(X_train)
num_batches = math.ceil(m/batch_size)



with tf.Session() as sess:
    init.run()
    for i in range(epochs):
        indices = np.arange(m)
        np.random.shuffle(indices)
        X_train = X_train[indices]
        y_train = y_train[indices]
        for j in range(num_batches):
            X_batch = X_train[j * batch_size: (j+1) * batch_size]
            y_batch = y_train[j * batch_size: (j+1) * batch_size]
            _, cost_, acc_train = sess.run([opt, cost, accuracy], feed_dict={X: X_batch, y: y_batch})
            progress = (j+1)*100//num_batches
            print("epoch: %2d, progress: %3d%%, cost: %.5f, train acc: %.4f" 
                  % (i, progress, cost_, acc_train), end="\r")

        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\nTest accuracy: %.4f" % acc_test)


States:  Tensor("rnn/basic_lstm_cell/Mul_83:0", shape=(?, 400), dtype=float32)
epoch:  0, progress: 100%, cost: 0.14906, train acc: 0.9600
Test accuracy: 0.9514


In [17]:
tf.reset_default_graph()
tf.set_random_seed(1)

n_steps = 28
n_inputs = 28
state_size = 400
n_outputs = 10

batch_size = 32

epochs = 1

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

X_sequence = tf.unstack(X, axis=-1)

cell = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, activation=tf.nn.tanh, state_is_tuple=True)
outputs, (states_c, states_h) = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32, )
print("States: ", states_h)

logits = tf.layers.dense(states_h, n_outputs)

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
cost = tf.reduce_mean(xentropy)
opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)
y_pred = tf.argmax(logits, axis=1, output_type=tf.int32)
correct = tf.equal(y, y_pred)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()


n_epochs = 100
batch_size = 150

m = len(X_train)
num_batches = math.ceil(m/batch_size)



with tf.Session() as sess:
    init.run()
    for i in range(epochs):
        indices = np.arange(m)
        np.random.shuffle(indices)
        X_train = X_train[indices]
        y_train = y_train[indices]
        for j in range(num_batches):
            X_batch = X_train[j * batch_size: (j+1) * batch_size]
            y_batch = y_train[j * batch_size: (j+1) * batch_size]
            _, cost_, acc_train = sess.run([opt, cost, accuracy], feed_dict={X: X_batch, y: y_batch})
            progress = (j+1)*100//num_batches
            print("epoch: %2d, progress: %3d%%, cost: %.5f, train acc: %.4f" 
                  % (i, progress, cost_, acc_train), end="\r")

        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\nTest accuracy: %.4f" % acc_test)


States:  Tensor("rnn/while/Exit_4:0", shape=(?, 400), dtype=float32)
epoch:  0, progress: 100%, cost: 0.14378, train acc: 0.9667
Test accuracy: 0.9517
