In [43]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm, fully_connected, l1_regularizer, variance_scaling_initializer
from tensorflow.contrib.framework import arg_scope
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [44]:
def build_mnist_generator(batch_size=64, test_size=10000, random_state=None):
    """Load MNIST and return an endless training-batch generator plus a test split.

    The feature matrix is divided by 255 and cast to float32; the targets are
    cast to int32. The data is then split with ``train_test_split``.

    Parameters
    ----------
    batch_size : int, default 64
        Number of rows per yielded batch. The last batch of every pass over
        the training set is shorter when the training-set size is not a
        multiple of ``batch_size``.
    test_size : int or float, default 10000
        Forwarded to ``train_test_split``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``train_test_split``. ``None`` keeps the split unseeded.

    Returns
    -------
    (generator, (X_test, y_test))
        ``generator`` yields ``(X_batch, y_batch)`` tuples forever, cycling
        over the training rows in a fixed order (no reshuffle between passes).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

    # NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
    # in 0.22; newer environments need a different loader (e.g. fetch_openml).
    mnist = fetch_mldata("MNIST Original")
    X = (mnist.data / 255).astype(np.float32)
    y = (mnist.target).astype(np.int32)

    # train_test_split shuffles by default, so no separate shuffle pass is needed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    def train_generator():
        """Yield consecutive training batches, restarting from row 0 after each pass."""
        n = X_train.shape[0]
        while True:
            for start in range(0, n, batch_size):
                stop = start + batch_size
                yield X_train[start:stop], y_train[start:stop]

    return train_generator(), (X_test, y_test)

# Create the endless batch iterator once; the held-out arrays are kept for evaluation.
train_generator, (X_test, y_test) = build_mnist_generator(batch_size=64)

In [45]:
# Layer widths: flattened 28 * 28 input, two hidden layers, ten output units.
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

# Start from an empty default graph so re-running this cell does not stack duplicate ops.
tf.reset_default_graph()

# Feeds: a batch of flattened inputs, their integer labels, and a scalar flag
# that is passed to batch_norm as its is_training argument.
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
y = tf.placeholder(tf.int32, shape=[None], name="y")
is_training = tf.placeholder(tf.bool, shape=[], name="is_training")

with tf.name_scope("dnn"):
    # updates_collections=None makes batch_norm update its moving statistics
    # in place, so no separate update op has to be run during training.
    bn_params = dict(
        is_training=is_training,
        decay=0.9,
        updates_collections=None,
        scale=True,
    )

    # Defaults shared by every fully_connected layer created below.
    dense_defaults = dict(
        activation_fn=tf.nn.elu,
        normalizer_fn=batch_norm,
        normalizer_params=bn_params,
        weights_initializer=variance_scaling_initializer(),
        weights_regularizer=l1_regularizer(0.01),
    )

    with arg_scope([fully_connected], **dense_defaults):
        hidden1 = fully_connected(X, n_hidden1)
        hidden2 = fully_connected(hidden1, n_hidden2)
        # Only the activation is overridden here; batch norm still applies to the logits.
        logits = fully_connected(hidden2, n_outputs, activation_fn=None)
    
with tf.name_scope("loss"):
    # Keyword arguments are required from TensorFlow 1.0 on (the positional
    # order changed to labels-first and positional calls raise ValueError);
    # the keyword form also works on the older logits-first signature.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="base_loss")
    # List of penalty tensors registered by the layers' weights_regularizer.
    reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # The penalties are collected but NOT added to the objective, so the L1
    # regularizer currently has no effect on training. To enable it, use
    #     loss = tf.add_n([base_loss] + reg_loss, name="loss")
    # (reg_loss is a list, so tf.add(base_loss, reg_loss) would not yield a scalar).
    loss = base_loss

with tf.name_scope("train"):
    train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.name_scope("eval"):
    # True where the highest-scoring logit matches the label.
    match = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(match, tf.float32))

saver = tf.train.Saver()

In [46]:
init = tf.global_variables_initializer()

n_epochs = 10
# Assumes 60000 training rows served in batches of 64 -- keep in sync with
# the arguments used when train_generator was built.
n_batches_per_epoch = 60000 // 64
# log_every is larger than n_batches_per_epoch, so the check below is true
# only at step 0: one log line per epoch, taken right after the first batch
# of that epoch.
log_every = 1000

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for step in range(n_batches_per_epoch):
            X_batch, y_batch = next(train_generator)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch, is_training: True})
            if step % log_every == 0:
                # Evaluate with is_training=False; fetch loss and accuracy for
                # the current batch in a single run instead of two passes.
                batch_feed = {X: X_batch, y: y_batch, is_training: False}
                train_loss, train_acc = sess.run([loss, accuracy], feed_dict=batch_feed)
                test_acc = sess.run(accuracy, feed_dict={X: X_test, y: y_test, is_training: False})
                print(train_loss, train_acc, test_acc)
    # Persist the trained variables once all epochs have finished.
    saver.save(sess, "model.ckpt")

1.85368 0.390625 0.2295
0.150756 1.0 0.9615
0.0698274 1.0 0.9692
0.0555314 1.0 0.9727
0.047498 1.0 0.9744
0.0564467 0.984375 0.9756
0.0110913 1.0 0.9755
0.00926896 1.0 0.9772
0.00337995 1.0 0.9774
0.0357243 0.984375 0.9745
