In [12]:
import numpy as np 

def reset_graph(seed=42):
    """Discard the current default TensorFlow graph and re-seed the RNGs.

    Args:
        seed: Value handed to both ``tf.set_random_seed`` and
            ``np.random.seed`` so graph-level and NumPy randomness restart
            from the same point.
    """
    # Imported locally because this helper is defined -- and first called --
    # in cells that come before the notebook's `import tensorflow as tf`
    # cell; relying on the global name raises NameError on a fresh kernel
    # that is run top to bottom.
    import tensorflow as tf

    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [13]:
# Start from an empty default graph with the RNGs seeded (42 is the helper's default seed).
reset_graph(seed=42)

In [None]:
import tensorflow as tf

# Layer sizes shared by the graphs built in the following cells.
n_inputs, n_hidden, n_outputs = (
    28 * 28,  # one input feature per pixel of a flattened 28x28 image
    100,      # units in each hidden layer
    5,        # number of output classes
)

In [20]:
reset_graph()

# Graph inputs: a batch of flattened images and their integer class labels.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (not a 1-tuple), so the label shape is left unconstrained.
y = tf.placeholder(tf.int64, shape=(None), name='y')

# No layer in this graph reads the flag, but `train()` feeds it, so it has to exist.
training = tf.placeholder_with_default(False, shape=(), name='training')

he_init = tf.contrib.layers.variance_scaling_initializer()

# Five fully connected ELU layers of `n_hidden` units each, chained one after another.
hidden1 = tf.layers.dense(X, n_hidden, name='hidden1', activation=tf.nn.elu,
                          kernel_initializer=he_init)
hidden2 = tf.layers.dense(hidden1, n_hidden, name='hidden2', activation=tf.nn.elu,
                          kernel_initializer=he_init)
hidden3 = tf.layers.dense(hidden2, n_hidden, name='hidden3',
                          activation=tf.nn.elu, kernel_initializer=he_init)
hidden4 = tf.layers.dense(hidden3, n_hidden, name='hidden4',
                          activation=tf.nn.elu, kernel_initializer=he_init)
hidden5 = tf.layers.dense(hidden4, n_hidden, name='hidden5',
                          activation=tf.nn.elu, kernel_initializer=he_init)

# The output layer must sit on top of the LAST hidden layer. Feeding it from
# `hidden4` would leave `hidden5` disconnected from the loss, so its weights
# would never be trained or used.
logits = tf.layers.dense(hidden5, n_outputs, name='outputs', kernel_initializer=he_init)
Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope('loss'):
    # Labels are class indices (not one-hot), hence the sparse cross-entropy variant.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    # A prediction is correct when the true label has the highest logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

learning_rate = 0.001

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    

In [21]:
# Variable-initialisation op and checkpoint saver for the graph built above
# (created in this order: initializer first, then the saver).
init, saver = tf.global_variables_initializer(), tf.train.Saver()

In [22]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets, using /tmp/data/ as the data directory.
mnist = input_data.read_data_sets(train_dir="/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [23]:
# Keep only the examples whose label is below 5 in each split. One boolean
# mask per split is computed once and applied to both images and labels.
train_mask = mnist.train.labels < 5
valid_mask = mnist.validation.labels < 5
test_mask = mnist.test.labels < 5

X_train1, y_train1 = mnist.train.images[train_mask], mnist.train.labels[train_mask]
X_valid1, y_valid1 = mnist.validation.images[valid_mask], mnist.validation.labels[valid_mask]
X_test1, y_test1 = mnist.test.images[test_mask], mnist.test.labels[test_mask]

In [26]:
from datetime import datetime

In [59]:
def train(n_epochs=1000, batch_size = 20, max_checks_without_progress=20):
    """Train the current graph with early stopping, then print test accuracy.

    Relies on module-level state built by earlier cells: the graph nodes
    ``X``, ``y``, ``training``, ``loss``, ``accuracy`` and ``training_op``;
    the ``init`` op and ``saver``; and the arrays ``X_train1``/``y_train1``,
    ``X_valid1``/``y_valid1`` and ``X_test1``/``y_test1``.

    Args:
        n_epochs: Maximum number of passes over the training set.
        batch_size: Target mini-batch size. Each epoch's shuffled indices are
            split into ``len(X_train1) // batch_size`` nearly equal parts
            (never fewer than one part).
        max_checks_without_progress: Training stops once the validation loss
            has failed to improve on the best value for more than this many
            consecutive epochs.

    Side effects:
        Writes the graph and a loss summary (every 10th training step) under
        ``tf_logs/run-<UTC timestamp>/``, saves the best model so far to
        ``./my_mnist_model_0_to_4.ckpt``, and prints per-epoch validation
        metrics followed by the final test accuracy of the restored model.
    """
    # Ops registered in UPDATE_OPS are run alongside every training step.
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = 'tf_logs'
    logdir = "{}/run-{}/".format(root_logdir, now)
    checkpoint_path = "./my_mnist_model_0_to_4.ckpt"

    loss_summary = tf.summary.scalar('loss', loss)
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

    checks_without_progress = 0
    best_loss = np.inf  # `np.infty` alias no longer exists in NumPy 2.0
    step_counter = 0
    # np.array_split rejects 0 sections, which happens when batch_size exceeds
    # the training-set size; fall back to a single batch in that case.
    n_batches = max(1, len(X_train1) // batch_size)

    try:
        with tf.Session() as sess:
            init.run()

            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X_train1))
                for rnd_indices in np.array_split(rnd_idx, n_batches):
                    X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
                    sess.run([training_op, extra_update_ops],
                             feed_dict={ X: X_batch, y: y_batch, training: True })
                    if step_counter % 10 == 0:
                        # Logged with `training` left at its default (False).
                        summary_str = loss_summary.eval(feed_dict={X: X_batch, y: y_batch})
                        file_writer.add_summary(summary_str, step_counter)
                    step_counter += 1
                loss_val, acc_val = sess.run([loss, accuracy], feed_dict={ X: X_valid1, y: y_valid1 })
                if loss_val < best_loss:
                    checks_without_progress = 0
                    best_loss = loss_val
                    # Checkpoint only the best model seen so far.
                    saver.save(sess, checkpoint_path)
                else:
                    checks_without_progress += 1
                    if checks_without_progress > max_checks_without_progress:
                        print('Early stop!')
                        break
                print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                    epoch, loss_val, best_loss, acc_val * 100))
    finally:
        # Close the event file on every exit path (normal completion, early
        # stop, or an exception), not just on early stop.
        file_writer.close()

    # Evaluate the best checkpoint, not the weights from the final epoch.
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
        print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [72]:
# Throw away the previous graph before building the batch-norm variant.
reset_graph()

# Scalar boolean that evaluates to False unless a value is fed for it.
training = tf.placeholder_with_default(input=False, shape=(), name='training')
# Same initializer as the no-argument call, with its default settings spelled out.
he_init = tf.contrib.layers.variance_scaling_initializer(
    factor=2.0, mode='FAN_IN', uniform=False)

In [73]:
from functools import partial

# Batch-normalization layer pre-bound to the graph's `training` flag and a
# momentum of 0.9, so call sites only pass the tensor to normalize.
my_batch_norm_layer = partial(
    tf.layers.batch_normalization,
    momentum=0.9,
    training=training,
)

def dnn_bn(inputs, n_hidden_layers=5, n_neurons=100, name=None,
        activation=tf.nn.elu, initializer=he_init, with_dropout=False, dropout_rate=0.5):
    """Stack ``dense -> batch norm -> activation [-> dropout]`` layers.

    Args:
        inputs: Tensor fed to the first dense layer.
        n_hidden_layers: Number of stacked blocks; the dense layers are named
            ``hidden1`` .. ``hidden<n_hidden_layers>``.
        n_neurons: Units in every dense layer.
        name: Variable-scope name; ``'dnn_bn'`` is the default scope name
            when this is None.
        activation: Callable applied to each batch-normalized output.
        initializer: Kernel initializer for every dense layer.
        with_dropout: When true, dropout follows each activation.
        dropout_rate: Rate passed to ``tf.layers.dropout``.

    Returns:
        The output of the last block, or ``inputs`` unchanged when
        ``n_hidden_layers`` is 0.
    """
    with tf.variable_scope(name, 'dnn_bn'):
        layer_output = inputs
        for layer_number in range(1, n_hidden_layers + 1):
            # The dense layer has no activation of its own: batch norm is
            # applied to its raw output before the non-linearity.
            normalized = my_batch_norm_layer(
                tf.layers.dense(layer_output, n_neurons,
                                kernel_initializer=initializer,
                                name="hidden%d" % layer_number))
            layer_output = activation(normalized)
            if not with_dropout:
                continue
            # Dropout shares the module-level `training` flag with batch norm.
            layer_output = tf.layers.dropout(layer_output, dropout_rate, training=training)
        return layer_output

In [74]:
# Graph inputs: flattened image batch and integer class labels
# (the label shape is left unspecified).
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int64, shape=None, name="y")

# Hidden stack: dense -> batch norm -> activation -> dropout, default depth and width.
dnn_bn_outputs = dnn_bn(inputs=X, with_dropout=True)

logits = tf.layers.dense(inputs=dnn_bn_outputs, units=n_outputs,
                         kernel_initializer=he_init, name="logits")
Y_proba = tf.nn.softmax(logits=logits, name="Y_proba")

with tf.name_scope('loss'):
    # Labels are class indices, so the sparse cross-entropy variant is used.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(input_tensor=xentropy, name='loss')

with tf.name_scope('eval'):
    # Correct when the true label is the top-1 logit; accuracy is the mean hit rate.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(input_tensor=tf.cast(correct, tf.float32))

learning_rate = 0.001

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss=loss)

In [75]:
# The graph was rebuilt, so the initializer op and the saver must be
# re-created for it before training.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Same as calling train() with no arguments; the defaults are spelled out.
train(n_epochs=1000, batch_size=20, max_checks_without_progress=20)

0	Validation loss: 0.120866	Best loss: 0.120866	Accuracy: 96.09%
1	Validation loss: 0.100900	Best loss: 0.100900	Accuracy: 96.52%
2	Validation loss: 0.094376	Best loss: 0.094376	Accuracy: 97.26%
3	Validation loss: 0.090971	Best loss: 0.090971	Accuracy: 97.15%
4	Validation loss: 0.075347	Best loss: 0.075347	Accuracy: 97.65%
5	Validation loss: 0.073190	Best loss: 0.073190	Accuracy: 97.69%
6	Validation loss: 0.070634	Best loss: 0.070634	Accuracy: 97.54%
7	Validation loss: 0.064752	Best loss: 0.064752	Accuracy: 97.73%
8	Validation loss: 0.060272	Best loss: 0.060272	Accuracy: 97.85%
9	Validation loss: 0.059940	Best loss: 0.059940	Accuracy: 98.01%
10	Validation loss: 0.054780	Best loss: 0.054780	Accuracy: 98.16%
11	Validation loss: 0.050559	Best loss: 0.050559	Accuracy: 98.36%
12	Validation loss: 0.053041	Best loss: 0.050559	Accuracy: 98.16%
13	Validation loss: 0.048494	Best loss: 0.048494	Accuracy: 98.48%
14	Validation loss: 0.048790	Best loss: 0.048494	Accuracy: 98.32%
15	Validation loss: 