In [1]:
from functools import partial
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10
learning_rate = 0.01

## Batch Normalization

In [3]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

training  = tf.placeholder_with_default(False, shape=(), name="training")

hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1")
bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
bn1_act = tf.nn.elu(bn1)
hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
bn2_act = tf.nn.elu(bn2)
logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs")
logits = tf.layers.batch_normalization(logits_before_bn, training=training, momentum=0.9)

In [6]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

training = tf.placeholder_with_default(False, shape=(), name="training")

my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum = 0.9)

hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1")
bn1 = my_batch_norm_layer(hidden1)
bn1_act = tf.nn.elu(bn1)
hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
bn2 = my_batch_norm_layer(hidden2)
bn2_act = tf.nn.elu(bn2)
logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs")
logits = my_batch_norm_layer(logits_before_bn)

In [10]:
tf.reset_default_graph()

batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

he_init = tf.keras.initializers.he_normal()
my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=batch_norm_momentum)
my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init)

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    extra_update_opts = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_opts):
        training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [4]:
n_epochs = 20
batch_size = 200

In [9]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={training: True, X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)
    
    save_path = saver.save(sess, "./mnist_batch_norm.ckpt")

0 Test accuracy: 0.8762
1 Test accuracy: 0.9019
2 Test accuracy: 0.9185
3 Test accuracy: 0.9289
4 Test accuracy: 0.9352
5 Test accuracy: 0.9416
6 Test accuracy: 0.9451
7 Test accuracy: 0.9496
8 Test accuracy: 0.9523
9 Test accuracy: 0.9551
10 Test accuracy: 0.9574
11 Test accuracy: 0.9597
12 Test accuracy: 0.9599
13 Test accuracy: 0.9616
14 Test accuracy: 0.9633
15 Test accuracy: 0.9651
16 Test accuracy: 0.9664
17 Test accuracy: 0.967
18 Test accuracy: 0.9682
19 Test accuracy: 0.9687


## Gradient Clipping

In [5]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, activation=tf.nn.relu, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [6]:
threshold = 1.0

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

In [7]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [8]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [9]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)
        
    save_path = saver.save(sess, "./mnist_gradient_clipping.ckpt")

0 Test accuracy: 0.4397
1 Test accuracy: 0.6602
2 Test accuracy: 0.7055
3 Test accuracy: 0.746
4 Test accuracy: 0.7869
5 Test accuracy: 0.8209
6 Test accuracy: 0.8285
7 Test accuracy: 0.859
8 Test accuracy: 0.8622
9 Test accuracy: 0.8586
10 Test accuracy: 0.8779
11 Test accuracy: 0.8825
12 Test accuracy: 0.891
13 Test accuracy: 0.8931
14 Test accuracy: 0.899
15 Test accuracy: 0.9022
16 Test accuracy: 0.9045
17 Test accuracy: 0.9135
18 Test accuracy: 0.9175
19 Test accuracy: 0.9212


## Reusing Pretrained Layers

In [10]:
tf.reset_default_graph()

In [11]:
saver = tf.train.import_meta_graph("./mnist_gradient_clipping.ckpt.meta")

In [12]:
for op in tf.get_default_graph().get_operations():
    print(op.name)

X
y
hidden1/kernel/Initializer/random_uniform/shape
hidden1/kernel/Initializer/random_uniform/min
hidden1/kernel/Initializer/random_uniform/max
hidden1/kernel/Initializer/random_uniform/RandomUniform
hidden1/kernel/Initializer/random_uniform/sub
hidden1/kernel/Initializer/random_uniform/mul
hidden1/kernel/Initializer/random_uniform
hidden1/kernel
hidden1/kernel/Assign
hidden1/kernel/read
hidden1/bias/Initializer/zeros
hidden1/bias
hidden1/bias/Assign
hidden1/bias/read
dnn/hidden1/MatMul
dnn/hidden1/BiasAdd
dnn/hidden1/Relu
hidden2/kernel/Initializer/random_uniform/shape
hidden2/kernel/Initializer/random_uniform/min
hidden2/kernel/Initializer/random_uniform/max
hidden2/kernel/Initializer/random_uniform/RandomUniform
hidden2/kernel/Initializer/random_uniform/sub
hidden2/kernel/Initializer/random_uniform/mul
hidden2/kernel/Initializer/random_uniform
hidden2/kernel
hidden2/kernel/Assign
hidden2/kernel/read
hidden2/bias/Initializer/zeros
hidden2/bias
hidden2/bias/Assign
hidden2/bias/read
dn

In [14]:
file_writer = tf.summary.FileWriter("./tf_logs", sess.graph)

In [17]:
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
accuracy = tf.get_default_graph().get_tensor_by_name("eval/accuracy:0")
training_op = tf.get_default_graph().get_operation_by_name("GradientDescent")

In [19]:
with tf.Session() as sess:
    saver.restore(sess, "./mnist_gradient_clipping.ckpt")
    
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)
        
    save_path = saver.save(sess, "./mnist_gradient_clipping_longer.ckpt")

INFO:tensorflow:Restoring parameters from ./mnist_gradient_clipping.ckpt
0 Test accuracy: 0.9176
1 Test accuracy: 0.9176
2 Test accuracy: 0.9304
3 Test accuracy: 0.9316
4 Test accuracy: 0.9304
5 Test accuracy: 0.9298
6 Test accuracy: 0.9324
7 Test accuracy: 0.9329
8 Test accuracy: 0.939
9 Test accuracy: 0.9382
10 Test accuracy: 0.9406
11 Test accuracy: 0.9434
12 Test accuracy: 0.945
13 Test accuracy: 0.9418
14 Test accuracy: 0.9479
15 Test accuracy: 0.9432
16 Test accuracy: 0.9498
17 Test accuracy: 0.9415
18 Test accuracy: 0.9477
19 Test accuracy: 0.9483


In [23]:
tf.reset_default_graph()

In [24]:
n_hidden4 = 24
n_outputs = 10

saver = tf.train.import_meta_graph("./mnist_gradient_clipping_longer.ckpt.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
hidden3 = tf.get_default_graph().get_tensor_by_name("dnn/hidden4/Relu:0")

new_hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="new_hidden4")
new_logits = tf.layers.dense(new_hidden4, n_outputs, name="new_outputs")

with tf.name_scope("new_loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("new_eval"):
    correct = tf.nn.in_top_k(new_logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
with tf.name_scope("new_train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
init = tf.global_variables_initializer()
new_saver = tf.train.Saver()

In [25]:
with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./mnist_gradient_clipping.ckpt")
    
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)
    
    save_path = new_saver.save(sess, "./mnist_transfer_layers.ckpt")

INFO:tensorflow:Restoring parameters from ./mnist_gradient_clipping.ckpt
0 Test accuracy: 0.9198
1 Test accuracy: 0.9381
2 Test accuracy: 0.9443
3 Test accuracy: 0.9495
4 Test accuracy: 0.9526
5 Test accuracy: 0.9532
6 Test accuracy: 0.9537
7 Test accuracy: 0.958
8 Test accuracy: 0.9575
9 Test accuracy: 0.9582
10 Test accuracy: 0.9607
11 Test accuracy: 0.9616
12 Test accuracy: 0.9622
13 Test accuracy: 0.9637
14 Test accuracy: 0.9624
15 Test accuracy: 0.9645
16 Test accuracy: 0.9648
17 Test accuracy: 0.964
18 Test accuracy: 0.9666
19 Test accuracy: 0.9663


## Freezing Lower Layers

In [33]:
tf.reset_default_graph()

In [34]:
n_hidden4 = 24
n_outputs = 10

saver = tf.train.import_meta_graph("./mnist_gradient_clipping_longer.ckpt.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
hidden3 = tf.get_default_graph().get_tensor_by_name("dnn/hidden3/Relu:0")

new_hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="new_hidden4")
new_logits = tf.layers.dense(new_hidden4, n_outputs, name="new_outputs")

with tf.name_scope("new_loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("new_eval"):
    correct = tf.nn.in_top_k(new_logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
with tf.name_scope("new_train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|outputs")
    training_op = optimizer.minimize(loss, var_list=train_vars)
    
init = tf.global_variables_initializer()
new_saver = tf.train.Saver()

In [35]:
with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./mnist_gradient_clipping_longer.ckpt")
    
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)
    
    save_path = new_saver.save(sess, "./mnist_frozen_layers.ckpt")

INFO:tensorflow:Restoring parameters from ./mnist_gradient_clipping_longer.ckpt
0 Test accuracy: 0.6234
1 Test accuracy: 0.7983
2 Test accuracy: 0.8509
3 Test accuracy: 0.8772
4 Test accuracy: 0.8902
5 Test accuracy: 0.8988
6 Test accuracy: 0.9027
7 Test accuracy: 0.9033
8 Test accuracy: 0.9069
9 Test accuracy: 0.9099
10 Test accuracy: 0.9134
11 Test accuracy: 0.9147
12 Test accuracy: 0.9169
13 Test accuracy: 0.9177
14 Test accuracy: 0.9186
15 Test accuracy: 0.9203
16 Test accuracy: 0.9211
17 Test accuracy: 0.9227
18 Test accuracy: 0.9219
19 Test accuracy: 0.923


## Caching Frozen Layers

In [36]:
import numpy as np

In [37]:
n_batches = mnist.train.num_examples // batch_size

saver = tf.train.import_meta_graph("./mnist_gradient_clipping_longer.ckpt.meta")
hidden2 = tf.get_default_graph().get_tensor_by_name("dnn/hidden2/Relu:0")

with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./mnist_gradient_clipping_longer.ckpt")
    
    h2_cache = sess.run(hidden2, feed_dict={X: mnist.train.images})
    
    for epoch in range(n_epochs):
        shuffled_idx = np.random.permutation(mnist.train.num_examples)
        hidden2_batches = np.array_split(h2_cache[shuffled_idx], n_batches)
        y_batches = np.array_split(mnist.train.labels[shuffled_idx], n_batches)
        for hidden2_batch, y_batch in zip(hidden2_batches, y_batches):
            sess.run(training_op, feed_dict={hidden2: hidden2_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)

INFO:tensorflow:Restoring parameters from ./mnist_gradient_clipping_longer.ckpt
0 Test accuracy: 0.566
1 Test accuracy: 0.7132
2 Test accuracy: 0.7835
3 Test accuracy: 0.8174
4 Test accuracy: 0.8355
5 Test accuracy: 0.8527
6 Test accuracy: 0.8663
7 Test accuracy: 0.8742
8 Test accuracy: 0.8802
9 Test accuracy: 0.8837
10 Test accuracy: 0.8893
11 Test accuracy: 0.8939
12 Test accuracy: 0.896
13 Test accuracy: 0.8998
14 Test accuracy: 0.9025
15 Test accuracy: 0.9052
16 Test accuracy: 0.9096
17 Test accuracy: 0.9104
18 Test accuracy: 0.9132
19 Test accuracy: 0.9154


## Learning Rate Scheduling

In [39]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    initial_learning_rate = 0.1
    decay_steps = 10000
    decay_rate = 1/10
    global_step = tf.Variable(0, trainable=False, name="global_step")
    learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step, decay_steps, decay_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
init = tf.global_variables_initializer()

In [40]:
n_epochs = 5
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for interation in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)

0 Test accuracy: 0.9513
1 Test accuracy: 0.9641
2 Test accuracy: 0.9677
3 Test accuracy: 0.9752
4 Test accuracy: 0.9733


## L1 Regularization

In [42]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

scale = 0.001  # l1 regularization hyperparameter

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    my_dense_layer = partial(
        tf.layers.dense, 
        activation=tf.nn.relu, 
        kernel_regularizer=tf.contrib.layers.l1_regularizer(scale)
    )
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = my_dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = my_dense_layer(hidden2, n_outputs, activation=None, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")
    
with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
init = tf.global_variables_initializer()

In [43]:
n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for interation in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)

0 Test accuracy: 0.9103
1 Test accuracy: 0.9112
2 Test accuracy: 0.9065
3 Test accuracy: 0.9077
4 Test accuracy: 0.9191
5 Test accuracy: 0.9246
6 Test accuracy: 0.9296
7 Test accuracy: 0.9293
8 Test accuracy: 0.9308
9 Test accuracy: 0.9351
10 Test accuracy: 0.9353
11 Test accuracy: 0.9376
12 Test accuracy: 0.9333
13 Test accuracy: 0.9364
14 Test accuracy: 0.9383
15 Test accuracy: 0.9385
16 Test accuracy: 0.942
17 Test accuracy: 0.9406
18 Test accuracy: 0.937
19 Test accuracy: 0.9419


## Dropout

In [45]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

dropout_rate = 0.5

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn"):
    X_drop = tf.layers.dropout(X, dropout_rate, training=training)
    hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    logits = tf.layers.dense(hidden2_drop, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
init = tf.global_variables_initializer()

In [46]:
n_epochs = 20
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for interation in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)

0 Test accuracy: 0.9493
1 Test accuracy: 0.9681
2 Test accuracy: 0.9709
3 Test accuracy: 0.9767
4 Test accuracy: 0.9744
5 Test accuracy: 0.9757
6 Test accuracy: 0.9796
7 Test accuracy: 0.975
8 Test accuracy: 0.9806
9 Test accuracy: 0.9799
10 Test accuracy: 0.9814
11 Test accuracy: 0.981
12 Test accuracy: 0.9809
13 Test accuracy: 0.9823
14 Test accuracy: 0.9818
15 Test accuracy: 0.9829
16 Test accuracy: 0.982
17 Test accuracy: 0.9825
18 Test accuracy: 0.9835
19 Test accuracy: 0.9821


## Max Norm

In [47]:
def max_norm_regularizer(threshold, axes=1, name="max_norm", collection="max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
        clip_weights = tf.assign(weights, clipped, name=name)
        tf.add_to_collection(collection, clip_weights)
        return None
    return max_norm

In [48]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

max_norm_reg = max_norm_regularizer(threshold=1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, kernel_regularizer=max_norm_reg, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, kernel_regularizer=max_norm_reg, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
init = tf.global_variables_initializer()

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [49]:
n_epochs = 20
batch_size = 50

clip_all_weights = tf.get_collection("max_norm")

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for interation in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            sess.run(clip_all_weights)
        accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        
        print(epoch, "Test accuracy:", accuracy_val)

0 Test accuracy: 0.9517
1 Test accuracy: 0.9643
2 Test accuracy: 0.9728
3 Test accuracy: 0.974
4 Test accuracy: 0.9755
5 Test accuracy: 0.9703
6 Test accuracy: 0.9796
7 Test accuracy: 0.9787
8 Test accuracy: 0.9786
9 Test accuracy: 0.9795
10 Test accuracy: 0.9786
11 Test accuracy: 0.9814
12 Test accuracy: 0.9821
13 Test accuracy: 0.9807
14 Test accuracy: 0.9819
15 Test accuracy: 0.9826
16 Test accuracy: 0.9811
17 Test accuracy: 0.982
18 Test accuracy: 0.9814
19 Test accuracy: 0.9819
