# Jonathan Halverson
# Thursday, November 16, 2017
# Dropout

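Here we turn off neurons -- both input and hidden -- with some probability p at each training step (i.e., per batch). This regularization approach is called dropout. It slows down training but can improve performance. In the classical formulation one must remember to rescale each neuron's input connection weights by the keep probability 1 - p once training is complete; if p = 1/2 then they need to be scaled by 1/2. Note that tf.layers.dropout, used below, instead scales the kept units by 1 / (1 - rate) during training, so no separate rescaling step is needed afterwards.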

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Reset TensorFlow's global default graph before building the model;
# presumably so that re-running the notebook starts from an empty graph.
tf.reset_default_graph()

In [3]:
# Sizes of each layer of the fully connected network.
n_inputs, n_outputs = 28 * 28, 10  # flattened 28x28 input, 10 output units
n_hidden1, n_hidden2 = 300, 100    # widths of the two hidden layers

In [4]:
# Graph inputs: X holds a batch of examples (one row per example, n_inputs
# features each); y holds the corresponding integer class labels.
X = tf.placeholder(shape=(None, n_inputs), dtype=tf.float32, name="X")
# Note the trailing comma: (None) is just None, i.e. a completely unspecified
# shape, whereas (None,) declares a 1-D tensor of unknown length -- one label
# per row of X -- so mis-shaped label feeds are rejected up front.
y = tf.placeholder(shape=(None,), dtype=tf.int32, name="y")

In [5]:
# Boolean switch that turns dropout on or off. It defaults to False, so it
# only has to be fed (with True) while training.
# shape=() declares a scalar, which is what tf.layers.dropout expects for a
# tensor-valued `training` argument; shape=(None) would be plain None, i.e.
# an unknown shape.
training = tf.placeholder_with_default(False, shape=(), name="training")

In [6]:
# Dropout rate shared by the input layer and both hidden layers.
dropout_rate = 0.1

# Apply dropout directly to the inputs; whether it is active is controlled
# by the `training` placeholder.
X_drop = tf.layers.dropout(
    inputs=X,
    rate=dropout_rate,
    training=training,
)

In [7]:
# First hidden layer: dense ELU layer on the dropped-out inputs, followed by
# dropout on its activations.
hidden1 = tf.layers.dense(
    inputs=X_drop,
    units=n_hidden1,
    activation=tf.nn.elu,
    name="hidden1",
)
hidden1_drop = tf.layers.dropout(
    inputs=hidden1,
    rate=dropout_rate,
    training=training,
)

In [8]:
# Second hidden layer: dense ELU layer on the dropped-out output of the
# first hidden layer, again followed by dropout.
hidden2 = tf.layers.dense(
    inputs=hidden1_drop,
    units=n_hidden2,
    activation=tf.nn.elu,
    name="hidden2",
)
hidden2_drop = tf.layers.dropout(
    inputs=hidden2,
    rate=dropout_rate,
    training=training,
)

In [9]:
# Output layer: one unnormalized score (logit) per class. No activation is
# applied here; the softmax is folded into the loss.
logits = tf.layers.dense(
    inputs=hidden2_drop,
    units=n_outputs,
    activation=None,
    name="logits",
)

In [10]:
# Per-example softmax cross-entropy between the logits and the integer
# labels, averaged over the batch to give the scalar training loss.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits,
    labels=y,
)
loss = tf.reduce_mean(input_tensor=xentropy, name="loss")

In [11]:
# Minimize the loss with Nesterov-accelerated momentum gradient descent.
learning_rate = 0.01
optimizer = tf.train.MomentumOptimizer(
    learning_rate=learning_rate,
    momentum=0.9,
    use_nesterov=True,
)
training_op = optimizer.minimize(loss)

In [12]:
# A prediction counts as correct when the true label is the top-1 logit;
# accuracy is the fraction of correct predictions in the fed batch.
correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [13]:
# Op that initializes all global variables of the graph; it is run once at
# the start of the training session.
init = tf.global_variables_initializer()

In [14]:
# Ops registered in the UPDATE_OPS collection; they are run alongside
# training_op in the training loop.
# NOTE(review): this collection is normally populated by layers such as batch
# normalization; no such layer is visible in this notebook, so this list is
# presumably empty -- confirm before relying on it.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

In [15]:
# Load MNIST from /tmp/data/ via the TensorFlow tutorial helper. The
# returned object exposes the `train` and `test` splits used below.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [16]:
# Mini-batch size and number of passes over the training set.
batch_size, n_epochs = 200, 1000

In [17]:
# Train for n_epochs passes over the MNIST training set, then evaluate the
# accuracy on the test set once, after the final epoch.
with tf.Session() as sess:
    sess.run(init)
    batches_per_epoch = mnist.train.num_examples // batch_size
    for epoch in range(n_epochs):
        for batch_index in range(batches_per_epoch):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # training=True switches dropout on for the optimization step.
            train_feed = {training: True, X: X_batch, y: y_batch}
            sess.run([training_op, extra_update_ops], feed_dict=train_feed)
    # training=False switches dropout off so the full network is evaluated.
    test_feed = {training: False, X: mnist.test.images, y: mnist.test.labels}
    accuracy_val = sess.run(accuracy, feed_dict=test_feed)
    # `epoch` still holds the index of the last completed epoch here.
    print(epoch, "Test accuracy=", accuracy_val)

(999, 'Test accuracy=', 0.98650002)


We see that the highest accuracy has been achieved using this method of dropout: about 98.65% on the test set after 1000 epochs.