In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: Value passed to both tf.set_random_seed and np.random.seed.
    """
    # NumPy seeding is independent of the TensorFlow graph state.
    np.random.seed(seed)
    # The TensorFlow seed is set after the reset, as in the original ordering.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels in every figure
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to PROJECT_ROOT_DIR/images/CHAPTER_ID/<fig_id>.png.
    The target directory is created first when it does not exist yet.

    Args:
        fig_id: File name of the figure, without the ".png" extension.
        tight_layout: When true, call plt.tight_layout() before saving.
    """
    image_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # plt.savefig does not create missing directories, so make sure the
    # destination exists (isdir/makedirs keeps this Python 2 compatible).
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    path = os.path.join(image_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

## Assignment 8.1 

Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function

In [2]:
import tensorflow as tf

# Network geometry: 28 * 28 input features, five hidden layers of 100
# neurons each, and five output classes.
n_inputs = 28 * 28
n_hidden1 = n_hidden2 = n_hidden3 = n_hidden4 = n_hidden5 = 100
n_outputs = 5

# Kernel initializer shared by all dense layers (used as the He initializer
# requested by the assignment).
he_init = tf.contrib.layers.variance_scaling_initializer()

  from ._conv import register_converters as _register_converters


In [3]:
from functools import partial

# Dense-layer factory with ELU activation and he_init pre-bound; individual
# calls can still override either keyword (e.g. activation=None).
dense_layer = partial(
    tf.layers.dense,
    kernel_initializer=he_init,
    activation=tf.nn.elu,
)

In [4]:
reset_graph()

# Inputs: a batch of feature rows and one integer class label per row.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# Note the trailing comma: `(None)` is plain `None` (a placeholder of unknown
# rank), whereas `(None,)` declares a 1-D vector of labels of any length.
y = tf.placeholder(tf.int64, shape=(None,), name='y')

with tf.name_scope('dnn'):
    # Five hidden layers chained one after another (ELU + he_init come from
    # the dense_layer partial).
    hidden1 = dense_layer(X, n_hidden1, name='hidden1')
    hidden2 = dense_layer(hidden1, n_hidden2, name='hidden2')
    hidden3 = dense_layer(hidden2, n_hidden3, name='hidden3')
    hidden4 = dense_layer(hidden3, n_hidden4, name='hidden4')
    hidden5 = dense_layer(hidden4, n_hidden5, name='hidden5')
    # Output layer stays linear: the softmax is applied inside the loss op.
    logits = dense_layer(hidden5, n_outputs, activation=None, name='outputs')

with tf.name_scope('loss'):
    # Per-example cross-entropy from integer labels, averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

## Assignment 8.2

Using Adam optimization and early stopping, try training it on MNIST but only on digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You will need a softmax output layer with five neurons, and as always make sure to save checkpoints at regular intervals and save the final model so you can reuse it later.

`Adam optimization`: adaptive moment estimation; it combines the ideas of Momentum optimization and RMSProp.

`Momentum optimization`: keep track of an exponentially decaying average of past gradients.

`RMSProp`: keep track of an exponentially decaying average of past squared gradients.

In [14]:
learning_rate = 0.001

with tf.name_scope('train'):
    # Adam optimizer with the step size above.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # Running this op performs one optimization step on `loss`.
    training_op = optimizer.minimize(loss)

In [15]:
with tf.name_scope('eval'):
    # True for each example whose label is the top-1 entry of its logits.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Mean of the 0/1 hits gives the fraction of correct predictions.
    correct_as_float = tf.cast(correct, tf.float32)
    accuracy = tf.reduce_mean(correct_as_float)

In [7]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets, using /tmp/data/ as the data directory.
# NOTE(review): this is an absolute, machine-local path — consider making it configurable.
mnist = input_data.read_data_sets('/tmp/data/')

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [8]:
# Keep only the examples whose label is below 5 (digits 0 to 4).
train_mask = mnist.train.labels < 5
test_mask = mnist.test.labels < 5

X_train, y_train = mnist.train.images[train_mask], mnist.train.labels[train_mask]
X_test, y_test = mnist.test.images[test_mask], mnist.test.labels[test_mask]

In [9]:
def shuffle_split(X, y, n_batches, seed=42):
    """Yield (X_batch, y_batch) mini-batches taken from one random permutation.

    The indices 0..len(X)-1 are shuffled once and split into `n_batches`
    nearly equal chunks (np.array_split), so every example appears in exactly
    one batch per call.

    The permutation is drawn from a private RandomState, so calling this
    generator does not reseed or advance NumPy's global random generator.

    Args:
        X: Array of examples, indexable by an integer index array.
        y: Array of labels aligned with X along the first axis.
        n_batches: Number of batches to split the shuffled indices into.
        seed: Seed for the permutation. The default (42) reproduces the same
            order on every call; pass a different value per epoch (or None
            for an unpredictable seed) to get a fresh shuffle each time.

    Yields:
        Tuples (X_batch, y_batch) holding the rows selected for each batch.
    """
    # Local generator: same stream as np.random.seed(seed) followed by
    # np.random.permutation, but without touching global state.
    rng = np.random.RandomState(seed)
    rnd_idx = rng.permutation(len(X))
    for i_idx in np.array_split(rnd_idx, n_batches):
        yield X[i_idx], y[i_idx]

In [22]:
n_epochs = 50
batch_size = 50
n_batches = len(X_train) // batch_size
best_loss = float('inf')
# Stop once more than `patience` consecutive epochs fail to improve the loss.
patience = 2
cnt_patience = 0

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_split(X_train, y_train, n_batches):
            # Only the training op is needed here; the loss value was unused.
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Training accuracy is measured on the last mini-batch of the epoch only.
        accuracy_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        # NOTE(review): the test set drives early stopping here; a separate
        # validation split would avoid tuning on the test data.
        accuracy_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        loss_test = loss.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, 'loss', loss_test, 'accuracy_train:', accuracy_train, 'accuracy_test:', accuracy_test)
        if loss_test < best_loss:
            best_loss = loss_test
            # An improvement ends the current streak of bad epochs.
            cnt_patience = 0
        else:
            cnt_patience += 1
            if cnt_patience > patience:
                print('Early stopping!')
                break

0 loss 0.04894622 accuracy_train: 1.0 accuracy_test: 0.9820977
1 loss 0.04120996 accuracy_train: 0.98 accuracy_test: 0.98715705
2 loss 0.03728239 accuracy_train: 1.0 accuracy_test: 0.98813
3 loss 0.030111544 accuracy_train: 1.0 accuracy_test: 0.9910488
4 loss 0.026404124 accuracy_train: 1.0 accuracy_test: 0.9918272
5 loss 0.0332601 accuracy_train: 1.0 accuracy_test: 0.99065965
6 loss 0.030299986 accuracy_train: 1.0 accuracy_test: 0.9918272


Alternatively we can use `tf.keras.callbacks.EarlyStopping`.

## Assignment 8.3

Tune the hyperparameters using cross-validation and see what precision you can achieve.