In [1]:
# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "nn_ex"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import tensorflow as tf

## Data: MNIST

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

## Neural Network in TF

In [4]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [5]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [10]:
reset_graph()


# Parameters/Variables/Placeholders

n_features = X_train.shape[1]
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

# Dropout parameters
training = tf.placeholder_with_default(False, shape=(), name='training')
dropout_rate = 0  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, dropout_rate, training=training)


# NN Model

from functools import partial

he_init = tf.variance_scaling_initializer()
#scale = 0.01
#kernel_regularizer = tf.contrib.layers.l1_regularizer(scale)
my_dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=he_init) 
                            # option: kernel_regularizer= kernel_regularizer

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X_drop, n_hidden1, name="hidden1")
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)   
    hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name="hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)   
    logits = my_dense_layer(hidden1_drop, n_outputs, activation=None, name="outputs")
    

# Loss/Training/Evaluation

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy") 
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")

with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)    

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [15]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

# Early Stopping Parameters
max_checks_without_progress = 10
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./mnist_nn_model_01.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess,"./mnist_nn_model_01.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test*100))

0	Validation loss: 0.218353	Best loss: 0.218353	Accuracy: 94.72%
1	Validation loss: 0.279882	Best loss: 0.218353	Accuracy: 94.78%
2	Validation loss: 0.245630	Best loss: 0.218353	Accuracy: 95.72%
3	Validation loss: 0.262549	Best loss: 0.218353	Accuracy: 95.64%
4	Validation loss: 0.272777	Best loss: 0.218353	Accuracy: 96.16%
5	Validation loss: 0.316229	Best loss: 0.218353	Accuracy: 95.66%
6	Validation loss: 0.353235	Best loss: 0.218353	Accuracy: 96.38%
7	Validation loss: 0.328619	Best loss: 0.218353	Accuracy: 96.18%
8	Validation loss: 0.491521	Best loss: 0.218353	Accuracy: 95.38%
9	Validation loss: 0.421706	Best loss: 0.218353	Accuracy: 96.44%
10	Validation loss: 0.387184	Best loss: 0.218353	Accuracy: 96.64%
Early stopping!
INFO:tensorflow:Restoring parameters from ./mnist_nn_model_01.ckpt
Final test accuracy: 94.55%


### 0-4 MNIST data

In [24]:
id_train04 = (y_train < 5)
id_val04 = (y_valid < 5)
id_test04 = (y_test < 5)

X_train04 = X_train[id_train04]
y_train04 = y_train[id_train04]
X_val04 = X_valid[id_val04]
y_val04 = y_valid[id_val04]
X_test04 = X_test[id_test04]
y_test04 = y_test[id_test04]

In [25]:
print(X_train04.shape, y_train04.shape)
print(X_val04.shape, y_val04.shape)
print(X_test04.shape, y_test04.shape)

(28038, 784) (28038,)
(2558, 784) (2558,)
(5139, 784) (5139,)


## Deeper model
(MNIST 0-4 digits)

In [16]:
he_init = tf.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers, n_neurons, name=None, 
        activation = tf.nn.elu, initializer= he_init):
    with tf.variable_scope(name,'dnn'):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs,n_neurons, activation=activation, 
                                     kernel_initializer = initializer,
                                    name = "hidden%d" % (layer+1))
        return inputs

In [26]:
reset_graph()


# Parameters/Variables/Placeholders

n_features = X_train04.shape[1]
n_hidden = 100
n_outputs = 5

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")


# NN Model

with tf.name_scope("dnn"):
    dnn_outputs = dnn(X, n_hidden_layers=5, n_neurons =n_hidden) 
    logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")
    

# Loss/Training/Evaluation

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy") 
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")

with tf.name_scope("train"):
    learning_rate = 0.01
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)    

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [27]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

# Early Stopping Parameters
max_checks_without_progress = 10
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train04, y_train04, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_val04, y: y_val04})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./mnist_nn_model_01.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess,"./mnist_nn_model_01.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test04, y: y_test04})
    print("Final test accuracy: {:.2f}%".format(acc_test*100))

0	Validation loss: 0.116407	Best loss: 0.116407	Accuracy: 97.58%
1	Validation loss: 0.180534	Best loss: 0.116407	Accuracy: 97.11%
2	Validation loss: 0.227535	Best loss: 0.116407	Accuracy: 93.86%
3	Validation loss: 0.107346	Best loss: 0.107346	Accuracy: 97.54%
4	Validation loss: 0.302668	Best loss: 0.107346	Accuracy: 95.35%
5	Validation loss: 1.631054	Best loss: 0.107346	Accuracy: 22.01%
6	Validation loss: 1.635262	Best loss: 0.107346	Accuracy: 18.73%
7	Validation loss: 1.671200	Best loss: 0.107346	Accuracy: 22.01%
8	Validation loss: 1.695277	Best loss: 0.107346	Accuracy: 19.27%
9	Validation loss: 1.744607	Best loss: 0.107346	Accuracy: 20.91%
10	Validation loss: 1.629857	Best loss: 0.107346	Accuracy: 22.01%
11	Validation loss: 1.810803	Best loss: 0.107346	Accuracy: 22.01%
12	Validation loss: 1.675703	Best loss: 0.107346	Accuracy: 18.73%
13	Validation loss: 1.633233	Best loss: 0.107346	Accuracy: 20.91%
Early stopping!
INFO:tensorflow:Restoring parameters from ./mnist_nn_model_01.ckpt
Fin