# Chapter 10: Introduction to Artificial Neural Networks

In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL is read by TensorFlow's native (C++) logging, so it is set
# before TensorFlow is imported to make sure messages emitted during import are
# filtered as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf
from tensorflow import logging

# Suppress verbose output from TensorFlow's Python-side logger
logging.set_verbosity(logging.ERROR)

## TensorFlow's Estimator API

`tf.estimator` provides a quick and simple way to build an out-of-the-box multi-layer perceptron. Let's train a model on the MNIST handwritten digits dataset.

In [2]:
# Load MNIST, then flatten every 28x28 image into a 784-long float32 vector
# (divided by 255.0) and cast the labels to int32
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.reshape(-1, 28*28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28*28).astype(np.float32) / 255.0
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# Hold out the first 5000 training examples for validation; train on the rest
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [3]:
# A single numeric feature column named "X" holding the 28 * 28 flattened pixels
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]

# Multi-layer perceptron: hidden layers of 300 and 100 units, 10 output classes
dnn_clf = tf.estimator.DNNClassifier(
    hidden_units=[300, 100],
    n_classes=10,
    feature_columns=feature_cols,
)

# Shuffled mini-batches of 50 examples, 40 passes over the training set
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'X': X_train},
    y=y_train,
    num_epochs=40,
    batch_size=50,
    shuffle=True,
)
dnn_clf.train(input_fn=input_fn)

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f57d4177208>

In [4]:
# Score the trained estimator on the test set, fed in its original order
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'X': X_test},
    y=y_test,
    shuffle=False,
)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)
eval_results

{'accuracy': 0.9799,
 'average_loss': 0.11174656,
 'loss': 14.145134,
 'global_step': 44000}

In [5]:
# Materialise the predictions into a list so they can be indexed,
# then show the prediction for the first test example
y_pred_generator = dnn_clf.predict(input_fn=test_input_fn)
y_pred = [prediction for prediction in y_pred_generator]
y_pred[0]

{'logits': array([ -2.3311846,  -4.0713058,   3.514548 ,   4.738897 ,  -6.9394183,
         -2.3357232, -10.762614 ,  24.034058 ,  -4.6445155,   5.932176 ],
       dtype=float32),
 'probabilities': array([3.5458544e-12, 6.2229394e-13, 1.2259984e-09, 4.1707904e-09,
        3.5350056e-14, 3.5297943e-12, 7.7267526e-16, 1.0000000e+00,
        3.5079527e-13, 1.3754758e-08], dtype=float32),
 'class_ids': array([7]),
 'classes': array([b'7'], dtype=object)}

## Plain TensorFlow

In [6]:
# Layer sizes: 28 * 28 inputs -> 300 -> 100 -> 10 outputs
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

# Step size handed to the gradient descent optimizer
learning_rate = 0.01

### Construction Phase

In [7]:
# Start from an empty default graph so re-running this cell does not
# accumulate duplicate ops and variables.
tf.reset_default_graph()

# Inputs: a batch of flattened images and their integer class labels.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# Note the trailing comma: `(None)` is just `None` (a completely unknown shape),
# whereas `(None,)` declares a 1-D tensor with one label per example.
y = tf.placeholder(tf.int32, shape=(None,), name='y')

with tf.name_scope('dnn'):
    # Two fully connected ReLU layers followed by a linear output layer
    hidden1 = tf.layers.dense(X, n_hidden1, name='hidden1',
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name='hidden2',
                              activation=tf.nn.relu)
    # No activation here: the loss below applies softmax to the raw logits itself
    logits = tf.layers.dense(hidden2, n_outputs, name='outputs')
    y_proba = tf.nn.softmax(logits)

with tf.name_scope('loss'):
    # Per-example cross entropy from integer labels, averaged over the batch
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(cross_entropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # For each example, check whether the true label is the top-1 entry of the logits
    correct = tf.nn.in_top_k(logits, y, 1)
    # Mean of the 0/1 hits is the fraction of correct predictions
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution Phase

In [8]:
# Number of passes over the training set, and the mini-batch size for each step
n_epochs, batch_size = 20, 50

In [9]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every example once.

    One random permutation of the row indices is drawn with
    ``np.random.permutation`` and split into ``len(X) // batch_size`` nearly
    equal chunks. When ``len(X)`` is not a multiple of ``batch_size`` the
    leftover examples are spread over the first chunks rather than dropped,
    so a batch can hold slightly more than ``batch_size`` examples.

    Args:
        X: Features; must support ``len()`` and indexing with an integer
            index array (e.g. a NumPy array).
        y: Labels, indexed with the same index arrays as ``X``.
        batch_size: Target number of examples per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` pairs. Nothing is yielded for empty input, and
        a single batch holding everything is yielded when there are fewer
        than ``batch_size`` examples.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    n_samples = len(X)
    if n_samples == 0:
        return

    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, which is what the integer division
    # gives when there are fewer examples than batch_size.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [10]:
# Saver.save does not create a missing parent directory, so make sure the
# checkpoint folder exists before training starts (fails fast on a bad path).
os.makedirs('models', exist_ok=True)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # One epoch: a gradient descent step on every shuffled mini-batch
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report accuracy on the last mini-batch of the epoch and on the validation set
        batch_accuracy = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        validation_accuracy = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(f'{epoch} Batch accuracy: {batch_accuracy} Validation accuracy: {validation_accuracy}')

    # Persist the trained parameters so they can be restored later
    saver.save(sess, 'models/final_mnist_model.ckpt')

0 Batch accuracy: 0.8799999952316284 Validation accuracy: 0.9002000093460083
1 Batch accuracy: 0.8199999928474426 Validation accuracy: 0.9175999760627747
2 Batch accuracy: 0.9800000190734863 Validation accuracy: 0.9336000084877014
3 Batch accuracy: 0.9599999785423279 Validation accuracy: 0.9413999915122986
4 Batch accuracy: 0.9800000190734863 Validation accuracy: 0.9459999799728394
5 Batch accuracy: 0.9200000166893005 Validation accuracy: 0.9502000212669373
6 Batch accuracy: 0.9200000166893005 Validation accuracy: 0.9538000226020813
7 Batch accuracy: 0.9599999785423279 Validation accuracy: 0.9571999907493591
8 Batch accuracy: 0.9800000190734863 Validation accuracy: 0.9593999981880188
9 Batch accuracy: 0.9800000190734863 Validation accuracy: 0.9628000259399414
10 Batch accuracy: 1.0 Validation accuracy: 0.9639999866485596
11 Batch accuracy: 1.0 Validation accuracy: 0.9652000069618225
12 Batch accuracy: 0.9599999785423279 Validation accuracy: 0.9664000272750854
13 Batch accuracy: 1.0 Val

In [11]:
# Reload the saved checkpoint and classify the first 20 test images
with tf.Session() as sess:
    saver.restore(sess, 'models/final_mnist_model.ckpt')
    X_new = X_test[:20]
    Z = sess.run(logits, feed_dict={X: X_new})
    # The predicted class is the index of the largest logit in each row
    y_pred = np.argmax(Z, axis=1)

print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

Predicted classes: [7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
