# Intro ANNs
## In TensorFlow

In [1]:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.examples.tutorials.mnist import input_data
from datetime import datetime

In [2]:
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


### Step 1. Building the Computation Graph

In [3]:
datetime.now().strftime("%Y%m%d")

'20180515'

In [49]:
now = datetime.now().strftime("%Y%m%d")
outfile = f"./tf_logs/run-{now}"

tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = fully_connected(X, n_hidden1, activation_fn=tf.nn.elu, 
                              scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=tf.nn.elu,
                              scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, scope="outputs",
                             activation_fn=None)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    summary_acc = tf.summary.scalar("Accuracy", accuracy)
    file_writer = tf.summary.FileWriter(outfile, tf.get_default_graph())
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

## Step 2. Execution Phase

In [50]:
n_epochs = 70
batch_size = 50
batch_iterations = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for iteration in range(batch_iterations):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={X: X_batch, y:y_batch})
            
        acc_train = sess.run(accuracy, feed_dict={X:X_batch, y:y_batch})
        acc_test = sess.run(accuracy, feed_dict={X:mnist.train.images,
                                                 y:mnist.train.labels})
        end = "\n" if epoch % 10 == 0 else "\r"
        print(f"@Epoch {epoch}. Train Acc: {acc_train:0.3%} | Test Acc: {acc_test:0.3%}", end=end)
        tboard_loss = summary_acc.eval(feed_dict={X: X_batch, y:y_batch})
        file_writer.add_summary(tboard_loss, epoch)    
    save_path = saver.save(sess, "./tmp/my_model_final.ckpt")
file_writer.close()

@Epoch 0. Train Acc: 90.000% | Test Acc: 89.345%
@Epoch 10. Train Acc: 98.000% | Test Acc: 94.942%
@Epoch 20. Train Acc: 94.000% | Test Acc: 96.642%%
@Epoch 30. Train Acc: 100.000% | Test Acc: 97.555%
@Epoch 40. Train Acc: 100.000% | Test Acc: 98.204%
@Epoch 50. Train Acc: 100.000% | Test Acc: 98.556%
@Epoch 60. Train Acc: 100.000% | Test Acc: 98.887%
@Epoch 69. Train Acc: 100.000% | Test Acc: 99.167%

## Batch Normalization in TensorFlow

In [51]:
from tensorflow.contrib.layers import fully_connected, batch_norm

In [53]:
tf.reset_default_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
bn_params = {
    "is_training": is_training,
    "decay": 0.999, # The exponential decay hyperparameter
    "updates_collections": None
}

with tf.name_scope("dnn"):
    ###  Argument Scopes ### 
    # In order to aviod repetition,
    # the first parameter is a list of functions,
    # and the other parameters will be passed to
    # these functions automatically.
    with tf.contrib.framework.arg_scope(
        [fully_connected],
        normalizer_fn=batch_norm,
        normalizer_params=bn_params
    ):
        hidden1 = fully_connected(X, n_hidden1, activation_fn=tf.nn.elu,
                                  scope="hidden1")
        hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=tf.nn.elu,
                                  scope="hidden2")
        logits = fully_connected(hidden2, n_outputs, activation_fn=None,
                                 scope="logits")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits
    )
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)

with tf.name_scope("evaluate"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [55]:
epochs = 70
batch_size = 50
batch_iterations = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    for epoch in range(epochs):
        sess.run(init)
        for it in range(batch_iterations):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_step,
                     feed_dict={X:X_batch, y:y_batch, is_training:True})
        acc_mnist = sess.run(accuracy,
                             feed_dict={X:mnist.train.images,
                                        y:mnist.train.labels,
                                        is_training:False})
        end = "\n" if epoch % 10 == 0 else "\r"
        print(f"Accuracy @Epoch {epoch}: {acc_mnist:0.3%}", end=end)

Accuracy @Epoch 0: 87.695%
Accuracy @Epoch 10: 89.489%
Accuracy @Epoch 20: 87.689%
Accuracy @Epoch 30: 90.333%
Accuracy @Epoch 40: 88.845%
Accuracy @Epoch 50: 89.882%
Accuracy @Epoch 60: 89.269%
Accuracy @Epoch 69: 87.873%

## Gradient Clipping
In order to lessen the exploding gradients problem

In [15]:
tf.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, activation_fn=None,
                             scope="logits")
    
#### GRADIENT CLIPPING IMPLEMENTATION ####
# In tensorflow, the optimizer’s minimize() function
# takes care of both computing the gradients and applying them,
# so you must instead call the optimizer’s compute_gradients()
# method first, then create an operation to clip the gradients
# using the clip_by_value() function, and finally create an operation
# to apply the clipped gradients using the optimizer’s apply_gradients() method
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01
with tf.name_scope("train"):
    threshold = 1.0
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
                  for grad, var in grads_and_vars]
    train_step = optimizer.apply_gradients(capped_gvs)

with tf.name_scope("evaluate"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()

In [21]:
n_epochs = 70
batch_size = 50
batch_iterations = mnist.train.num_examples // batch_size


with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for it in range(batch_iterations):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={X:X_batch, y:y_batch})
            
        test_acc = sess.run(accuracy,
                            feed_dict={X:mnist.train.images,
                                       y:mnist.train.labels})
        end = "\n" if epoch % 10 == 0 else "\r"
        print(f"@Epoch {epoch}. Test accuracy: {test_acc:0.4%}", end=end)

@Epoch 0. Test accuracy: 89.5000%
@Epoch 10. Test accuracy: 96.5891%
@Epoch 20. Test accuracy: 98.0891%
@Epoch 30. Test accuracy: 98.8727%
@Epoch 40. Test accuracy: 99.2764%
@Epoch 50. Test accuracy: 99.6564%
@Epoch 60. Test accuracy: 99.7618%
@Epoch 69. Test accuracy: 99.8909%