## Implementing Batch Normalization with TensorFlow

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm
from tensorflow.contrib.layers import fully_connected

In [None]:
# Layer widths of the fully connected classifier built below.
n_inputs = 28 * 28               # 784 input features per example
n_hidden1, n_hidden2 = 300, 100  # units in the first / second hidden layer
n_outputs = 10                   # number of output units (one logit per class)

In [None]:
# Graph inputs: a batch of feature vectors, the matching integer class labels,
# and a scalar boolean that switches batch normalization between training and
# inference behaviour.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# NOTE: `(None)` is just `None` (a completely unknown shape). `(None,)` declares
# the intended 1-D vector of labels, so a wrongly shaped feed is rejected early.
y = tf.placeholder(tf.int32, shape=(None,), name='y')
is_training = tf.placeholder(tf.bool, shape=(), name="is_training")

# Keyword arguments handed to `batch_norm` through `normalizer_params`.
bn_params = {
    'is_training': is_training,
    # Decay rate used when updating the moving averages.
    'decay': 0.99,
    # None makes batch_norm update its moving statistics in place instead of
    # collecting update ops that would have to be run alongside the train op.
    'updates_collections': None,

    # 'scale' can be left unset when the activation function is ReLU or None;
    # the original note says those tolerate a wide range of input scales.
    # Other activation functions, whose useful range is roughly [-1, 1],
    # need 'scale' enabled.
    #'scale': True,
}


In [None]:
# Every layer is followed by batch normalization configured by `bn_params`;
# the shared keyword arguments are collected once and unpacked per layer.
bn_layer_kwargs = dict(normalizer_fn=batch_norm, normalizer_params=bn_params)

hidden1 = fully_connected(X, n_hidden1, scope='hidden1', **bn_layer_kwargs)
hidden2 = fully_connected(hidden1, n_hidden2, scope='hidden2', **bn_layer_kwargs)
# activation_fn=None: the output layer produces raw scores (logits).
logits = fully_connected(hidden2, n_outputs, scope='outputs',
                         activation_fn=None, **bn_layer_kwargs)

In [None]:
# Alternative way to write the same network without repeating the batch-norm
# arguments: arg_scope supplies them to every fully_connected call made inside
# the `with` block. Note that this rebinds hidden1 / hidden2 / logits to new
# layers created under the '*_lite' scopes.
bn_arg_scope = tf.contrib.framework.arg_scope(
    [fully_connected],
    normalizer_fn=batch_norm,
    normalizer_params=bn_params)

with bn_arg_scope:
    hidden1 = fully_connected(X, n_hidden1, scope='hidden1_lite')
    hidden2 = fully_connected(hidden1, n_hidden2, scope='hidden2_lite')
    # activation_fn=None: the output layer produces raw scores (logits).
    logits = fully_connected(hidden2, n_outputs,
                             activation_fn=None,
                             scope='outputs_lite')

In [None]:
# Loss: mean sparse softmax cross-entropy between the integer labels `y`
# and the network's logits.
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y)
    loss = tf.reduce_mean(xentropy, name='loss')

# Training step: plain gradient descent on that loss.
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(0.01)  # learning rate
    training_op = optimizer.minimize(loss)

In [None]:
# Accuracy: fraction of examples whose label is the top-1 entry of the logits.
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [None]:
# Housekeeping ops: `init` initializes all global variables at session start,
# `saver` writes and restores checkpoints of the model.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [None]:
# Load MNIST through the TensorFlow tutorial helper, using the local
# 'MNIST_data' directory.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir='MNIST_data')

In [None]:
# Train the batch-normalised network, report accuracy, and save a checkpoint.
# NOTE(review): model_path is an absolute, machine-specific location; the
# prediction cell further down restores from the same literal path.
model_path = '/Users/chancezhang/machine_learning/hands_on_machine_learning/tmp/C11_Model_BN.ckpt'
n_epochs = 50
batch_size = 50
n_batches = mnist.train.num_examples // batch_size  # minibatches per epoch

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            train_feed = {is_training: True, X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)
        # Accuracy is measured on the last minibatch of the epoch only, with
        # batch norm switched to inference mode.
        last_batch_feed = {is_training: False, X: X_batch, y: y_batch}
        acc_train = sess.run(accuracy, feed_dict=last_batch_feed)
        print('epoch', epoch, 'train_acc:', acc_train)

    # Final evaluation on the full test split, then checkpoint the model.
    test_feed = {is_training: False,
                 X: mnist.test.images,
                 y: mnist.test.labels}
    acc_test = sess.run(accuracy, feed_dict=test_feed)
    print("Final Test Accuracy:", acc_test)
    saver.save(sess, model_path)


#### 结论：
之前的模型在 MNIST 上的准确率约为 0.979；使用 Batch Normalization（BN）之后，准确率首次达到 0.98。

In [None]:
# Restore the saved checkpoint and classify a single test image.
model_path = '/Users/chancezhang/machine_learning/hands_on_machine_learning/tmp/C11_Model_BN.ckpt'
test_idx = 1234
test_image = mnist.test.images[test_idx]
test_label = mnist.test.labels[test_idx]

with tf.Session() as sess:
    saver.restore(sess, model_path)
    # The image is wrapped in a list so it is fed as a batch of one;
    # batch norm runs in inference mode.
    single_image_feed = {is_training: False, X: [test_image]}
    pred = sess.run(logits, feed_dict=single_image_feed)
    pred_idx = pred.argmax(axis=1)  # index of the largest logit per row
    print("Predict:", pred_idx, " Answer:", test_label)
    

## Gradient Clipping

In [3]:

# Layer widths of the network used in the gradient-clipping example.
n_inputs = 28 * 28               # 784 input features per example
n_hidden1, n_hidden2 = 300, 100  # units in the first / second hidden layer
n_outputs = 10                   # number of output units (one logit per class)
# Each gradient element is clipped to [-threshold, threshold].
threshold = 1.0

# Build a plain (no batch norm) three-layer network whose gradients are clipped
# element-wise to [-threshold, threshold] before being applied.
# The scope name's spelling is kept as-is so existing op names do not change.
with tf.name_scope('graidient_clipping'):
    # variables
    X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
    # `(None,)` (not `(None)`, which is just `None`) declares a 1-D label vector.
    y = tf.placeholder(tf.int32, shape=(None,), name='y')
    # dnn
    hidden1 = fully_connected(X, n_hidden1, scope='gc_hidden1')
    hidden2 = fully_connected(hidden1, n_hidden2, scope='gc_hidden2')
    logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope='gc_outputs')
    # loss
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='gc_loss')
    # train
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    grads_and_vars = optimizer.compute_gradients(loss)
    # compute_gradients considers every trainable variable in the default graph.
    # Variables that do not feed into `loss` (e.g. layers created by earlier
    # cells of this notebook) come back with a None gradient, which
    # tf.clip_by_value cannot handle, so those pairs are skipped.
    capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
                  for grad, var in grads_and_vars
                  if grad is not None]
    training_op = optimizer.apply_gradients(capped_gvs)
    # eval
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

gc_init = tf.global_variables_initializer()

# Load MNIST and train the gradient-clipped network.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="MNIST_data")

n_epochs = 20
batch_size = 50
n_batches = mnist.train.num_examples // batch_size  # minibatches per epoch

with tf.Session() as sess:
    sess.run(gc_init)
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
        # Accuracy is measured on the last minibatch of the epoch only.
        acc = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        print('epoch', epoch, ' acc:', acc)
    

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
epoch 0  acc: 0.92
epoch 1  acc: 0.94
epoch 2  acc: 0.94
epoch 3  acc: 0.98
epoch 4  acc: 0.96
epoch 5  acc: 0.94
epoch 6  acc: 0.98
epoch 7  acc: 0.98
epoch 8  acc: 0.96
epoch 9  acc: 0.98
epoch 10  acc: 1.0
epoch 11  acc: 0.96
epoch 12  acc: 0.98
epoch 13  acc: 0.98
epoch 14  acc: 0.98
epoch 15  acc: 1.0
epoch 16  acc: 0.98
epoch 17  acc: 0.98
epoch 18  acc: