In [1]:
!cat ../bookGithubRepoURL.txt

https://github.com/ageron/handson-ml


In [1]:
from functools import partial
from tensorflow.contrib.layers import variance_scaling_initializer 
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline
plt.rcParams['figure.figsize'] = [16, 9]

## Batch Normalization
### Implementation in TF

In [33]:
tf.reset_default_graph()

mnist = input_data.read_data_sets("/tmp/data/")
n_in      = 28 * 28 # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_out     = 10
eta       = 0.01
momentum  = 0.25 

X = tf.placeholder(tf.float32, shape = (None, n_in), name = 'X')
y = tf.placeholder(tf.int64, shape = (None), name = 'y')
is_training = tf.placeholder(tf.bool, shape = (), name = 'is_training')

with tf.name_scope('dnn'):
    he_init = variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training = is_training,
                                  momentum = 0.9) # formerly 'decay'
    my_dense_layer = partial(tf.layers.dense, kernel_initializer = he_init)
    
    hidden1 = my_dense_layer(X, n_hidden1, name = 'hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name = 'hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(
        bn2, n_out, activation = None, name = 'outputs')
    logits = my_batch_norm_layer(logits_before_bn)
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [34]:
with tf.name_scope('loss'):
    x_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels = y, logits = logits)
    loss = tf.reduce_mean(x_entropy, name = 'loss')

In [35]:
with tf.name_scope('train'):
    optimizer = tf.train.MomentumOptimizer(eta, momentum)
    training_op = optimizer.minimize(loss)

In [36]:
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
# saver = tf.train.Saver()

In [37]:
n_epochs = 40
batch_size = 200

with tf.Session() as s:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(len(mnist.test.labels) // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            s.run(
                [training_op, extra_update_ops], 
                feed_dict = { is_training: True, X: X_batch, y: y_batch })
        acc_train = accuracy.eval(
            feed_dict = { is_training: False, X: X_batch, y: y_batch })
        acc_test = accuracy.eval(feed_dict = { is_training: False, 
                                               X: mnist.test.images, 
                                               y: mnist.test.labels })
        print('%2d Train accuracy: %.4f   Test accuracy: %.4f'
              %(epoch, acc_train, acc_test))
        # save_path = saver.save(s, 'my_batch_norm_mod.ckpt')

 0 Train accuracy: 0.7150   Test accuracy: 0.7611
 1 Train accuracy: 0.8350   Test accuracy: 0.8324
 2 Train accuracy: 0.8600   Test accuracy: 0.8568
 3 Train accuracy: 0.8800   Test accuracy: 0.8722
 4 Train accuracy: 0.8850   Test accuracy: 0.8791
 5 Train accuracy: 0.8650   Test accuracy: 0.8857
 6 Train accuracy: 0.8500   Test accuracy: 0.8935
 7 Train accuracy: 0.9050   Test accuracy: 0.8978
 8 Train accuracy: 0.8950   Test accuracy: 0.9019
 9 Train accuracy: 0.9300   Test accuracy: 0.9060
10 Train accuracy: 0.9050   Test accuracy: 0.9090
11 Train accuracy: 0.9200   Test accuracy: 0.9107
12 Train accuracy: 0.9000   Test accuracy: 0.9146
13 Train accuracy: 0.8800   Test accuracy: 0.9141
14 Train accuracy: 0.9450   Test accuracy: 0.9189
15 Train accuracy: 0.9000   Test accuracy: 0.9202
16 Train accuracy: 0.9250   Test accuracy: 0.9228
17 Train accuracy: 0.9050   Test accuracy: 0.9237
18 Train accuracy: 0.9550   Test accuracy: 0.9251
19 Train accuracy: 0.9200   Test accuracy: 0.9265


### Gradient Clipping
To prevent exploding/vanishing gradients, simply clip the gradient to some threshold value (mostly useful for recurrent nns). 
NOTE: Batch normalization is generally preferred

In [38]:
threshold = 1.
optimizer = tf.train.GradientDescentOptimizer(eta)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

# run this training_op at every training step

## Reusing Pretrained Layers
### Reusing a TF Model

```
with tf.Session() as s:
    saver.restore(s, './pretrained_model.ckpt')
    # ...```

To reuse only SOME layers, Saver must be configured accordingly

```
# <Build new model with same def as before for hidden layers 1 - 3>
init = tf.global_variables_initializer()

reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope = 'hidden[123]')
reuse_vars_dict = dict(
    [(var.name, var.name) for var in reuse_vars])
# saver to restore original mod
original_saver = tf.Saver(reuse_vars) 
# ...and to save new mod
new_saver = tf.Saver()                

with tf.Session() as s:
    s.run(init)
    # restore layers 1-3
    original_saver.restore('./pretrained_model.ckpt') 
    # <Train new mod>
    new_saver.save('./new_model.ckpt') # save whole model```

## Faster Optimizers

### Momentum Optimization

In [39]:
optimizer = tf.train.MomentumOptimizer(learning_rate = eta, momentum = 0.9)

### Nesterov Accelerated Gradient

In [40]:
optimizer = tf.train.MomentumOptimizer(
    learning_rate = eta, momentum = 0.9, use_nesterov = True)

### AdaGrad
### RMSProp

In [41]:
optimizer = tf.train.RMSPropOptimizer(
    learning_rate = eta, momentum = 0.9, decay = 0.9, epsilon = 1e-10)

### Adam Optimization
This is a combination of many of the above, and generally yields the best results

In [42]:
optimizer = tf.train.AdamOptimizer(learning_rate = eta)