## Transfer Learning

### What you will see
On this tutorial you will learn how to load a model (driving autoencoder) weights to be used on a different model (just drive). The idea is that the conv layers trained by the autoencoder will be already a good feature extractor for the driving model, so we loads those weights and train only the FC part of the model.

References:
* https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc
* https://blog.metaflow.fr/tensorflow-saving-restoring-and-mixing-multiple-models-c4c94d5d7125
* https://stackoverflow.com/questions/36533723/tensorflow-get-all-variables-in-scope
* https://stackoverflow.com/questions/33727935/how-to-use-stop-gradient-in-tensorflow
* https://github.com/tensorflow/tensorflow/issues/6264
* https://www.reddit.com/r/tensorflow/comments/6787gl/how_to_freeze_only_some_weights/
* https://github.com/oduerr/dl_tutorial/blob/master/tensorflow/debugging/embedding.ipynb
* http://kawahara.ca/how-to-debug-a-jupyter-ipython-notebook/

In [1]:
import tensorflow as tf
import scipy.misc
import sys
sys.path.append('../')
import model
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as plt_anim
from PIL import Image
import random
import os
import subprocess
import glob
from driving_data import HandleData
from graphviz import Digraph


# Regularization value
L2NormConst = 0.001
start_lr = 0.001
batch_size=100
epochs = 600
input_train_hdf5 = '../DatasetLMDB'
input_val_hdf5 = ''
logs_path = '../logs'
save_dir = '../save'
iter_disp = 10

os.environ["CUDA_VISIBLE_DEVICES"] = str(0)

### Simple function to create dot diagram from tensorflow graph

In [21]:
def tf_to_dot(graph):
    dot = Digraph()

    # For each note on the graph
    for n in graph.node:    
        dot.node(n.name, label=n.name)        
        for i in n.input:            
            dot.edge(i, n.name)
            
    return dot

### Open Model

In [22]:
# Open Model
driving_model = model.DrivingModel()
    
# Get placeholders from model
model_in = driving_model.input
model_out = driving_model.output
labels_in = driving_model.label_in
model_drop = driving_model.dropout_control

### Select the conv layers and the fc layers

In [None]:
# Select only the convs
list_convs = [v for v in tf.global_variables() if "conv" in v.name]
list_fc_linear = [v for v in tf.global_variables() if "fc" in v.name or "output" in v.name]
driving_model_vars = tf.global_variables()
driving_model_vars

dotGraph = tf_to_dot(tf.get_default_graph().as_graph_def())
dotGraph.render()

#### Add some more layers to train

In [13]:
# Append the conv5 to the list of layers that we want to train
conv5_list = [v for v in tf.global_variables() 
              if "conv5" in v.name or "conv4" in v.name or "Norm_4" in v.name or "Norm_3" in v.name]
for layer in conv5_list:
    list_fc_linear.append(layer)
    #from IPython.core.debugger import Tracer; Tracer()()
conv5_list

[<tf.Variable 'conv4/weights:0' shape=(3, 3, 48, 64) dtype=float32_ref>,
 <tf.Variable 'conv4/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_3/beta:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_3/gamma:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_3/moving_mean:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_3/moving_variance:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'conv5/weights:0' shape=(3, 3, 64, 64) dtype=float32_ref>,
 <tf.Variable 'conv5/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_4/beta:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_4/gamma:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_4/moving_mean:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'BatchNorm_4/moving_variance:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'conv4_1/weights:0' shape=(3, 3, 48, 64) dtype=float32_ref>,
 <tf.Variable 'conv4_1/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'conv5

### Define saver objects to load the autoencoder checkpoint and the whole graph

In [14]:
# Define the saver object to load only the conv variables
saver_load_autoencoder = tf.train.Saver(var_list=list_convs)
# Define saver object to save all the variables of the drivingModel graph
saver = tf.train.Saver(var_list=driving_model_vars)

### Create the Session
Basically ask tensorflow to build the graph

In [None]:
# Avoid allocating the whole memory
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

### Define the losses

In [None]:
# Create histogram for labels
tf.summary.histogram("steer_angle", labels_in)
# Add input image/steering angle on summary
tf.summary.image("input_image", model_in, 10)

train_vars = list_fc_linear
with tf.name_scope("MSE_Loss_L2Reg"):
    loss = tf.reduce_mean(tf.square(tf.subtract(labels_in, model_out))) + tf.add_n(
        [tf.nn.l2_loss(v) for v in train_vars]) * L2NormConst

# Add model accuracy
with tf.name_scope("Loss_Validation"):
    loss_val = tf.reduce_mean(tf.square(tf.subtract(labels_in, model_out)))

### Define the solver
The thing here is that we pass only the variables that we want to optimize

In [None]:
# Solver configuration
# Get ops to update moving_mean and moving_variance from batch_norm
# Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.name_scope("Solver"):
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = start_lr
    # decay every 10000 steps with a base of 0.96
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               1000, 0.9, staircase=True)

    # Basically update the batch_norm moving averages before the training step
    # http://ruishu.io/2016/12/27/batchnorm/
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step, var_list=list_fc_linear)

### Initialize the values then actually load the values from the autoencoder checkpoint

In [None]:
# Initialize all random variables (Weights/Bias)
sess.run(tf.global_variables_initializer())

# Restore only the weights (From AutoEncoder)
saver_load_autoencoder.restore(sess, "../save_autoencoder/model-590")

### Add some variables to be observed on Tensorboard

In [None]:
# Monitor loss, learning_rate, global_step, etc...
tf.summary.scalar("loss_train", loss)
tf.summary.scalar("learning_rate", learning_rate)
tf.summary.scalar("global_step", global_step)
# merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

# Configure where to save the logs for tensorboard
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

### Load the dataset

In [None]:
data = HandleData(path=input_train_hdf5, path_val=input_val_hdf5)
num_images_epoch = int(data.get_num_images() / batch_size)
print('Num samples',data.get_num_images(), 'Iterations per epoch:', num_images_epoch, 'batch size:', batch_size)

### Do training

In [None]:
# For each epoch
for epoch in range(epochs):
    for i in range(int(data.get_num_images() / batch_size)):
        # Get training batch
        xs_train, ys_train = data.LoadTrainBatch(batch_size, should_augment=True)

        # Send training batch to tensorflow graph (Dropout enabled)
        train_step.run(feed_dict={model_in: xs_train, labels_in: ys_train, model_drop: 0.8})

        # Display some information each x iterations
        if i % iter_disp == 0:
            # Get validation batch
            xs, ys = data.LoadValBatch(batch_size)
            # Send validation batch to tensorflow graph (Dropout disabled)
            loss_value = loss_val.eval(feed_dict={model_in: xs, labels_in: ys, model_drop: 1.0})
            print("Epoch: %d, Step: %d, Loss(Val): %g" % (epoch, epoch * batch_size + i, loss_value))

        # write logs at every iteration
        summary = merged_summary_op.eval(feed_dict={model_in: xs_train, labels_in: ys_train, model_drop: 1.0})
        summary_writer.add_summary(summary, epoch * batch_size + i)

    # Save checkpoint after each epoch
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    checkpoint_path = os.path.join(save_dir, "model")
    filename = saver.save(sess, checkpoint_path, global_step=epoch)
    print("Model saved in file: %s" % filename)

    # Shuffle data at each epoch end
    print("Shuffle data")
    data.shuffleData()