## Transfer Learning

### What you will see
On this tutorial you will learn how to load a model (driving autoencoder) weights to be used on a different model (just drive). The idea is that the conv layers trained by the autoencoder will be already a good feature extractor for the driving model, so we loads those weights and train only the FC part of the model.

References:
* https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc
* https://blog.metaflow.fr/tensorflow-saving-restoring-and-mixing-multiple-models-c4c94d5d7125
* https://stackoverflow.com/questions/36533723/tensorflow-get-all-variables-in-scope
* https://stackoverflow.com/questions/33727935/how-to-use-stop-gradient-in-tensorflow
* https://github.com/tensorflow/tensorflow/issues/6264
* https://www.reddit.com/r/tensorflow/comments/6787gl/how_to_freeze_only_some_weights/
* https://github.com/oduerr/dl_tutorial/blob/master/tensorflow/debugging/embedding.ipynb

In [1]:
import tensorflow as tf
import scipy.misc
import sys
sys.path.append('../')
import model
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as plt_anim
from PIL import Image
import random
import os
import subprocess
import glob
from driving_data import HandleData

# Regularization value
L2NormConst = 0.001
start_lr = 0.0001
batch_size=200
epochs = 600
input_train_hdf5 = '../DatasetLMDB'
input_val_hdf5 = ''
logs_path = '../logs'
save_dir = '../save'
iter_disp = 10

os.environ["CUDA_VISIBLE_DEVICES"] = str(0)

### Open Model

In [2]:
# Open Model
driving_model = model.DrivingModel()
    
# Get placeholders from model
model_in = driving_model.input
model_out = driving_model.output
labels_in = driving_model.label_in
model_drop = driving_model.dropout_control

### Select the conv layers and the fc layers

In [3]:
# Select only the convs
list_convs = [v for v in tf.global_variables() if "conv" in v.name]
list_fc_linear = [v for v in tf.global_variables() if "fc" in v.name or "output" in v.name]
driving_model_vars = tf.global_variables()
driving_model_vars

[<tf.Variable 'conv1/weights:0' shape=(5, 5, 3, 24) dtype=float32_ref>,
 <tf.Variable 'conv1/bias:0' shape=(24,) dtype=float32_ref>,
 <tf.Variable 'conv2/weights:0' shape=(5, 5, 24, 36) dtype=float32_ref>,
 <tf.Variable 'conv2/bias:0' shape=(36,) dtype=float32_ref>,
 <tf.Variable 'conv3/weights:0' shape=(5, 5, 36, 48) dtype=float32_ref>,
 <tf.Variable 'conv3/bias:0' shape=(48,) dtype=float32_ref>,
 <tf.Variable 'conv4/weights:0' shape=(3, 3, 48, 64) dtype=float32_ref>,
 <tf.Variable 'conv4/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'conv5/weights:0' shape=(3, 3, 64, 64) dtype=float32_ref>,
 <tf.Variable 'conv5/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'fc1/weights:0' shape=(1152, 1164) dtype=float32_ref>,
 <tf.Variable 'fc1/bias:0' shape=(1164,) dtype=float32_ref>,
 <tf.Variable 'fc2/weights:0' shape=(1164, 100) dtype=float32_ref>,
 <tf.Variable 'fc2/bias:0' shape=(100,) dtype=float32_ref>,
 <tf.Variable 'fc3/weights:0' shape=(100, 50) dtype=float32_ref>,
 <tf.Va

#### Add some more layers to train

In [4]:
# Append the conv5 to the list of layers that we want to train
conv5_list = [v for v in tf.global_variables() if "conv5" in v.name]
list_fc_linear.append(conv5_list[0])
list_fc_linear.append(conv5_list[1])

### Define saver objects to load the autoencoder checkpoint and the whole graph

In [5]:
# Define the saver object to load only the conv variables
saver_load_autoencoder = tf.train.Saver(var_list=list_convs)
# Define saver object to save all the variables of the drivingModel graph
saver = tf.train.Saver(var_list=driving_model_vars)

### Create the Session
Basically ask tensorflow to build the graph

In [6]:
# Avoid allocating the whole memory
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

### Define the losses

In [7]:
# Create histogram for labels
tf.summary.histogram("steer_angle", labels_in)
# Add input image/steering angle on summary
tf.summary.image("input_image", model_in, 10)

train_vars = list_fc_linear
with tf.name_scope("MSE_Loss_L2Reg"):
    loss = tf.reduce_mean(tf.square(tf.subtract(labels_in, model_out))) + tf.add_n(
        [tf.nn.l2_loss(v) for v in train_vars]) * L2NormConst

# Add model accuracy
with tf.name_scope("Loss_Validation"):
    loss_val = tf.reduce_mean(tf.square(tf.subtract(labels_in, model_out)))

### Define the solver
The thing here is that we pass only the variables that we want to optimize

In [8]:
# Solver configuration
# Get ops to update moving_mean and moving_variance from batch_norm
# Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.name_scope("Solver"):
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = start_lr
    # decay every 10000 steps with a base of 0.96
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               1000, 0.9, staircase=True)

    # Basically update the batch_norm moving averages before the training step
    # http://ruishu.io/2016/12/27/batchnorm/
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step, var_list=list_fc_linear)

### Initialize the values then actually load the values from the autoencoder checkpoint

In [9]:
# Initialize all random variables (Weights/Bias)
sess.run(tf.global_variables_initializer())

# Restore only the weights (From AutoEncoder)
saver_load_autoencoder.restore(sess, "../save_autoencoder/model-222")

INFO:tensorflow:Restoring parameters from ../save_autoencoder/model-222


### Add some variables to be observed on Tensorboard

In [10]:
# Monitor loss, learning_rate, global_step, etc...
tf.summary.scalar("loss_train", loss)
tf.summary.scalar("learning_rate", learning_rate)
tf.summary.scalar("global_step", global_step)
# merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

# Configure where to save the logs for tensorboard
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

### Load the dataset

In [11]:
data = HandleData(path=input_train_hdf5, path_val=input_val_hdf5)
num_images_epoch = int(data.get_num_images() / batch_size)
print('Num samples',data.get_num_images(), 'Iterations per epoch:', num_images_epoch, 'batch size:', batch_size)

Loading training data
LMDB file
Spliting training and validation
Number training images: 13504
Number validation images: 3376
Num samples 16880 Iterations per epoch: 84 batch size: 200


### Do training

In [12]:
# For each epoch
for epoch in range(epochs):
    for i in range(int(data.get_num_images() / batch_size)):
        # Get training batch
        xs_train, ys_train = data.LoadTrainBatch(batch_size, should_augment=True)

        # Send training batch to tensorflow graph (Dropout enabled)
        train_step.run(feed_dict={model_in: xs_train, labels_in: ys_train, model_drop: 0.8})

        # Display some information each x iterations
        if i % iter_disp == 0:
            # Get validation batch
            xs, ys = data.LoadValBatch(batch_size)
            # Send validation batch to tensorflow graph (Dropout disabled)
            loss_value = loss_val.eval(feed_dict={model_in: xs, labels_in: ys, model_drop: 1.0})
            print("Epoch: %d, Step: %d, Loss(Val): %g" % (epoch, epoch * batch_size + i, loss_value))

        # write logs at every iteration
        summary = merged_summary_op.eval(feed_dict={model_in: xs_train, labels_in: ys_train, model_drop: 1.0})
        summary_writer.add_summary(summary, epoch * batch_size + i)

    # Save checkpoint after each epoch
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    checkpoint_path = os.path.join(save_dir, "model")
    filename = saver.save(sess, checkpoint_path, global_step=epoch)
    print("Model saved in file: %s" % filename)

    # Shuffle data at each epoch end
    print("Shuffle data")
    data.shuffleData()

Epoch: 0, Step: 0, Loss(Val): 0.185667
Epoch: 0, Step: 10, Loss(Val): 0.082971
Epoch: 0, Step: 20, Loss(Val): 0.1159
Epoch: 0, Step: 30, Loss(Val): 0.0796375
Epoch: 0, Step: 40, Loss(Val): 0.0836503
Epoch: 0, Step: 50, Loss(Val): 0.097047
Epoch: 0, Step: 60, Loss(Val): 0.100018
Epoch: 0, Step: 70, Loss(Val): 0.0820658
Epoch: 0, Step: 80, Loss(Val): 0.0851202
Model saved in file: ../save/model-0
Shuffle data
Epoch: 1, Step: 200, Loss(Val): 0.0913661
Epoch: 1, Step: 210, Loss(Val): 0.0810633
Epoch: 1, Step: 220, Loss(Val): 0.0770074
Epoch: 1, Step: 230, Loss(Val): 0.0807202
Epoch: 1, Step: 240, Loss(Val): 0.0734986
Epoch: 1, Step: 250, Loss(Val): 0.0781651
Epoch: 1, Step: 260, Loss(Val): 0.109681
Epoch: 1, Step: 270, Loss(Val): 0.0853202
Epoch: 1, Step: 280, Loss(Val): 0.094565
Model saved in file: ../save/model-1
Shuffle data
Epoch: 2, Step: 400, Loss(Val): 0.0873476
Epoch: 2, Step: 410, Loss(Val): 0.101198
Epoch: 2, Step: 420, Loss(Val): 0.079629
Epoch: 2, Step: 430, Loss(Val): 0.09725