# Transfer learning

### a. Create a new FNN that reuses all the pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one.

First we load the best model's graph and get a handle on all the important operations we will need. Instead of creating a new softmax output layer, we will just reuse the existing one (since it has the same number of outputs as the existing one). We will reinitialize its parameters before training.

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [3]:
he_init = tf.variance_scaling_initializer()

In [4]:
reset_graph()

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

In [5]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

In [6]:
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

### b. Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how long it takes. Despite this small number of examples, can you achieve high precision?

In [7]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [8]:
X_train2_full = X_train[y_train >= 5]
y_train2_full = y_train[y_train >= 5] - 5
X_valid2_full = X_valid[y_valid >= 5]
y_valid2_full = y_valid[y_valid >= 5] - 5
X_test2 = X_test[y_test >= 5]
y_test2 = y_test[y_test >= 5] - 5

We keep only 100 instances per class in the training set and 30 instances per class in the validation set.

In [9]:
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

In [10]:
X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [11]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    t0 = time.time()
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.128565	Best loss: 1.128565	Accuracy: 52.67%
1	Validation loss: 1.029821	Best loss: 1.029821	Accuracy: 64.67%
2	Validation loss: 0.993459	Best loss: 0.993459	Accuracy: 64.00%
3	Validation loss: 0.888007	Best loss: 0.888007	Accuracy: 69.33%
4	Validation loss: 0.921085	Best loss: 0.888007	Accuracy: 66.00%
5	Validation loss: 0.919291	Best loss: 0.888007	Accuracy: 65.33%
6	Validation loss: 0.869334	Best loss: 0.869334	Accuracy: 66.67%
7	Validation loss: 0.875244	Best loss: 0.869334	Accuracy: 69.33%
8	Validation loss: 0.875298	Best loss: 0.869334	Accuracy: 65.33%
9	Validation loss: 0.852886	Best loss: 0.852886	Accuracy: 66.67%
10	Validation loss: 0.881996	Best loss: 0.852886	Accuracy: 65.33%
11	Validation loss: 0.889728	Best loss: 0.852886	Accuracy: 67.33%
12	Validation loss: 0.885761	Best loss: 0.852886	Accuracy: 66.67%
13	Validation loss: 0.896800	Best loss: 0.852886	Accuracy: 66.00%
14	Validation l

The accuracy is ok, given a tiny training set and with only one layer to tweak.

### c. Try caching the frozen layers, and train the model again: how much faster is it now?

In [12]:
hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

In [13]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    t0 = time.time()
    
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2})
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.113713	Best loss: 1.113713	Accuracy: 57.33%
1	Validation loss: 0.986192	Best loss: 0.986192	Accuracy: 68.00%
2	Validation loss: 0.927294	Best loss: 0.927294	Accuracy: 69.33%
3	Validation loss: 0.978427	Best loss: 0.927294	Accuracy: 64.00%
4	Validation loss: 0.883225	Best loss: 0.883225	Accuracy: 65.33%
5	Validation loss: 0.868069	Best loss: 0.868069	Accuracy: 66.00%
6	Validation loss: 0.926132	Best loss: 0.868069	Accuracy: 66.00%
7	Validation loss: 0.933927	Best loss: 0.868069	Accuracy: 63.33%
8	Validation loss: 0.972674	Best loss: 0.868069	Accuracy: 57.33%
9	Validation loss: 0.953491	Best loss: 0.868069	Accuracy: 60.67%
10	Validation loss: 0.916571	Best loss: 0.868069	Accuracy: 66.67%
11	Validation loss: 0.845653	Best loss: 0.845653	Accuracy: 68.67%
12	Validation loss: 0.911072	Best loss: 0.845653	Accuracy: 64.00%
13	Validation loss: 0.893564	Best loss: 0.845653	Accuracy: 70.67%
14	Validation l

### d. Try again reusing just four hidden layers instead of five. Can you achieve a higher precision?

In [14]:
reset_graph()

n_outputs = 5

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [15]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [16]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./my_mnist_model_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.061010	Best loss: 1.061010	Accuracy: 60.67%
1	Validation loss: 0.977865	Best loss: 0.977865	Accuracy: 67.33%
2	Validation loss: 0.963718	Best loss: 0.963718	Accuracy: 64.67%
3	Validation loss: 0.866953	Best loss: 0.866953	Accuracy: 69.33%
4	Validation loss: 0.893164	Best loss: 0.866953	Accuracy: 66.67%
5	Validation loss: 0.861440	Best loss: 0.861440	Accuracy: 67.33%
6	Validation loss: 0.815578	Best loss: 0.815578	Accuracy: 68.67%
7	Validation loss: 0.822610	Best loss: 0.815578	Accuracy: 69.33%
8	Validation loss: 0.821225	Best loss: 0.815578	Accuracy: 69.33%
9	Validation loss: 0.797117	Best loss: 0.797117	Accuracy: 73.33%
10	Validation loss: 0.800130	Best loss: 0.797117	Accuracy: 70.00%
11	Validation loss: 0.811986	Best loss: 0.797117	Accuracy: 70.00%
12	Validation loss: 0.789520	Best loss: 0.789520	Accuracy: 72.67%
13	Validation loss: 0.818175	Best loss: 0.789520	Accuracy: 70.00%
14	Validation l

The result is slightly better.

### e. Now unfreeze the top two hidden layers and continue training. Can you get the model to perform even better?

In [17]:
learning_rate = 0.01

unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [18]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./my_mnist_model_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_5_to_9_four_frozen
0	Validation loss: 0.862952	Best loss: 0.862952	Accuracy: 76.67%
1	Validation loss: 0.833993	Best loss: 0.833993	Accuracy: 74.67%
2	Validation loss: 0.733547	Best loss: 0.733547	Accuracy: 78.67%
3	Validation loss: 0.988450	Best loss: 0.733547	Accuracy: 72.00%
4	Validation loss: 0.797596	Best loss: 0.733547	Accuracy: 77.33%
5	Validation loss: 0.967240	Best loss: 0.733547	Accuracy: 73.33%
6	Validation loss: 1.020303	Best loss: 0.733547	Accuracy: 75.33%
7	Validation loss: 0.787361	Best loss: 0.733547	Accuracy: 81.33%
8	Validation loss: 1.167076	Best loss: 0.733547	Accuracy: 79.33%
9	Validation loss: 1.340540	Best loss: 0.733547	Accuracy: 79.33%
10	Validation loss: 1.255924	Best loss: 0.733547	Accuracy: 80.67%
11	Validation loss: 1.598556	Best loss: 0.733547	Accuracy: 77.33%
12	Validation loss: 1.169137	Best loss: 0.733547	Accuracy: 79.33%
13	Validation loss: 1.180845	Best loss: 0.733547	Accuracy: 78.00%
14	Valid

In [19]:
learning_rate = 0.01

optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam4")
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

In [20]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = no_frozen_saver.save(sess, "./my_mnist_model_5_to_9_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_5_to_9_two_frozen
0	Validation loss: 0.523072	Best loss: 0.523072	Accuracy: 86.00%
1	Validation loss: 0.352548	Best loss: 0.352548	Accuracy: 89.33%
2	Validation loss: 0.320126	Best loss: 0.320126	Accuracy: 91.33%
3	Validation loss: 0.624296	Best loss: 0.320126	Accuracy: 86.67%
4	Validation loss: 0.331089	Best loss: 0.320126	Accuracy: 93.33%
5	Validation loss: 0.558394	Best loss: 0.320126	Accuracy: 90.00%
6	Validation loss: 0.530679	Best loss: 0.320126	Accuracy: 92.67%
7	Validation loss: 0.703869	Best loss: 0.320126	Accuracy: 92.00%
8	Validation loss: 0.687185	Best loss: 0.320126	Accuracy: 90.00%
9	Validation loss: 0.744075	Best loss: 0.320126	Accuracy: 90.67%
10	Validation loss: 0.496201	Best loss: 0.320126	Accuracy: 91.33%
11	Validation loss: 0.477802	Best loss: 0.320126	Accuracy: 91.33%
12	Validation loss: 0.632141	Best loss: 0.320126	Accuracy: 89.33%
13	Validation loss: 0.494414	Best loss: 0.320126	Accuracy: 94.67%
14	Valida

In this task, transfer learning did not work well.