# Jonathan Halverson
# Friday, November 17, 2017
# Transfer learning (problem 8)

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# 'halverson' is a user-local matplotlib style sheet; only apply it where it is
# installed so the notebook still runs (with default styling) on other machines.
if 'halverson' in plt.style.available:
     plt.style.use('halverson')

## Part 1: Create a model

In [2]:
# Clear the default graph so that re-running the cells below starts from an empty graph
tf.reset_default_graph()

In [3]:
# Network dimensions
n_inputs = 28 * 28  # length of the input vector fed to X
n_hidden = 100      # units in each of the hidden layers
n_outputs = 5       # one logit per class (the digits 0-4 selected further down)

In [4]:
# Graph inputs: X takes a batch of n_inputs-long float vectors, y the integer class labels.
# Note that `(None)` is just None (not a 1-tuple), so y's static shape is left unspecified.
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int64, shape=(None), name="y")

In [5]:
# One variance-scaling initializer per hidden layer, each with its own fixed seed
he_init = [
     tf.contrib.layers.variance_scaling_initializer(seed=layer_seed)
     for layer_seed in [1234, 3456, 5678, 7890, 9012]
]

In [6]:
# Five fully connected ELU layers of n_hidden units each, followed by a linear
# output layer that produces n_outputs logits. Each hidden layer gets its own
# seeded initializer from he_init; the logits layer is given no kernel_initializer.
# Later cells look the first kernel up as "hidden1/kernel:0", i.e. without the
# 'hidden_layers' prefix used for this name scope.
with tf.name_scope('hidden_layers') as scope:
     hidden1 = tf.layers.dense(X      , n_hidden, activation=tf.nn.elu, kernel_initializer=he_init[0], name="hidden1")
     hidden2 = tf.layers.dense(hidden1, n_hidden, activation=tf.nn.elu, kernel_initializer=he_init[1], name="hidden2")
     hidden3 = tf.layers.dense(hidden2, n_hidden, activation=tf.nn.elu, kernel_initializer=he_init[2], name="hidden3")
     hidden4 = tf.layers.dense(hidden3, n_hidden, activation=tf.nn.elu, kernel_initializer=he_init[3], name="hidden4")
     hidden5 = tf.layers.dense(hidden4, n_hidden, activation=tf.nn.elu, kernel_initializer=he_init[4], name="hidden5")
     logits  = tf.layers.dense(hidden5, n_outputs, activation=None, name="logits")

In [7]:
# Loss: softmax cross-entropy computed from the raw logits and the integer labels in y,
# averaged over the batch
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

In [8]:
# One Adam update step that minimizes the mean cross-entropy loss
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

In [9]:
# A prediction counts as correct when the true label is the top-1 logit;
# accuracy is the fraction of correct predictions in whatever batch is fed
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# Unused alternative kept for reference (tf.metrics version):
#accuracy_, accuracy_op = tf.metrics.accuracy(y, tf.arg_max(logits, 1))

In [10]:
# Op that initializes every variable created so far, and a Saver used to checkpoint them
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [11]:
from datetime import datetime

# Each run logs to its own UTC-timestamped sub-directory of root_logdir,
# so successive runs do not overwrite each other's TensorBoard events
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "%s/run-%s/" % (root_logdir, now)

In [12]:
# Unused alternative kept for reference (summaries of the tf.metrics accuracy op):
#accuracy_train_summary = tf.summary.scalar('train_accuracy', accuracy_op)
#accuracy_test_summary = tf.summary.scalar('test_accuracy', accuracy_op)
# Both summaries wrap the same `accuracy` tensor and differ only in their tag;
# which data they describe is decided by the feed_dict used when they are evaluated.
accuracy_train_summary = tf.summary.scalar('train_accuracy', accuracy)
accuracy_test_summary = tf.summary.scalar('test_accuracy', accuracy)
# Event-file writer for this run's log directory, created with the current default graph
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [13]:
# Load MNIST via the TensorFlow tutorial helper, using /tmp/data/ as the data directory.
# The result exposes .train, .validation and .test splits, each with .images and .labels.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [14]:
def filter_digits(A, b, i):
     """Keep only the examples whose label is at most `i`.

     A -- array of examples, indexed along its first axis
     b -- array of labels aligned with the rows of A
     i -- largest label value to keep (inclusive)

     Returns a pair (rows of A, entries of b) for the selected examples, as
     copies that do not share memory with the inputs.
     """
     selected = b <= i
     kept_rows = A[selected]
     kept_labels = b[selected]
     return kept_rows.copy(), kept_labels.copy()

In [15]:
# Restrict each split (train / validation / test) to the digits 0-4,
# matching the five outputs of the network
images_train_04, labels_train_04 = filter_digits(mnist.train.images, mnist.train.labels, 4)
images_valid_04, labels_valid_04 = filter_digits(mnist.validation.images, mnist.validation.labels, 4)
images_test_04, labels_test_04 = filter_digits(mnist.test.images, mnist.test.labels, 4)

In [16]:
# Sanity check on the pixel value range of the training images
mnist.train.images.min(), mnist.train.images.max()

(0.0, 1.0)

In [17]:
def fetch_batch(A, b, batch_size):
     """Draw a random mini-batch of `batch_size` distinct examples.

     A          -- array of examples, indexed along its first axis
     b          -- array of labels aligned with the rows of A
     batch_size -- number of examples to draw

     Returns (A_batch, b_batch) with rows and labels still aligned.

     Raises ValueError (from np.random.choice) when batch_size exceeds the
     number of rows in A, because rows are sampled without replacement.
     """
     # Passing the row count lets NumPy sample the indices 0..n-1 directly instead
     # of first building a sequence of every index on each call.
     # (Sampling with replacement, i.e. bootstrapping via randint, would also work.)
     indices = np.random.choice(A.shape[0], size=batch_size, replace=False)
     return A[indices], b[indices]

In [18]:
# Training schedule
batch_size = 200  # examples per mini-batch
n_epochs = 10     # the training loop runs epochs 0..n_epochs inclusive

In [20]:
# Train the network on digits 0-4, logging accuracy to TensorBoard and
# checkpointing the model after every epoch.
with tf.Session() as sess:
     init.run()
     # range(n_epochs + 1) runs epochs 0..n_epochs inclusive, so the last epoch
     # index is a multiple of 10 and is reported by the print below.
     for epoch in range(n_epochs + 1):
          # One "epoch" is as many random mini-batches as fit in the training set;
          # batches are drawn independently of each other, so within an epoch some
          # examples can be drawn more than once and others not at all.
          for iteration in range(images_train_04.shape[0] // batch_size):
               X_batch, y_batch = fetch_batch(images_train_04, labels_train_04, batch_size)
               sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
          # Training accuracy is measured on the last mini-batch of the epoch only.
          # Fetch the TensorBoard summary and the scalar value in a single run.
          accuracy_train_tb, accuracy_train = sess.run([accuracy_train_summary, accuracy],
                                                       feed_dict={X:X_batch, y:y_batch})
          accuracy_test_tb = accuracy_test_summary.eval(feed_dict={X:images_test_04, y:labels_test_04})
          file_writer.add_summary(accuracy_train_tb, epoch)
          file_writer.add_summary(accuracy_test_tb, epoch)
          # Overwrite the checkpoint each epoch; Part 2 restores from this path
          save_path = saver.save(sess, '/tmp/adam_five_layers.ckpt')
          # The printed figure is computed on the validation split, so label it as such
          accuracy_valid = accuracy.eval(feed_dict={X:images_valid_04, y:labels_valid_04})
          if (epoch % 10 == 0): print(epoch, "Train accuracy=", accuracy_train, "Validation accuracy=", accuracy_valid)
file_writer.close()

(0, 'Train accuracy=', 0.98000002, 'Test accuracy=', 0.98279905)
(10, 'Train accuracy=', 1.0, 'Test accuracy=', 0.99061769)


In [None]:
# Names of all global variables in the default graph
[v.name for v in tf.global_variables()]

In [None]:
# Names of the trainable variables only
[v.name for v in tf.trainable_variables()]

Let's take a look at the weights of the first hidden layer:

In [None]:
# Pick out the first hidden layer's kernel by its exact variable name
# (raises IndexError if no trainable variable has that name)
wts = [v for v in tf.trainable_variables() if v.name == "hidden1/kernel:0"][0]

In [None]:
# Find the op that assigns hidden1's kernel its initial value and take its second
# input, which is used below as the tensor holding that initial value
# (presumably input 0 is the variable itself -- TODO confirm)
graph = tf.get_default_graph()
assign_kernel = graph.get_operation_by_name('hidden1/kernel/Assign')
init_kernel = assign_kernel.inputs[1]

In [None]:
# Rebind `init` to a freshly created initializer op for the graph's global variables
init = tf.global_variables_initializer()

In [None]:
# Initialize the variables in a fresh session, then print the first hidden layer's
# kernel followed by the output of its initializer tensor.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
with tf.Session() as sess:
     init.run()
     print(wts.eval())
     print(init_kernel.eval())

In [None]:
# Show the initializer tensor itself (its repr, not its values)
print(init_kernel)

## Part 2: Use the hidden layers from the first model in a new model

In [None]:
# Reload the weights saved by the Part 1 training loop and classify the first
# three training images; the predicted class is the index of the largest logit
with tf.Session() as sess:
     saver.restore(sess, "/tmp/adam_five_layers.ckpt")
     # Unused alternative kept for reference (rebuilding the graph from a .meta file):
     #saver = tf.train.import_meta_graph(sess, "/tmp/adam_five_layers.meta")
     X_new = images_train_04[0:3]
     Z = logits.eval(feed_dict={X:X_new})
     y_pred = np.argmax(Z, axis=1)

In [None]:
# Predicted classes next to the true labels of the same three images
y_pred, labels_train_04[0:3]

The above shows that the model was restored and used to make correct predictions.

In [None]:
# Print the name of every op that mentions a layer kernel, skipping the ones
# whose name also contains 'Adam'
for op in tf.get_default_graph().get_operations():
     op_name = op.name
     if 'kernel' not in op_name or 'Adam' in op_name:
          continue
     print(op_name)

In [None]:
# Collect the global variables selected by the scope pattern 'hidden[123]'
# (intended to pick out hidden layers 1-3)
reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
# Map each variable's op name to the variable itself
reuse_vars_dict = {var.op.name: var for var in reuse_vars}
# Saver restricted to the variables that are going to be reused
restore_saver = tf.train.Saver(reuse_vars_dict)

In [None]:
# Rebind `init` and `saver` for the new model: an initializer for all global
# variables and a Saver (default arguments) used to write the final checkpoint
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [None]:
with tf.Session() as sess:
     # Initialize every variable first, then overwrite the reused ones
     # (those collected in reuse_vars) with their pretrained values
     init.run()
     # Restore from the checkpoint that the Part 1 training loop actually wrote
     restore_saver.restore(sess, '/tmp/adam_five_layers.ckpt')
     # train the model
     save_path = saver.save(sess, '/tmp/my_new_final_model.ckpt')