# Jonathan Halverson
# Friday, November 17, 2017
# Transfer learning (problem 8)

Here we create a model to recognize handwritten digits from 0 through 4. In the next stage we will restore the model and use it to train a second model on digits 5 through 9.

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): 'halverson' looks like a user-defined style sheet rather than one shipped
# with matplotlib -- confirm it is installed before running this on another machine.
plt.style.use('halverson')

## Part 1: Create a model

In [2]:
# Start from an empty default graph so that re-running the cells below does not
# pile up duplicate nodes from a previous execution.
tf.reset_default_graph()

In [3]:
# Network dimensions.
n_outputs = 5        # one class per digit 0-4
n_hidden = 100       # units in every hidden layer
n_inputs = 28 * 28   # pixels per image, flattened

In [4]:
# Graph inputs: a batch of flattened images and the matching integer class labels.
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is just `None` (no trailing comma), which leaves the label shape completely
# unspecified; `(None,)` declares the intended 1-D batch of labels.
y = tf.placeholder(dtype=tf.int64, shape=(None,), name="y")

In [5]:
# One separately seeded variance-scaling initializer per hidden layer, so the hidden
# kernels start from the same values on every run.
init_seeds = (1234, 3456, 5678, 7890, 9012)
he_init = [tf.contrib.layers.variance_scaling_initializer(seed=seed) for seed in init_seeds]

In [6]:
with tf.name_scope('hidden_layers') as scope:
     # Stack five ELU layers of n_hidden units; layer k is named "hiddenk" and receives
     # the k-th seeded initializer.
     hidden_stack = []
     layer_input = X
     for layer_no, layer_init in enumerate(he_init, 1):
          layer_input = tf.layers.dense(layer_input, n_hidden, activation=tf.nn.elu,
                                        kernel_initializer=layer_init, name="hidden%d" % layer_no)
          hidden_stack.append(layer_input)
     hidden1, hidden2, hidden3, hidden4, hidden5 = hidden_stack
     # Output layer: raw class scores, no activation and no explicit kernel initializer.
     logits = tf.layers.dense(hidden5, n_outputs, activation=None, name="logits")

In [7]:
# Per-example cross entropy computed from the integer labels and the raw logits.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
# Scalar training objective: the mean cross entropy over the fed batch.
loss = tf.reduce_mean(xentropy, name="loss")

In [8]:
with tf.name_scope('digits_optimizer') as scope:
     learning_rate = 0.001
     # Adam keeps its own bookkeeping variables (beta powers and two slots per weight),
     # which is why tf.global_variables() lists many more entries than
     # tf.trainable_variables() further down.
     optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
     # One run of training_op performs a single Adam update step on `loss`.
     training_op = optimizer.minimize(loss)

In [9]:
with tf.name_scope("eval"):
     # A prediction counts as correct when the true label has the highest logit (top-1).
     correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
     # Fraction of correct predictions within whatever batch is fed.
     accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
     # Unused alternative based on streaming metrics:
     #accuracy_, accuracy_op = tf.metrics.accuracy(y, tf.arg_max(logits, 1))

In [10]:
# Op that initialises every variable defined so far (layer weights and Adam state).
init = tf.global_variables_initializer()
# Saver used to write the model checkpoint during training.
saver = tf.train.Saver()

In [11]:
from datetime import datetime

# Every run logs into its own UTC-timestamped subdirectory of tf_logs.
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "%s/run-%s/" % (root_logdir, now)

In [12]:
#accuracy_train_summary = tf.summary.scalar('train_accuracy', accuracy_op)
#accuracy_test_summary = tf.summary.scalar('test_accuracy', accuracy_op)
# Both summaries wrap the same `accuracy` tensor and differ only in their tag, so the
# value logged depends on which data is fed when each summary is evaluated.
accuracy_train_summary = tf.summary.scalar('train_accuracy', accuracy)
accuracy_test_summary = tf.summary.scalar('test_accuracy', accuracy)
# Writes the graph definition now, and the accuracy summaries later, to this run's log directory.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [13]:
from tensorflow.examples.tutorials.mnist import input_data
# Reads the MNIST archives under /tmp/data/ (the output below lists the four files
# being extracted) and exposes train / validation / test splits.
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [14]:
def filter_digits(A, b, i):
     """Select the samples whose label is at most `i`.

     A -- array of samples, one row per sample
     b -- array of labels aligned with the rows of A
     i -- largest label to keep (inclusive)

     Returns a pair (samples, labels) of independent copies, so later changes to
     the result never touch the original arrays.
     """
     keep = np.less_equal(b, i)
     kept_samples = A[keep]
     kept_labels = b[keep]
     return kept_samples.copy(), kept_labels.copy()

In [15]:
# Restrict each MNIST split (train, validation, test) to the digits 0 through 4.
(images_train_04, labels_train_04), (images_valid_04, labels_valid_04), (images_test_04, labels_test_04) = [
     filter_digits(split.images, split.labels, 4)
     for split in (mnist.train, mnist.validation, mnist.test)
]

Make sure the features are scaled:

In [16]:
# The pixel values already span [0, 1] (see the output below), so no extra scaling step is needed.
mnist.train.images.min(), mnist.train.images.max()

(0.0, 1.0)

In [17]:
def fetch_batch(A, b, batch_size):
     """Draw a random mini-batch of `batch_size` distinct samples.

     A          -- array of samples, one row per sample
     b          -- array of labels aligned with the rows of A
     batch_size -- number of samples to draw

     Returns (A[indices], b[indices]) for the same randomly chosen row indices.
     Rows are unique within one batch (replace=False), but separate calls are
     independent, so a sample can reappear in a later batch.

     Raises ValueError (from numpy) when batch_size exceeds the number of rows in A.
     """
     # Passing the population size as an integer draws from 0..n-1 directly, instead
     # of building range(n) and converting it to an array on every call.
     # (Sampling with replacement via randint would be a cheaper alternative.)
     indices = np.random.choice(A.shape[0], size=batch_size, replace=False)
     return A[indices], b[indices]

In [18]:
# Training-loop settings: number of passes and samples per mini-batch.
n_epochs = 50
batch_size = 200

In [19]:
# Number of mini-batches drawn per epoch (fixed for the whole run, so computed once).
n_batches = images_train_04.shape[0] // batch_size

with tf.Session() as sess:
     sess.run(init)
     # Epochs are numbered 0..n_epochs inclusive, i.e. n_epochs + 1 passes in total.
     for epoch in range(n_epochs + 1):
          for iteration in range(n_batches):
               X_batch, y_batch = fetch_batch(images_train_04, labels_train_04, batch_size)
               sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
          # Train accuracy is measured on the last mini-batch of the epoch only. The
          # TensorBoard summary and the printed value come from one evaluation rather
          # than two separate runs over the same batch.
          accuracy_train_tb, accuracy_train = sess.run([accuracy_train_summary, accuracy],
                                                       feed_dict={X: X_batch, y: y_batch})
          # The 'test_accuracy' series in TensorBoard is computed on the test split.
          accuracy_test_tb = sess.run(accuracy_test_summary,
                                      feed_dict={X: images_test_04, y: labels_test_04})
          file_writer.add_summary(accuracy_train_tb, epoch)
          file_writer.add_summary(accuracy_test_tb, epoch)
          # NOTE: the figure printed as "Test accuracy=" is measured on the validation
          # split, not on the test split that feeds the TensorBoard summary above.
          accuracy_test = sess.run(accuracy, feed_dict={X: images_valid_04, y: labels_valid_04})
          if epoch % 10 == 0:
               print(epoch, "Train accuracy=", accuracy_train, "Test accuracy=", accuracy_test)
          # The checkpoint is overwritten after every epoch, so the file always holds
          # the most recent weights.
          save_path = saver.save(sess, '/tmp/adam_five_layers.ckpt')
file_writer.close()

(0, 'Train accuracy=', 0.97000003, 'Test accuracy=', 0.9796716)
(10, 'Train accuracy=', 1.0, 'Test accuracy=', 0.98827207)
(20, 'Train accuracy=', 1.0, 'Test accuracy=', 0.99139953)
(30, 'Train accuracy=', 1.0, 'Test accuracy=', 0.99218142)
(40, 'Train accuracy=', 1.0, 'Test accuracy=', 0.99452698)
(50, 'Train accuracy=', 1.0, 'Test accuracy=', 0.99491793)


In [20]:
# Every variable in the graph: the layers' kernels and biases plus the Adam optimizer's state.
[v.name for v in tf.global_variables()]

[u'hidden1/kernel:0',
 u'hidden1/bias:0',
 u'hidden2/kernel:0',
 u'hidden2/bias:0',
 u'hidden3/kernel:0',
 u'hidden3/bias:0',
 u'hidden4/kernel:0',
 u'hidden4/bias:0',
 u'hidden5/kernel:0',
 u'hidden5/bias:0',
 u'logits/kernel:0',
 u'logits/bias:0',
 u'digits_optimizer/beta1_power:0',
 u'digits_optimizer/beta2_power:0',
 u'hidden1/kernel/Adam:0',
 u'hidden1/kernel/Adam_1:0',
 u'hidden1/bias/Adam:0',
 u'hidden1/bias/Adam_1:0',
 u'hidden2/kernel/Adam:0',
 u'hidden2/kernel/Adam_1:0',
 u'hidden2/bias/Adam:0',
 u'hidden2/bias/Adam_1:0',
 u'hidden3/kernel/Adam:0',
 u'hidden3/kernel/Adam_1:0',
 u'hidden3/bias/Adam:0',
 u'hidden3/bias/Adam_1:0',
 u'hidden4/kernel/Adam:0',
 u'hidden4/kernel/Adam_1:0',
 u'hidden4/bias/Adam:0',
 u'hidden4/bias/Adam_1:0',
 u'hidden5/kernel/Adam:0',
 u'hidden5/kernel/Adam_1:0',
 u'hidden5/bias/Adam:0',
 u'hidden5/bias/Adam_1:0',
 u'logits/kernel/Adam:0',
 u'logits/kernel/Adam_1:0',
 u'logits/bias/Adam:0',
 u'logits/bias/Adam_1:0']

In [21]:
# Only the variables the optimizer updates: the kernel and bias of each dense layer.
[v.name for v in tf.trainable_variables()]

[u'hidden1/kernel:0',
 u'hidden1/bias:0',
 u'hidden2/kernel:0',
 u'hidden2/bias:0',
 u'hidden3/kernel:0',
 u'hidden3/bias:0',
 u'hidden4/kernel:0',
 u'hidden4/bias:0',
 u'hidden5/kernel:0',
 u'hidden5/bias:0',
 u'logits/kernel:0',
 u'logits/bias:0']

Let's take a look at the weights of the first hidden layer:

In [22]:
# Look up the first hidden layer's kernel variable by its full graph name.
kernel_name = "hidden1/kernel:0"
kernel_matches = [var for var in tf.trainable_variables() if var.name == kernel_name]
wts = kernel_matches[0]

In [23]:
graph = tf.get_default_graph()
# The variable's Assign op is what sets its initial value.
assign_kernel = graph.get_operation_by_name('hidden1/kernel/Assign')
# Input 1 of that op is the initial-value tensor; the printout further down shows it is
# "hidden1/kernel/Initializer/truncated_normal:0" with shape (784, 100).
init_kernel = assign_kernel.inputs[1]

In [24]:
# Rebinds `init`, which was already created before training; no variables have been
# added to the graph since then.
init = tf.global_variables_initializer()

In [25]:
with tf.Session() as sess:
     # A new session starts with uninitialised variables and init.run() fills them with
     # fresh initial values, so what is printed here is NOT the trained weights. To
     # inspect those, restore the checkpoint instead:
     #     saver.restore(sess, '/tmp/adam_five_layers.ckpt')
     init.run()
     # Parenthesised print gives the same output on Python 2 and also runs on Python 3.
     print(wts.eval())
     # Evaluating the initializer tensor runs the random op again; this appears to be
     # why the second array differs from the first despite the fixed seed.
     print(init_kernel.eval())

[[ 0.02956573 -0.0147317   0.03754659 ...,  0.0100181  -0.04642533
  -0.00392781]
 [ 0.00681664  0.01608448 -0.06526339 ...,  0.05845329  0.06846888
  -0.01446561]
 [ 0.01235745 -0.00840173  0.04258089 ..., -0.00761735  0.01835208
   0.10508064]
 ..., 
 [-0.10877235 -0.00780591 -0.06775687 ..., -0.05455706  0.01110328
  -0.07342914]
 [ 0.00898654  0.02877282 -0.08861893 ...,  0.00292607  0.0988029
  -0.10259175]
 [ 0.06239619  0.00710347 -0.02129543 ..., -0.01523922 -0.02762903
  -0.04033452]]
[[ 0.00197871 -0.09727467  0.03001994 ..., -0.03357524  0.02215341
   0.01172276]
 [ 0.0351553   0.03946508  0.09650076 ...,  0.05853136  0.05950334
   0.00707132]
 [-0.08648536 -0.02498405 -0.02373177 ..., -0.00258497  0.06656438
  -0.03916783]
 ..., 
 [-0.00804007  0.05561004 -0.03737677 ..., -0.05511406 -0.08602148
  -0.10004824]
 [-0.00557009 -0.01244225 -0.02345512 ..., -0.01570851  0.02791833
   0.09433575]
 [-0.01495138  0.01993305 -0.01939865 ..., -0.00214882 -0.0788036
  -0.04288276]]


In [26]:
# Show the symbolic tensor itself (name, shape, dtype), not its values.
# Parenthesised print gives the same output on Python 2 and also runs on Python 3.
print(init_kernel)

Tensor("hidden1/kernel/Initializer/truncated_normal:0", shape=(784, 100), dtype=float32)
