Transfer learning is a deep learning technique in which a model trained on one dataset (task) is reused as the starting point for training on another dataset (task). It has been shown to allow faster learning with fewer data points. You can read about it at http://cs231n.github.io/transfer-learning/

In this homework, you will be implementing transfer learning. 

1. Train LeNet (taught in class) on the MNIST dataset, starting from a random initialization, and store the weights using tf.train.Saver().

2. Then retrieve the saved weights and use them as the starting point for training on the Fashion-MNIST dataset (https://github.com/zalandoresearch/fashion-mnist).


In [14]:
import tensorflow as tf
import math
from tensorflow.examples.tutorials.mnist import input_data
import time

In [2]:
# Read the MNIST digit data from /tmp/data/ with one-hot encoded labels.
# `input_data` comes from the imports cell at the top of the notebook, so it is
# not imported a second time here.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# Show how many examples are in the training and test splits (one count per line).
print(mnist.train.num_examples, mnist.test.num_examples, sep="\n")

55000
10000


In [4]:
# --- Training configuration ---
learning_rate = 0.01     # learning rate handed to the Adam optimizer
training_iters = 20000   # training stops once step * batch_size reaches this value
batch_size = 128         # number of examples drawn per training step
display_step = 20        # report loss / accuracy every `display_step` steps

# --- Network configuration ---
n_input = 28 * 28        # a 28x28 image flattened to 784 values
n_classes = 10           # one output per class (digits 0-9)
dropout = 0.75           # fed to the `keep_prob` placeholder while training

# Fix the graph-level random seed before any ops are created.
tf.set_random_seed(3457)

In [5]:
# Graph inputs: flattened images, one-hot labels, and the dropout keep probability.
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
keep_prob = tf.placeholder(tf.float32)
print(x.shape, y.shape)

(?, 784) (?, 10)


In [6]:
def conv2d(x, W, b, strides=1):
    """Apply a 2-D convolution, add a bias, and pass the result through ReLU.

    Args:
        x: input tensor handed to tf.nn.conv2d.
        W: convolution filter tensor.
        b: bias tensor added with tf.nn.bias_add.
        strides: stride used for the two middle entries of the stride list;
            the first and last entries are fixed at 1.

    Returns:
        The ReLU-activated result of the biased convolution ('SAME' padding).
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

In [7]:
def maxpool2d(x, k=2):
    """Max-pool `x` using a k-by-k window and a stride of k, with 'SAME' padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [8]:
def conv_net(x, weights, biases, dropout):
    """Build the network: two conv + max-pool stages, a dense layer with dropout, and an output layer.

    Args:
        x: batch of flattened images; reshaped here to [-1, 28, 28, 1].
        weights: dict providing the tensors under keys 'wc1', 'wc2', 'wd1', 'out'.
        biases: dict providing the tensors under keys 'bc1', 'bc2', 'bd1', 'out'.
        dropout: value forwarded as the second argument of tf.nn.dropout
            (callers in this notebook pass the `keep_prob` placeholder).

    Returns:
        The output of the final matmul + bias layer; no softmax is applied here.
    """
    # Restore the 2-D image layout with a single channel.
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two convolution stages, each followed by 2x2 max pooling.
    pooled1 = maxpool2d(conv2d(images, weights['wc1'], biases['bc1']), k=2)
    pooled2 = maxpool2d(conv2d(pooled1, weights['wc2'], biases['bc2']), k=2)

    # Flatten to match the first dimension of the dense weight matrix.
    flat_dim = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(pooled2, [-1, flat_dim])

    # Fully connected layer: affine -> ReLU -> dropout.
    dense = tf.nn.relu(tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1']))
    dense = tf.nn.dropout(dense, dropout)

    # Output layer (class scores).
    return tf.add(tf.matmul(dense, weights['out']), biases['out'])

In [9]:
# Shapes of the trainable weight tensors, listed in creation order.
_weight_shapes = [
    ('wc1', [5, 5, 1, 32]),       # conv 1: 5x5 filters, 1 input channel, 32 outputs
    ('wc2', [5, 5, 32, 64]),      # conv 2: 5x5 filters, 32 input channels, 64 outputs
    ('wd1', [7*7*64, 1024]),      # dense layer: 7*7*64 flattened inputs, 1024 outputs
    ('out', [1024, n_classes]),   # output layer: 1024 inputs, one output per class
]

# Shapes of the matching bias vectors, listed in creation order.
_bias_shapes = [
    ('bc1', [32]),
    ('bc2', [64]),
    ('bd1', [1024]),
    ('out', [n_classes]),
]

# Every variable is initialised from tf.random_normal with its default parameters.
weights = {key: tf.Variable(tf.random_normal(shape)) for key, shape in _weight_shapes}

biases = {key: tf.Variable(tf.random_normal(shape)) for key, shape in _bias_shapes}

In [10]:
# Build the network output for the input placeholder.
model = conv_net(x, weights, biases, keep_prob)
print(model)

# Loss: mean softmax cross-entropy between the network output and the labels,
# minimised with Adam.
xent_per_example = tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=y)
cost = tf.reduce_mean(xent_per_example)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max output matches the arg-max label.
predicted_class = tf.argmax(model, 1)
true_class = tf.argmax(y, 1)
correct_model = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_model, tf.float32))

# Op that initialises every variable created so far.
init = tf.global_variables_initializer()

Tensor("Add_1:0", shape=(?, 10), dtype=float32)


In [11]:
# The previous cell already defines the loss (`cost`), the Adam training op
# (`optimizer`), `accuracy`, and `init`. Building them a second time here put a
# redundant Adam optimizer, with its own slot variables, into the graph (and
# therefore into every checkpoint). This cell now only exposes the alternative
# names as aliases of the existing ops; `optimizer`, `accuracy`, and `init`
# from the previous cell are used unchanged.
loss = cost
correct_prediction = correct_model

In [17]:
saver = tf.train.Saver()

# Make sure the checkpoint directory exists before training starts:
# Saver.save can fail when the parent directory of the save path is missing.
# MakeDirs succeeds if the directory is already there.
tf.gfile.MakeDirs("mymodel")

with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until step * batch_size reaches training_iters.
    while step * batch_size < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # One optimisation step, with dropout active.
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Evaluate on the same batch with dropout disabled (keep_prob = 1).
            # The result is stored as `batch_loss` so the graph-level `loss`
            # tensor defined in an earlier cell is not overwritten by a float.
            batch_loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                                    y: batch_y,
                                                                    keep_prob: 1.})
            print("Iter {}, Loss= {:.3f}, Training Accuracy= {:.3f}".format(
                step * batch_size, batch_loss, acc))
        step += 1

    # Calculate accuracy for the first 256 MNIST test images.
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
                                        y: mnist.test.labels[:256],
                                        keep_prob: 1.}))
    # Persist the trained variables so a later cell can restore them.
    saver.save(sess, "mymodel/model.ckpt")

Iter 2560, Loss= 3571.960, Training Accuracy= 0.719
Iter 5120, Loss= 762.011, Training Accuracy= 0.766
Iter 7680, Loss= 488.152, Training Accuracy= 0.812
Iter 10240, Loss= 388.361, Training Accuracy= 0.773
Iter 12800, Loss= 484.760, Training Accuracy= 0.742
Iter 15360, Loss= 556.658, Training Accuracy= 0.719
Iter 17920, Loss= 216.785, Training Accuracy= 0.812
Testing Accuracy: 0.808594


In [18]:
# NOTE(review): all ops of the graph were created before this call, so this
# reseed probably does not change their random behaviour - confirm.
tf.set_random_seed(123445)

# True: start from the weights saved after MNIST-digit training (transfer learning).
# False: start from a fresh random initialisation instead.
digits_fashion = True

# Read Fashion-MNIST into its own name so the digit dataset in `mnist` is not
# replaced (re-running the digit-training cell would otherwise silently train
# on Fashion-MNIST).
# NOTE(review): the Fashion-MNIST archives are expected to already be in the
# 'fashion' directory; per the Fashion-MNIST README, read_data_sets falls back
# to downloading the original MNIST digits when they are missing - confirm.
fashion_mnist = input_data.read_data_sets('fashion', one_hot=True)

with tf.Session() as sess:
    if digits_fashion:
        print("restoring from mymodel/model.ckpt")
        saver.restore(sess, "mymodel/model.ckpt")
    else:
        sess.run(init)
    step = 1
    # Keep training until step * batch_size reaches training_iters.
    while step * batch_size < training_iters:
        batch_x, batch_y = fashion_mnist.train.next_batch(batch_size)
        # One optimisation step, with dropout active.
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Evaluate on the same batch with dropout disabled (keep_prob = 1).
            # Stored as `batch_loss` so the graph-level `loss` tensor defined
            # in an earlier cell is not overwritten by a float.
            batch_loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                                    y: batch_y,
                                                                    keep_prob: 1.})
            print("Iter {}, Loss= {:.3f}, Training Accuracy= {:.3f}".format(
                step * batch_size, batch_loss, acc))
        step += 1

    # Calculate accuracy for the first 256 Fashion-MNIST test images.
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: fashion_mnist.test.images[:256],
                                        y: fashion_mnist.test.labels[:256],
                                        keep_prob: 1.}))

Extracting fashion\train-images-idx3-ubyte.gz
Extracting fashion\train-labels-idx1-ubyte.gz
Extracting fashion\t10k-images-idx3-ubyte.gz
Extracting fashion\t10k-labels-idx1-ubyte.gz
restoring from mymodel/model.ckpt
INFO:tensorflow:Restoring parameters from mymodel/model.ckpt
Iter 2560, Loss= 184.307, Training Accuracy= 0.797
Iter 5120, Loss= 206.185, Training Accuracy= 0.828
Iter 7680, Loss= 174.121, Training Accuracy= 0.789
Iter 10240, Loss= 269.649, Training Accuracy= 0.719
Iter 12800, Loss= 172.968, Training Accuracy= 0.797
Iter 15360, Loss= 112.278, Training Accuracy= 0.797
Iter 17920, Loss= 126.019, Training Accuracy= 0.812
Testing Accuracy: 0.800781
