In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Read the MNIST data sets from the local 'MNIST_data' directory.
# one_hot=True is requested so labels can be fed to the Y placeholder, which has
# num_classes columns (presumably one-hot rows -- confirm against input_data docs).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Multiplier on the L2 reconstruction term (X - recon) in the "recon" loss.
BETA_RECON = 0.0005
# Soft bootstrap target mix: BETA_SOFT * Y + (1 - BETA_SOFT) * predicted probabilities.
BETA_SOFT = 0.95
# Hard bootstrap target mix: BETA_HARD * Y + (1 - BETA_HARD) * one-hot argmax prediction.
BETA_HARD = 0.8

# Width of the input placeholder X and of the target placeholder Y, respectively.
num_features = 784
num_classes = 10
# Output size of each stacked dense layer; the last entry is the logits layer.
hidden_layer_sizes = [500, 300, num_classes]

# Training steps per run and number of examples drawn per step.
num_iter = 10000
batch_size = 50

In [4]:
# Graph inputs, fed through feed_dict in the training cells.
# The leading dimension is None so any batch size can be fed (training batches
# of batch_size rows as well as the full test set).
# X: float32, num_features columns. Y: float32, num_classes columns.
X = tf.placeholder(tf.float32, shape=[None, num_features], name="inputs")
Y = tf.placeholder(tf.float32, shape=[None, num_classes], name="targets")

In [5]:
def dense(X, input_size, output_size, activation_fn=tf.nn.relu,
          activation_name='activation', linear_name='linear'):
    """Build a fully connected layer: activation_fn(X @ weights + biases).

    The variables "weights" and "biases" are obtained with tf.get_variable, so
    the caller is expected to wrap each call in its own tf.variable_scope to
    keep the variable names distinct.

    Args:
        X: input tensor that is matrix-multiplied with the weights.
        input_size: number of rows of the weight matrix.
        output_size: number of columns of the weight matrix and length of the bias.
        activation_fn: callable applied to the affine output and called with a
            `name` keyword; a falsy value (e.g. None) returns the affine output.
        activation_name: op name given to the activation output.
        linear_name: op name given to the affine (pre-activation) output.

    Returns:
        The activated tensor, or the affine tensor when activation_fn is falsy.
    """
    kernel = tf.get_variable(
        "weights",
        [input_size, output_size],
        initializer=tf.contrib.layers.xavier_initializer(),
    )
    offset = tf.get_variable(
        "biases",
        [output_size],
        initializer=tf.constant_initializer(0.0),
    )
    pre_activation = tf.add(tf.matmul(X, kernel), offset, name=linear_name)
    if not activation_fn:
        return pre_activation
    return activation_fn(pre_activation, name=activation_name)

In [6]:
# with tf.variable_scope("fc1"):
#     fc1 = dense(X, num_features, 500, activation_fn=tf.nn.relu)
# with tf.variable_scope("fc2"):
#     fc2 = dense(fc1, 500, num_classes, activation_fn=None)
#     fc2_softmax = tf.nn.softmax(fc2, name='fc2_softmax')
    
# Stack the dense layers listed in hidden_layer_sizes under scopes fc1, fc2, ...
# Every layer but the last uses dense()'s default activation and feeds the next
# layer; the last one produces the raw logits plus their softmax.
num_layers = len(hidden_layer_sizes)
prev_layer, prev_hidden_layer_size = X, num_features
for layer_index, layer_width in enumerate(hidden_layer_sizes, 1):
    is_output_layer = (layer_index == num_layers)
    with tf.variable_scope("fc{layer_index}".format(layer_index=layer_index)):
        if is_output_layer:
            logits = dense(
                prev_layer,
                prev_hidden_layer_size,
                num_classes,
                activation_fn=None,
                linear_name='true_logits',
            )
            probs = tf.nn.softmax(logits, name='true_labels')
        else:
            hidden = dense(prev_layer, prev_hidden_layer_size, layer_width)
            prev_layer, prev_hidden_layer_size = hidden, layer_width

# Linear head mapping the class probabilities back to input space; its output
# is compared with X in the reconstruction loss.
with tf.variable_scope("recon"):
    recon = dense(
        probs,
        num_classes,
        num_features,
        activation_fn=None,
        linear_name='reconstructed_x',
    )

# Softmax head mapping the class probabilities to a second distribution over
# classes; its output is compared with Y in the noisy cross-entropy loss.
with tf.variable_scope("noisy_mapping"):
    noisy_probs = dense(
        probs,
        num_classes,
        num_classes,
        activation_fn=tf.nn.softmax,
        linear_name='noisy_probs',
    )

In [7]:
# global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")

# Four alternative objectives over the same network, each with its own SGD step.
# Small constant added inside every tf.log call below so log(0) is never taken.

# Baseline: mean softmax cross-entropy between the logits and Y.
with tf.name_scope("baseline"):
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    cross_entropy = tf.reduce_mean(per_example_xent, name="cross_entropy_loss")
    baseline_optimizer = tf.train.GradientDescentOptimizer(0.1)
    baseline_train_step = baseline_optimizer.minimize(cross_entropy)

# Reconstruction objective: summed cross-entropy of noisy_probs against Y plus
# BETA_RECON times the L2 loss of the input reconstruction error.
with tf.name_scope("recon"):
    recon_residual = X - recon
    recon_loss = BETA_RECON * tf.nn.l2_loss(recon_residual, name='reconstruction_loss')
    log_noisy_probs = tf.log(noisy_probs + 1e-10)
    noisy_cross_entropy_loss = - tf.reduce_sum(
        tf.multiply(log_noisy_probs, Y),
        name='noisy_cross_entropy_loss',
    )
    l_recon = tf.add(noisy_cross_entropy_loss, recon_loss, name="l_recon")
    recon_optimizer = tf.train.GradientDescentOptimizer(0.0001)
    recon_train_step = recon_optimizer.minimize(l_recon)

# Soft bootstrap: the target is a convex mix of Y and the predicted probabilities.
with tf.name_scope("soft_bootstrap"):
    soft_targets = BETA_SOFT * Y + (1 - BETA_SOFT) * probs
    soft_log_probs = tf.log(probs + 1e-10)
    l_soft = - tf.reduce_sum(tf.multiply(soft_targets, soft_log_probs), name='l_soft')
    soft_optimizer = tf.train.GradientDescentOptimizer(0.0001)
    soft_train_step = soft_optimizer.minimize(l_soft)

# Hard bootstrap: the target is a convex mix of Y and the one-hot argmax prediction z.
with tf.name_scope("hard_bootstrap"):
    predicted_class = tf.argmax(probs, 1, name='fc2_softmax_argmax')
    z = tf.one_hot(predicted_class, num_classes, name='z')
    hard_targets = BETA_HARD * Y + (1 - BETA_HARD) * z
    hard_log_probs = tf.log(probs + 1e-10)
    l_hard = - tf.reduce_sum(tf.multiply(hard_targets, hard_log_probs), name='l_hard')
    hard_optimizer = tf.train.GradientDescentOptimizer(0.0001)
    hard_train_step = hard_optimizer.minimize(l_hard)

In [8]:
# Fraction of rows whose most probable predicted class equals the argmax of Y.
predicted_labels = tf.argmax(probs, 1)
target_labels = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_labels, target_labels)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# Register one scalar summary per tracked quantity and merge them into a single
# op that the training cells evaluate alongside the train step.
with tf.name_scope("summaries"):
    scalar_summaries = [
        ("cross_entropy_loss", cross_entropy),
        ("accuracy", accuracy),
        ("loss_recon", l_recon),
        ("reconstruction_loss", recon_loss),
        ("noisy_cross_entropy_loss", noisy_cross_entropy_loss),
        ("loss_soft", l_soft),
        ("loss_hard", l_hard),
    ]
    for summary_tag, summary_tensor in scalar_summaries:
        tf.summary.scalar(summary_tag, summary_tensor)
    summary_op = tf.summary.merge_all()

In [None]:
# Train with the baseline cross-entropy objective from freshly initialised
# variables, logging summaries every step and reporting accuracy on the current
# training batch every 1000 steps and on the test set at the end.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print("Running Baseline")
    baseline_writer = tf.summary.FileWriter('./graph/baseline', sess.graph)
    try:
        for i in range(num_iter):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {X: X_batch, Y: Y_batch}
            if i % 1000 == 0:
                # Measured before the update for this step is applied.
                train_acc = sess.run(accuracy, feed_dict=batch_feed)
                print("Step {0}, Train Acc {1}".format(i, train_acc))
            _, summary = sess.run([baseline_train_step, summary_op], feed_dict=batch_feed)
            baseline_writer.add_summary(summary, global_step=i)
    finally:
        # Close the writer so buffered events reach disk and the file handle is
        # released even if training is interrupted.
        baseline_writer.close()
    test_acc = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
    # Format only the value that is printed; the loop variable is not needed
    # here and would be undefined when num_iter is 0.
    print("Test Acc {0}".format(test_acc))

In [10]:
# Train with the reconstruction objective (l_recon) from freshly initialised
# variables, logging summaries every step and reporting accuracy on the current
# training batch every 1000 steps and on the test set at the end.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Running Recon Bootstrap")
    # Named for this run; the writer logs to ./graph/recon.
    recon_writer = tf.summary.FileWriter('./graph/recon', sess.graph)
    try:
        for i in range(num_iter):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {X: X_batch, Y: Y_batch}
            if i % 1000 == 0:
                # Measured before the update for this step is applied.
                train_acc = sess.run(accuracy, feed_dict=batch_feed)
                print("Step {0}, Train Acc {1}".format(i, train_acc))
            _, summary = sess.run([recon_train_step, summary_op], feed_dict=batch_feed)
            recon_writer.add_summary(summary, global_step=i)
    finally:
        # Close the writer so buffered events reach disk and the file handle is
        # released even if training is interrupted.
        recon_writer.close()
    test_acc = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
    # Format only the value that is printed; the loop variable is not needed
    # here and would be undefined when num_iter is 0.
    print("Test Acc {0}".format(test_acc))

Running Recon Bootstrap
Step 0, Train Acc 0.0399999991059
Step 1000, Train Acc 0.10000000149
Step 2000, Train Acc 0.0799999982119
Step 3000, Train Acc 0.0799999982119
Step 4000, Train Acc 0.0599999986589
Step 5000, Train Acc 0.0799999982119
Step 6000, Train Acc 0.0399999991059
Step 7000, Train Acc 0.0599999986589
Step 8000, Train Acc 0.0799999982119
Step 9000, Train Acc 0.019999999553
Test Acc 0.0988000035286


In [None]:
# Train with the soft bootstrap objective (l_soft) from freshly initialised
# variables, logging summaries every step and reporting accuracy on the current
# training batch every 1000 steps and on the test set at the end.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Running Soft Bootstrap")
    soft_writer = tf.summary.FileWriter('./graph/soft', sess.graph)
    try:
        for i in range(num_iter):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {X: X_batch, Y: Y_batch}
            if i % 1000 == 0:
                # Measured before the update for this step is applied.
                train_acc = sess.run(accuracy, feed_dict=batch_feed)
                print("Step {0}, Train Acc {1}".format(i, train_acc))
            _, summary = sess.run([soft_train_step, summary_op], feed_dict=batch_feed)
            soft_writer.add_summary(summary, global_step=i)
    finally:
        # Close the writer so buffered events reach disk and the file handle is
        # released even if training is interrupted.
        soft_writer.close()
    test_acc = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
    # Format only the value that is printed; the loop variable is not needed
    # here and would be undefined when num_iter is 0.
    print("Test Acc {0}".format(test_acc))

In [None]:
# Train with the hard bootstrap objective (l_hard) from freshly initialised
# variables, logging summaries every step and reporting accuracy on the current
# training batch every 1000 steps and on the test set at the end.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Running Hard Bootstrap")
    hard_writer = tf.summary.FileWriter('./graph/hard', sess.graph)
    try:
        for i in range(num_iter):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {X: X_batch, Y: Y_batch}
            if i % 1000 == 0:
                # Measured before the update for this step is applied.
                train_acc = sess.run(accuracy, feed_dict=batch_feed)
                print("Step {0}, Train Acc {1}".format(i, train_acc))
            _, summary = sess.run([hard_train_step, summary_op], feed_dict=batch_feed)
            hard_writer.add_summary(summary, global_step=i)
    finally:
        # Close the writer so buffered events reach disk and the file handle is
        # released even if training is interrupted.
        hard_writer.close()
    test_acc = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
    # Format only the value that is printed; the loop variable is not needed
    # here and would be undefined when num_iter is 0.
    print("Test Acc {0}".format(test_acc))