In [3]:
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [4]:
def next_batch(index, batch_size,total,data,labels):
    """Return the next `batch_size` examples from this data set."""
    start = index
    end = index+batch_size if index+batch_size<= total else None
    return data[start:end], labels[start:end]

In [5]:
def plot_gallery(title, images, n_col, n_row,image_shape = (28, 28)):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)
    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())
        plt.imshow(comp.reshape(image_shape), cmap=plt.cm.gray,            
                   vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)
    plt.show()   

In [6]:
def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

# read training data from CSV file 
dataTrain = pd.read_csv('./kaggle-mnist/train.csv')
images = dataTrain.iloc[:,1:].values
images = images.astype(np.float)
images = np.multiply(images, 1.0 / 255.0)

labels_flat = dataTrain[[0]].values.ravel()
labels_count = np.unique(labels_flat).shape[0]
labels = dense_to_one_hot(labels_flat, labels_count)
labels = labels.astype(np.uint8)

In [7]:
images.shape

(42000, 784)

In [8]:
labels.shape


(42000, 10)

In [9]:
# Parameters Pre-training
learning_rate_unsup = 0.01
training_epochs_unsup = 10
batch_size_unsup = 256
display_step = 100
examples_to_show = 10

# Network Parameters
n_hidden_1 = 256 # 1st layer num features
n_hidden_2 = 128 # 2nd layer num features
n_input = images.shape[1] # MNIST data input (img shape: 28*28)


In [10]:
# Parameters Fine Tuning
learning_rate_sup= 0.001
training_epochs_sup = 10
batch_size_sup = 100
display_step = 100

# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
y = tf.placeholder("float", [None, labels_count])

In [11]:

# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])
with tf.variable_scope("semitraining"):
    weights = {
        'encoder_h1': tf.get_variable(initializer=tf.random_normal([n_input, n_hidden_1]),name="encoder_h1"),
        'encoder_h2': tf.get_variable(initializer=tf.random_normal([n_hidden_1, n_hidden_2]),name="encoder_h2"),
        'decoder_h1': tf.get_variable(initializer=tf.random_normal([n_hidden_2, n_hidden_1]),name="decoder_h1"),
        'decoder_h2': tf.get_variable(initializer=tf.random_normal([n_hidden_1, n_input]),name="decoder_h2"),
    }
    biases = {
        'encoder_b1': tf.get_variable(initializer=tf.random_normal([n_hidden_1]),name="encoder_b1"),
        'encoder_b2': tf.get_variable(initializer=tf.random_normal([n_hidden_2]),name="encoder_b2"),
        'decoder_b1': tf.get_variable(initializer=tf.random_normal([n_hidden_1]),name="decoder_b1"),
        'decoder_b2': tf.get_variable(initializer=tf.random_normal([n_input]),name="decoder_b2"),
    }
    # tf Graph input (only pictures)
    #finetuning
    weights['out_h3'] = tf.get_variable(initializer=tf.random_normal([n_input, labels_count]),name="out_h3")
    biases['out_b3'] = tf.get_variable(initializer=tf.random_normal([labels_count]),name="out_b3")


In [12]:
# Building the encoder
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2


# Building the decoder
def decoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2

In [13]:
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Hidden layer with RELU activation
    layer_3 = tf.add(tf.matmul(layer_2, weights['decoder_h1']), biases['decoder_b1'])
    layer_3 = tf.nn.relu(layer_3)
    # Hidden layer with RELU activation
    layer_4 = tf.add(tf.matmul(layer_3, weights['decoder_h2']), biases['decoder_b2'])
    layer_4 = tf.nn.relu(layer_4)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_4, weights['out_h3']) + biases['out_b3']
    return out_layer

In [14]:
# Create model
def softmaxLast_layer(x, weights, biases):
    encoder_op = encoder(X)
    decoder_op = decoder(encoder_op)
    # Output layer with linear activation
    out_layer = tf.matmul(decoder_op, weights['out_h3']) + biases['out_b3']
    return out_layer

In [15]:
with tf.variable_scope("semitraining",reuse=True):
    # Construct model
    encoder_op = encoder(X)
    decoder_op = decoder(encoder_op)

    # Prediction
    y_pred = decoder_op
    # Targets (Labels) are the input data.
    y_true = X

    # Define loss and optimizer, minimize the squared error
    cost_unsup = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
    optimizer_unsup = tf.train.RMSPropOptimizer(learning_rate_unsup).minimize(cost_unsup)


In [16]:
#Fine tuning
# Construct model
with tf.variable_scope("semitraining",reuse=True):
    pred = multilayer_perceptron(X, weights, biases)

    # Define loss and optimizer
    cost_sup = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
    optimizer_sup = tf.train.AdamOptimizer(learning_rate=learning_rate_sup).minimize(cost_sup,var_list=[tf.trainable_variables()[8],tf.trainable_variables()[9]])
    
    # Initializing the variables
    init = tf.initialize_all_variables()


In [17]:
total_batch = int(images.shape[0]/batch_size_unsup)
total_batch

164

In [18]:
[(i, v.name) for i,v  in enumerate(tf.trainable_variables())]

[(0, u'semitraining/encoder_h1:0'),
 (1, u'semitraining/encoder_h2:0'),
 (2, u'semitraining/decoder_h1:0'),
 (3, u'semitraining/decoder_h2:0'),
 (4, u'semitraining/encoder_b1:0'),
 (5, u'semitraining/encoder_b2:0'),
 (6, u'semitraining/decoder_b1:0'),
 (7, u'semitraining/decoder_b2:0'),
 (8, u'semitraining/out_h3:0'),
 (9, u'semitraining/out_b3:0')]

In [19]:
# read test data from CSV file 
datatest = pd.read_csv('./kaggle-mnist/test.csv')
imagestest = datatest.iloc[:,:].values
imagestest = imagestest.astype(np.float)
imagestest = np.multiply(imagestest, 1.0 / 255.0)


In [20]:
imagestest.shape

(28000, 784)

In [21]:
sess = tf.InteractiveSession()
sess.run(init)
print(sum(sum(sess.run(tf.trainable_variables()[0]))))


196.655081451


In [22]:
# Launch the graph
# Using InteractiveSession (more convenient while using Notebooks)
#tf.reset_default_graph()
with tf.Session() as sess:
    init = tf.initialize_all_variables()
    #sess = tf.InteractiveSession()
    sess.run(init)
    with tf.variable_scope("semitraining",reuse=True):
        
        total_batch = int(images.shape[0]/batch_size_unsup)
        
        # Test model
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({X: images, y: labels}))
        
#         print(sum(sum(sess.run(tf.trainable_variables()[0]))))
#         print(sum(sum(sess.run(tf.trainable_variables()[8]))))

        # Training cycle
        for epoch in range(training_epochs_unsup):
            # Loop over all batches
            for i in range(total_batch):
                batch_xs, batch_ys = next_batch(i*batch_size_unsup, batch_size_unsup,images.shape[0],images,labels)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([optimizer_unsup, cost_unsup], feed_dict={X: batch_xs})
            # Display logs per epoch step
            if epoch % display_step == 0:
#                 print(sum(sum(sess.run(tf.trainable_variables()[0]))))
#                 print(sum(sum(sess.run(tf.trainable_variables()[8]))))

                print("Epoch:", '%04d' % (epoch+1),
                      "cost=", "{:.9f}".format(c))

        print("Optimization Finished!")
#         print(sum(sum(sess.run(tf.trainable_variables()[0]))))
#         print(sum(sum(sess.run(tf.trainable_variables()[8]))))
        
        # Test model
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({X: images, y: labels}))
        
        # Training cycle
        for epoch in range(training_epochs_sup):
            avg_cost = 0.
            total_batch = int(images.shape[0]/batch_size_sup)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = next_batch(i*batch_size_sup, batch_size_sup,images.shape[0],images,labels)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([optimizer_sup, cost_sup], feed_dict={X: batch_x,y: batch_y})
                # Compute average loss
                avg_cost += c / total_batch
            # Display logs per epoch step
            if epoch % display_step == 0:
#                 print(sum(sum(sess.run(tf.trainable_variables()[0]))))
#                 print(sum(sum(sess.run(tf.trainable_variables()[8]))))
                print( "Epoch:", '%04d' % (epoch+1), "cost=", \
                    "{:.9f}".format(avg_cost))
        print("Optimization Finished!")
    #         print(sum(sum(sess.run(tf.trainable_variables()[0]))))
    #         print(sum(sum(sess.run(tf.trainable_variables()[8]))))


        # Test model
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({X: images, y: labels}))
        # for i,val in enumeratete(tf.argmax(pred,1).eval({X:imagestest})):
        #     print(i,val)

Exception AssertionError: AssertionError("Nesting violated for default stack of <type 'weakref'> objects",) in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x7f87838199d0>> ignored


Accuracy: 0.118167
Epoch: 0001 cost= 0.254383057
Optimization Finished!
Accuracy: 0.103167
Epoch: 0001 cost= 158268.333984375
Optimization Finished!
Accuracy: 0.770048


In [23]:
# Applying encode and decode over test set
encode_decode = sess.run(
    y_pred, feed_dict={X: imagestest[:examples_to_show]})


RuntimeError: Attempted to use a closed Session.

In [None]:
# Compare original images with their reconstructions
plot_gallery('Base test MNIST',imagestest[:examples_to_show],examples_to_show/2,2)
plot_gallery('Base test Encoded_Decoded MNIST',encode_decode[:examples_to_show],examples_to_show/2,2)


In [None]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
