In [1]:
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
import sklearn.preprocessing
import sklearn.decomposition
from sklearn.linear_model import LogisticRegressionCV
from sklearn.datasets import fetch_mldata

In [2]:
DATA_HOME = "./data"
np.random.seed(42)

In [3]:
def batch_iter(data, batch_size, num_epochs, seed=None, fill=False):
    """
    Generates a batch iterator for a dataset.
    """
    random = np.random.RandomState(seed)
    data = np.array(data)
    data_length = len(data)
    num_batches_per_epoch = int(len(data)/batch_size)
    if len(data) % batch_size != 0:
        num_batches_per_epoch += 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        shuffle_indices = random.permutation(np.arange(data_length))
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_length)
            selected_indices = shuffle_indices[start_index:end_index]
            # If we don't have enough data left for a whole batch, fill it randomly
            if fill is True and end_index >= data_length:
                num_missing = batch_size - len(selected_indices)
                selected_indices = np.concatenate([selected_indices, random.randint(0, data_length, num_missing)])
            yield data[selected_indices]

In [4]:
# Load MNIST Data
mnist = fetch_mldata('MNIST original', data_home=DATA_HOME)
data_x = sklearn.preprocessing.scale(mnist.data)
x_train, x_test, y_train, y_test = train_test_split(data_x, mnist.target, test_size=0.1, random_state=42)



In [5]:
x_train.shape

(63000, 784)

In [16]:
class Autoencoder:
    def __init__(self, x, hidden_dims=[32]):
        self.layers = [x]
        
        # Build hidden layers
        for i, layer in enumerate(hidden_dims):
            layer_idx = i + 1
            prev_layer = self.layers[layer_idx-1]
            prev_layer_dim = prev_layer.get_shape().as_list()[1]
            layer_dim = hidden_dims[i]
            with tf.variable_scope("hidden-{}".format(i)):
                W = tf.Variable(
                        tf.truncated_normal([prev_layer_dim, layer_dim]),
                        name="W")
                b = tf.Variable(tf.zeros([layer_dim]), name="b")
                output = tf.nn.relu(tf.nn.xw_plus_b(prev_layer, W, b))
                self.layers.append(output)
        
        # Build output (reconstruction) layer 
        with tf.variable_scope("output"):
            output_dim = self.layers[0].get_shape().as_list()[1]
            prev_layer = self.layers[-1]
            prev_layer_dim = hidden_dims[-1]
            W = tf.Variable(
                tf.truncated_normal([prev_layer_dim, output_dim]),
                name="W")
            b = tf.Variable(tf.zeros([output_dim]), name="b")
            output = tf.nn.relu(tf.nn.xw_plus_b(prev_layer, W, b))
            self.layers.append(output)
        
        # Squared loss function
        self.loss = tf.reduce_sum(tf.square(self.layers[0] - self.layers[-1]))
        self.avg_loss = tf.reduce_mean(tf.square(self.layers[0] - self.layers[-1]))

In [None]:
BATCH_SIZE = 32
NUM_EPOCHS = 100
LAYERS = [32, 32]

graph = tf.Graph()
sess = tf.Session(graph=graph)
with graph.as_default(), sess.as_default():
    x = tf.placeholder(tf.float32, [None, x_train.shape[1]])
    ae = Autoencoder(x, LAYERS)
    
    # Training
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-4)
    train_op = optimizer.minimize(ae.loss, global_step=global_step)
    
    sess.run(tf.initialize_all_variables())
    
    batches = batch_iter(x_train, BATCH_SIZE, NUM_EPOCHS)
    for x_batch in batches:
        feed_dict = { x: x_batch }
        _, loss, step = sess.run([train_op, ae.avg_loss, global_step], feed_dict)
#         if step % 1000 == 0:
#             print("{}: Train Loss: {:g}".format(step ,loss))
        if step % 1000 == 0:
            total_loss =  sess.run(ae.avg_loss, { x: x_train })
            print("{}: Mean Loss: {:g}".format(step ,total_loss))

1000: Mean Loss: 19134.6
2000: Mean Loss: 11394.8

In [19]:
# Get the compressed representation of the input X
with graph.as_default(), sess.as_default():
    x_train_transformed = sess.run(ae.layers[-2], { x: x_train })
    x_test_transformed = sess.run(ae.layers[-2], { x: x_test })

In [21]:
clf = KNeighborsClassifier()
clf.fit(x_train_transformed, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [22]:
y_pred = clf.predict(x_test_transformed)
print(classification_report(y_test, y_pred))

             precision    recall  f1-score   support

        0.0       0.37      0.82      0.51       671
        1.0       0.82      0.98      0.89       800
        2.0       0.33      0.29      0.31       697
        3.0       0.45      0.43      0.44       719
        4.0       0.55      0.50      0.53       653
        5.0       0.65      0.40      0.49       662
        6.0       0.79      0.64      0.71       712
        7.0       0.71      0.70      0.71       739
        8.0       0.62      0.36      0.46       686
        9.0       0.67      0.57      0.61       661

avg / total       0.60      0.58      0.57      7000

