In [1]:
import gzip
import os
import tensorflow as tf
import numpy
data_path = '/home/hadoop/data/mnist'
def load_mnist():
    train_data = os.path.join(data_path,'train-images-idx3-ubyte.gz')
    train_label = os.path.join(data_path, 'train-labels-idx1-ubyte.gz')
    test_data = os.path.join(data_path, 't10k-images-idx3-ubyte.gz')
    test_label = os.path.join(data_path, 't10k-labels-idx1-ubyte.gz')
    def _read32(bytestream):
        return numpy.frombuffer(bytestream.read(4), 
                        dtype=numpy.dtype(numpy.uint32).newbyteorder('>'))[0]
    
    def _read_image(filename):
        with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
            magic = _read32(bytestream)
            if magic != 2051:
              raise ValueError(
                  'Invalid magic number %d in MNIST image file: %s' %
                  (magic, filename))
            num_images = _read32(bytestream)
            rows = _read32(bytestream)
            cols = _read32(bytestream)
            buf = bytestream.read(rows * cols * num_images)
            data = numpy.frombuffer(buf, dtype=numpy.uint8)
            data = data.reshape(num_images, rows * cols)
            return data.astype(numpy.float32)
    
    def dense_to_one_hot(labels_dense, num_classes):
          """Convert class labels from scalars to one-hot vectors."""
          num_labels = labels_dense.shape[0]
          index_offset = numpy.arange(num_labels) * num_classes
          labels_one_hot = numpy.zeros((num_labels, num_classes))
          labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
          return labels_one_hot
    
    def _read_label(filename):
        with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
            magic = _read32(bytestream)
            if magic != 2049:
                ValueError(
                  'Invalid magic number %d in MNIST image file: %s' %
                  (magic, filename))
            num_labels = _read32(bytestream)
            buf = bytestream.read(num_labels)
            labels = numpy.frombuffer(buf, dtype=numpy.uint8)
            return dense_to_one_hot(labels.astype(numpy.int32), 10)
    
    return _read_image(train_data), _read_label(train_label), _read_image(test_data), _read_label(test_label)

train_data, train_label, test_data, test_label = load_mnist()

assert(train_data.shape == (60000, 784))
assert(train_label.shape == (60000, 10))
assert(test_data.shape == (10000, 784))
assert(test_label.shape == (10000, 10))

In [5]:
# normalize image
train_data_norm = numpy.expand_dims( numpy.sqrt(numpy.sum(train_data * train_data, axis=1)), axis=1)
train_data = numpy.divide(train_data, train_data_norm)

test_data_norm = numpy.expand_dims( numpy.sqrt(numpy.sum(test_data * test_data, axis=1)), axis=1)
test_data = numpy.divide(test_data, test_data_norm)

In [34]:
def get_model(is_training=True):
    def fc(scope, input_size, hidden_size, x):
        with tf.variable_scope(scope):
            w = tf.get_variable('w', [input_size, hidden_size], 
                                initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
            b = tf.get_variable('b', [hidden_size], 
                                initializer=tf.constant_initializer(1))
            h = tf.matmul(x,w) + b
            if is_training:
                h = tf.nn.dropout(h, keep_prob=0.8)
            return tf.nn.softmax(h)

    x = tf.placeholder(tf.float32, [None, 784])
    y = tf.placeholder(tf.float32, [None, 10])
    if is_training:
        x = tf.nn.dropout(x, keep_prob=0.8)
    
    z1 = fc('fc1', 784, 128, x)
    z2 = fc('fc2', 128, 64, z1)
    logits = fc('fc3', 64, 10, z2)
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(logits,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    loss = -tf.reduce_sum(y * tf.log(logits))
        
    if not is_training:
        return x, y, loss, accuracy

    lr = 0.005
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)
    return x, y, loss, accuracy, train_step

In [40]:
g = tf.Graph()
with g.as_default():
    session = tf.Session()
    with tf.variable_scope("model", reuse=None):
        x_train, y_train, train_loss, train_accuracy, train_step = get_model()
    with tf.variable_scope("model", reuse=True):
        x_test, y_test, test_loss, test_accuracy = get_model(is_training=False)
    
    session.run(tf.initialize_all_variables())
    batch_size = 512

    for i in range(1, 2000):
        perm = numpy.random.permutation(numpy.arange(len(train_data)))
        _train = train_data[perm]
        _label = train_label[perm]
        
        _train_loss = 0
        for k in range(len(train_data)/batch_size - 1):
            start = k * batch_size
            end = (k+1) * batch_size
            _loss, _ = session.run([train_loss, train_step], 
                                   feed_dict={x_train:_train[start:end], y_train:_label[start:end]})
            _train_loss += _loss
    
        if i % 200 == 0:
            print "epoch:", i
            print "train_loss: ", _train_loss
            _test_loss, _test_accuracy = session.run([test_loss, test_accuracy], 
                                                     feed_dict={x_test:test_data, y_test:test_label})
            print "test_loss: ", _test_loss, "  test_accuracy: ", _test_accuracy
            print ""

epoch: 200
train_loss:  33578.043808
test_loss:  2734.51   test_accuracy:  0.9381

epoch: 400
train_loss:  27039.8695831
test_loss:  1902.47   test_accuracy:  0.9473

epoch: 600
train_loss:  24596.6051788
test_loss:  1611.39   test_accuracy:  0.9542

epoch: 800
train_loss:  23444.0808563
test_loss:  1472.13   test_accuracy:  0.9583

epoch: 1000
train_loss:  21871.5790253
test_loss:  1392.62   test_accuracy:  0.961

epoch: 1200
train_loss:  21289.841629
test_loss:  1349.07   test_accuracy:  0.9627

epoch: 1400
train_loss:  20496.9544525
test_loss:  1305.26   test_accuracy:  0.9653

epoch: 1600
train_loss:  19672.5226746
test_loss:  1295.08   test_accuracy:  0.9668

epoch: 1800
train_loss:  19341.6393661
test_loss:  1341.24   test_accuracy:  0.9663

