In [1]:
# Multilayer perceptron for MNIST digit classification
# (implemented with the TensorFlow 1.x graph/session API)

In [20]:
import os
import struct
import urllib
import urllib.request  # `import urllib` alone does not load the `request` submodule
# import gzip

import numpy as np
import tensorflow as tf

In [50]:
# Make sure the download directory exists. `exist_ok=True` replaces the
# check-then-create pattern, which is racy and needlessly verbose, and keeps
# the cell safe to re-run.
os.makedirs('./mnist', exist_ok=True)

# Open one HTTP response per MNIST archive. Nothing is read here: the response
# objects are consumed (and written to disk) by the next cell, so these four
# names must stay defined.
train_images = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
train_labels = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
test_images = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
test_labels = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")

In [51]:
# Destination file for each archive, in the same order as `objs` below.
paths = [
    './mnist/train-images-idx3-ubyte.gz',
    './mnist/train-labels-idx1-ubyte.gz',
    './mnist/t10k-images-idx3-ubyte.gz',
    './mnist/t10k-labels-idx1-ubyte.gz'
]

# Open HTTP responses created by the download cell.
objs = [
    train_images, train_labels, test_images, test_labels
]

for p,o in zip(paths, objs):
    print(p, o)
    # Read the whole body, then always release the connection (the original
    # never closed the responses).
    try:
        payload = o.read()
    finally:
        o.close()
    # An HTTP response can only be read once. If this cell is re-run without
    # re-running the download cell, `read()` returns b'' and writing it would
    # silently replace a good archive with an empty file.
    if not payload:
        raise RuntimeError(
            'No data read for %s; re-run the download cell to open a fresh response.' % p
        )
    with open(p, 'wb') as output:
        output.write(payload)

./mnist/train-images-idx3-ubyte.gz <http.client.HTTPResponse object at 0x7f72a4294550>
./mnist/train-labels-idx1-ubyte.gz <http.client.HTTPResponse object at 0x7f72a42947b8>
./mnist/t10k-images-idx3-ubyte.gz <http.client.HTTPResponse object at 0x7f72a4294cf8>
./mnist/t10k-labels-idx1-ubyte.gz <http.client.HTTPResponse object at 0x7f72a4294668>


In [52]:
def load_mnist(path, kind='train'):
    """Load an uncompressed IDX image/label pair from `path`.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``.

    Parameters
    ----------
    path : str
        Directory containing the two uncompressed IDX files.
    kind : str
        File-name prefix, e.g. ``'train'`` or ``'t10k'``.

    Returns
    -------
    images : np.ndarray, shape (n_samples, rows * cols)
        Flattened images as floats, rescaled from [0, 255] to [-1, 1].
    labels : np.ndarray, shape (n_samples,), dtype uint8
        Class labels.

    Raises
    ------
    ValueError
        If a magic number is wrong (e.g. the file is still gzip-compressed)
        or the item counts in the headers and payloads disagree.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lb_path:
        # Header: two big-endian uint32 values (magic number, item count).
        magic, n = struct.unpack('>II', lb_path.read(8))
        if magic != 2049:
            raise ValueError(
                'Bad magic number %d in %s (expected 2049)' % (magic, labels_path)
            )
        # np.fromfile continues from the current position, i.e. after the header.
        labels = np.fromfile(lb_path, dtype=np.uint8)

    if labels.shape[0] != n:
        raise ValueError(
            'Label file %s declares %d items but contains %d'
            % (labels_path, n, labels.shape[0])
        )

    with open(images_path, 'rb') as img_path:
        # Header: four big-endian uint32 values (magic, image count, rows, cols).
        magic, num, rows, cols = struct.unpack('>IIII', img_path.read(16))
        if magic != 2051:
            raise ValueError(
                'Bad magic number %d in %s (expected 2051)' % (magic, images_path)
            )
        pixels = np.fromfile(img_path, dtype=np.uint8)

    if num != n:
        raise ValueError(
            'Image count (%d) does not match label count (%d)' % (num, n)
        )

    # Use the dimensions stored in the header instead of a hard-coded 784 so
    # any IDX image set loads correctly; for 28x28 MNIST this is still 784.
    images = pixels.reshape(num, rows * cols)
    # Rescale pixel intensities from [0, 255] to [-1, 1].
    images = ((images / 255) - 0.5) * 2

    return images, labels

In [54]:
# Decompress every downloaded .gz archive in ./mnist (-d = decompress);
# load_mnist opens the uncompressed file names (no .gz suffix).
!gzip ./mnist/*.gz -d

In [55]:
# List the data directory to confirm the four uncompressed IDX files are present.
!ls mnist/

t10k-images-idx3-ubyte	train-images-idx3-ubyte
t10k-labels-idx1-ubyte	train-labels-idx1-ubyte


In [59]:
# data load

# Read the training split ('train') and the test split ('t10k') from ./mnist/.
(X_train, y_train), (X_test, y_test) = (
    load_mnist('./mnist/', kind=split) for split in ('train', 't10k')
)

# Sanity check: print the shape of each array.
print(*(arr.shape for arr in (X_train, y_train, X_test, y_test)))

(60000, 784) (60000,) (10000, 784) (10000,)


In [60]:
# standardization

# Per-feature (per-pixel) mean, computed on the training set only.
mean_vals = X_train.mean(axis=0)
# A single scalar standard deviation over all training values, not one per
# feature (presumably so pixels that never vary do not cause a division by
# zero; confirm).
std_val = X_train.std()

# Apply the training-set statistics to both splits.
X_train_centered, X_test_centered = (
    (split - mean_vals) / std_val for split in (X_train, X_test)
)

# Drop the unscaled arrays. After this, re-running this cell requires
# re-running the data-load cell first.
del X_train, X_test

In [61]:
# Model / run configuration shared by the cells below.
n_classes = 10
random_seed = 123
n_features = X_train_centered.shape[1]

# Seed NumPy's global RNG from the same constant used for TensorFlow.
np.random.seed(random_seed)

In [87]:
# multilayer perceptron: dense-dense-logit
#
# Builds the static graph `g`: two 50-unit tanh hidden layers followed by a
# linear 10-unit output layer. Later cells add the loss/optimizer to `g` and
# run it in a session, so the names defined here (tf_x, tf_y, y_onehot, logits,
# predictions) are used as notebook globals.

g = tf.Graph()
with g.as_default():
    # Set the graph-level seed before any layer (and its variables) is created.
    tf.set_random_seed(random_seed)
    # input 
    # tf_x: one row of n_features values per sample; batch size left open (None).
    tf_x = tf.placeholder(dtype=tf.float32, shape=(None, n_features), name='tf_x')
    # NOTE: `(None)` is just `None` (parentheses without a comma are not a
    # tuple), so no shape is specified for the label placeholder.
    tf_y = tf.placeholder(dtype=tf.int32, shape=(None), name='tf_y')
    # to categorical 
    # Integer class ids -> one-hot vectors of length n_classes for the loss.
    y_onehot = tf.one_hot(indices=tf_y, depth=n_classes)
    # dense layer 1
    h1 = tf.layers.dense(inputs=tf_x, units=50, activation=tf.tanh, name='layer1')
    # dense layer 2
    h2 = tf.layers.dense(inputs=h1, units=50, activation=tf.tanh, name='layer2')
    # output layer: raw logits (activation=None); softmax is applied separately
    # below. `units=10` is hard-coded and matches n_classes = 10.
    logits = tf.layers.dense(inputs=h2, units=10, activation=None, name='layer3')
    # prediction
    # 'classes': index of the largest logit per row; 'probabilities': softmax
    # of the logits.
    predictions = {
        'classes' : tf.argmax(logits, axis=1, name='predicted_classes'), 
        'probabilities' : tf.nn.softmax(logits, name='softmax_tensor')
    }
    

In [88]:
# cost function and optimizer 
#
# Adds the loss, the training op, and the variable initializer to the existing
# graph `g`. `cost`, `train_op`, and `init_op` are used by the training cell.

with g.as_default():
    # cost function : multiclass cross-entropy
    # Takes the raw logits (not softmax probabilities) and the one-hot labels.
    cost = tf.losses.softmax_cross_entropy(onehot_labels=y_onehot, logits=logits)
    # optimizer: gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

    # One call to train_op performs a single gradient-descent update on `cost`.
    train_op = optimizer.minimize(loss=cost)
    # Created after the layers above so that it covers their variables.
    init_op = tf.global_variables_initializer()
    

In [96]:
# minibatch generator 

def create_batch_generator(X, y, batch_size=128, shuffle=False):
    """Yield successive ``(X_batch, y_batch)`` minibatches.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix. It is copied, so the caller's data is never modified.
    y : array-like, shape (n_samples,)
        Targets aligned row-by-row with `X`.
    batch_size : int
        Maximum number of rows per batch; the last batch may be smaller.
    shuffle : bool
        If True, rows of `X` and `y` are permuted together (using NumPy's
        global RNG) before batching.

    Yields
    ------
    (np.ndarray, np.ndarray)
        A slice of the features and the matching slice of the targets.

    Notes
    -----
    Shuffling uses one shared permutation index for both arrays. The earlier
    approach stacked X and y into a single array, which forced them to a
    common dtype and then cast the labels back with ``astype(int)``; that
    truncated non-integer targets and needlessly changed dtypes. Both arrays
    now keep their own dtype whether or not `shuffle` is set.
    """
    X_copy = np.array(X)
    y_copy = np.array(y)

    if shuffle:
        # Fancy indexing with the same permutation keeps rows and labels paired.
        order = np.random.permutation(X_copy.shape[0])
        X_copy = X_copy[order]
        y_copy = y_copy[order]

    # Use the converted array for the row count so plain lists work as input.
    for i in range(0, X_copy.shape[0], batch_size):
        yield X_copy[i:i+batch_size, :], y_copy[i:i+batch_size]


In [97]:
# training 
# Open a session on graph `g` and initialize all variables. The session is
# deliberately left open: the prediction cell below reuses `sess`.
sess = tf.Session(graph=g)
sess.run(init_op)

# 50 passes over the training set in minibatches of 64.
for epoch in range(50):
    training_costs = []
    # shuffle is left at its default (False), so batches are visited in the
    # same order every epoch.
    batch_generator = create_batch_generator(X_train_centered, y_train, batch_size=64)
    for batch_X, batch_y in batch_generator:
        feed = {tf_x: batch_X, tf_y: batch_y}
        # Run one optimizer step and fetch the loss for this batch.
        _, batch_cost = sess.run([train_op, cost], feed_dict=feed)
        training_costs.append(batch_cost)
    # Report the mean of the per-batch losses for this epoch.
    print('Epoch {:d}, loss {:.3f}'.format(epoch+1, np.mean(training_costs)))

Epoch 1, loss 1.557
Epoch 2, loss 0.949
Epoch 3, loss 0.750
Epoch 4, loss 0.639
Epoch 5, loss 0.567
Epoch 6, loss 0.516
Epoch 7, loss 0.478
Epoch 8, loss 0.449
Epoch 9, loss 0.425
Epoch 10, loss 0.405
Epoch 11, loss 0.388
Epoch 12, loss 0.374
Epoch 13, loss 0.362
Epoch 14, loss 0.351
Epoch 15, loss 0.341
Epoch 16, loss 0.332
Epoch 17, loss 0.324
Epoch 18, loss 0.317
Epoch 19, loss 0.310
Epoch 20, loss 0.303
Epoch 21, loss 0.298
Epoch 22, loss 0.292
Epoch 23, loss 0.287
Epoch 24, loss 0.282
Epoch 25, loss 0.278
Epoch 26, loss 0.273
Epoch 27, loss 0.269
Epoch 28, loss 0.265
Epoch 29, loss 0.262
Epoch 30, loss 0.258
Epoch 31, loss 0.255
Epoch 32, loss 0.251
Epoch 33, loss 0.248
Epoch 34, loss 0.245
Epoch 35, loss 0.242
Epoch 36, loss 0.240
Epoch 37, loss 0.237
Epoch 38, loss 0.234
Epoch 39, loss 0.232
Epoch 40, loss 0.229
Epoch 41, loss 0.227
Epoch 42, loss 0.225
Epoch 43, loss 0.223
Epoch 44, loss 0.220
Epoch 45, loss 0.218
Epoch 46, loss 0.216
Epoch 47, loss 0.214
Epoch 48, loss 0.212
E

In [101]:
# prediction
# Only the features are fed: the class prediction does not depend on tf_y.
feed = {tf_x: X_test_centered}
y_pred = sess.run(predictions['classes'], feed_dict=feed)

# Accuracy: percentage of test samples whose predicted class matches the label.
print('acc:', 100 * (y_pred == y_test).sum() / len(y_test), '%')

acc: 93.89 %
