In [7]:
# multilayer perceptron dense(tanh)-dense(tanh)-dense(softmax)
# Keras

In [2]:
import tensorflow as tf 
import keras
import numpy  as np

import os 
import urllib
import struct
# import gzip

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [50]:
if not os.path.exists('./mnist'):
    os.mkdir('./mnist')
    
train_images = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
train_labels = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")
test_images = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
test_labels = urllib.request.urlopen("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")

paths = [
    './mnist/train-images-idx3-ubyte.gz',
    './mnist/train-labels-idx1-ubyte.gz',
    './mnist/t10k-images-idx3-ubyte.gz',
    './mnist/t10k-labels-idx1-ubyte.gz'
]

objs = [
    train_images, train_labels, test_images, test_labels
]

for p,o in zip(paths, objs):
    print(p, o)
    with open(p, 'wb') as output:
        output.write(o.read())

In [3]:
def load_mnist(path, kind='train'):
    
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)
    
    with open(labels_path, 'rb') as lb_path:
        magic, n = struct.unpack('>II', lb_path.read(8))
        labels = np.fromfile(lb_path, dtype=np.uint8)
        
    with open(images_path, 'rb') as img_path:
        magic, num, rows, cols = struct.unpack('>IIII', img_path.read(16))
        images = np.fromfile(img_path, dtype=np.uint8).reshape(len(labels), 784)
        images = ((images / 255) - 0.5) * 2
        
    return images, labels

In [4]:
!gzip ./mnist/*.gz -d

gzip: ./mnist/*.gz: No such file or directory


In [5]:
!ls mnist/

t10k-images-idx3-ubyte	train-images-idx3-ubyte
t10k-labels-idx1-ubyte	train-labels-idx1-ubyte


In [8]:
# data load

X_train, y_train = load_mnist('./mnist/', kind='train')
X_test, y_test = load_mnist('./mnist/', kind='t10k')

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60000, 784) (60000,) (10000, 784) (10000,)


In [9]:
# standardization

mean_vals = np.mean(X_train, axis=0)
std_val = np.std(X_train)

X_train_centered = (X_train - mean_vals) / std_val
X_test_centered = (X_test - mean_vals) / std_val

del X_train, X_test

In [10]:
random_seed = 123
np.random.seed(123)

In [14]:
y_train_onehot = keras.utils.to_categorical(y_train)

model = keras.models.Sequential()

model.add(keras.layers.Dense(
    units=50, 
    input_dim=X_train_centered.shape[1], 
    kernel_initializer='glorot_uniform',
    bias_initializer='zeros',
    activation='tanh'
))

model.add(keras.layers.Dense(
    units=50,
    input_dim=50, 
    kernel_initializer='glorot_uniform',
    bias_initializer='zeros',
    activation='tanh'    
))

model.add(keras.layers.Dense(
    units=y_train_onehot.shape[1],
    input_dim=50,
    kernel_initializer='glorot_uniform',
    bias_initializer='zeros',
    activation='softmax'
))

sgd_optimizer = keras.optimizers.SGD(lr=0.001, decay=1e-7, momentum=0.9)
model.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy')

In [15]:
history = model.fit(X_train_centered, y_train_onehot, batch_size=64, epochs=50, verbose=1, validation_split=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [16]:
y_train_pred = model.predict_classes(X_train_centered, verbose=0)
print(y_train_pred[:3])

[5 0 4]


In [23]:
correct_preds = np.sum(y_train == y_train_pred, axis=0)
print('{:.2f}%'.format(correct_preds / y_train.shape[0] * 100))

99.34


In [25]:
y_test_pred = model.predict_classes(X_test_centered, verbose=0)
correct_preds = np.sum(y_test == y_test_pred, axis=0)
print('{:.2f}%'.format(correct_preds / y_test.shape[0] * 100))

96.40%
