In [5]:
import tensorflow as tf

import os
import numpy as np
import struct
# from tensorflow.examples.tutorials.mnist import input_data

### MNIST - multilayer perceptron with TensorFlow layers

In [6]:
def load_mnist(path, kind='train'):
    """Load one split of an MNIST-style dataset stored as raw IDX files.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``
    from the directory ``path``.

    Parameters
    ----------
    path : str
        Directory containing the uncompressed IDX files.
    kind : str
        File-name prefix of the split (``'train'`` or ``'t10k'``).
        ``'test'`` is accepted as an alias for ``'t10k'``.

    Returns
    -------
    images : np.ndarray of uint8, shape (n_samples, rows * cols)
        One flattened image per row; ``rows`` and ``cols`` are taken from
        the image file header rather than assumed to be 28 x 28.
    labels : np.ndarray of uint8, shape (n_samples,)

    Raises
    ------
    ValueError
        If the number of pixel bytes does not match
        ``len(labels) * rows * cols``.
    """
    kind = 't10k' if kind == 'test' else kind
    labels_path = os.path.join(path, '{}-labels-idx1-ubyte'.format(kind))
    img_path = os.path.join(path, '{}-images-idx3-ubyte'.format(kind))

    with open(labels_path, 'rb') as label_p:
        # 8-byte header: two big-endian uint32 values (magic number, item
        # count). Only skipped here; the label count comes from the payload.
        struct.unpack('>II', label_p.read(8))
        labels = np.fromfile(label_p, dtype=np.uint8)

    with open(img_path, 'rb') as img_p:
        # 16-byte header: magic number, image count, rows, cols.
        _, _, rows, cols = struct.unpack('>IIII', img_p.read(16))
        pixels = np.fromfile(img_p, dtype=np.uint8)

    n_pixels = rows * cols
    if pixels.size != len(labels) * n_pixels:
        raise ValueError(
            'image file {!r} holds {} pixel bytes, expected {} '
            '({} labels x {}x{} pixels)'.format(
                img_path, pixels.size, len(labels) * n_pixels,
                len(labels), rows, cols))

    images = pixels.reshape(len(labels), n_pixels)
    return images, labels

In [7]:
# Training split: flattened uint8 images and their integer labels.
X_train, y_train = load_mnist(path='./mnist/', kind='train')

In [8]:
# Held-out split, stored under the 't10k' file prefix.
X_test, y_test = load_mnist(path='./mnist/', kind='t10k')

In [9]:
## Standardize the pixels: subtract the per-column (per-pixel) mean of the
## training set, then divide by one global standard deviation taken over
## every training pixel. The test set reuses the training statistics.
mean_vals = X_train.mean(axis=0)
std_val = X_train.std()

X_train_centered, X_test_centered = (
    (split - mean_vals) / std_val for split in (X_train, X_test)
)

In [10]:
# Seed NumPy's global RNG (used later when shuffling mini-batches).
random_seed = 123
np.random.seed(random_seed)

# Problem dimensions: input width comes from the data, ten digit classes.
n_classes = 10
n_features = X_train_centered.shape[1]

In [12]:
# Build the multilayer perceptron in its own graph so it stays isolated
# from the default graph used elsewhere in the notebook.
g = tf.Graph()

with g.as_default():
    # Graph-level seed; set before any op is created so that weight
    # initialization is reproducible.
    tf.set_random_seed(random_seed)

    # Inputs: a batch of flattened images and their integer class labels.
    tf_x = tf.placeholder(dtype=tf.float32,
                          shape=(None, n_features),
                          name='tf_x')
    tf_y = tf.placeholder(dtype=tf.int32,
                          shape=None,
                          name='tf_y')
    y_onehot = tf.one_hot(indices=tf_y,
                          depth=n_classes)

    # Two tanh hidden layers of 50 units each.
    h1 = tf.layers.dense(inputs=tf_x,
                         units=50,
                         activation=tf.tanh,
                         name='layer1')
    h2 = tf.layers.dense(inputs=h1,
                         units=50,
                         activation=tf.tanh,
                         name='layer2')
    # Linear output layer. Its width must match the one-hot depth above,
    # so it is tied to n_classes instead of a separate hard-coded 10.
    logits = tf.layers.dense(inputs=h2,
                             units=n_classes,
                             activation=None,
                             name='layer3')

    # Inference outputs: hard class decision and per-class probabilities.
    predictions = {
        'classes': tf.argmax(logits,
                             axis=1,
                             name='pred_class'),
        'probabilities': tf.nn.softmax(logits,
                                       name='softmax_tensor')
    }

In [13]:
with g.as_default():
    # Softmax cross-entropy between the one-hot targets and the raw logits.
    cost = tf.losses.softmax_cross_entropy(
        onehot_labels=y_onehot,
        logits=logits,
    )

    # Plain gradient descent with a fixed step size.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(loss=cost)

    # Created last so it covers every variable defined in the graph so far.
    init_op = tf.global_variables_initializer()

Instructions for updating:
Use tf.cast instead.


In [15]:
def create_batch_generator(X, y, batch_size=128, shuffle=False):
    """Yield successive ``(X_batch, y_batch)`` mini-batches.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix; copied, never modified in place.
    y : array-like, shape (n_samples,)
        Targets aligned row-by-row with ``X``; copied as well.
    batch_size : int
        Maximum number of rows per batch. The last batch is smaller when
        ``n_samples`` is not a multiple of ``batch_size``.
    shuffle : bool
        If True, rows are permuted once (using NumPy's global RNG, so
        ``np.random.seed`` controls the order) before batching.

    Yields
    ------
    tuple of (np.ndarray, np.ndarray)
        Feature rows and matching targets. Both keep the dtype of the
        input arrays whether or not ``shuffle`` is set.

    Raises
    ------
    ValueError
        If ``shuffle`` is True and ``X`` and ``y`` have different lengths.
    """
    X_copy = np.array(X)
    y_copy = np.array(y)
    n_samples = X_copy.shape[0]

    if shuffle:
        if y_copy.shape[0] != n_samples:
            raise ValueError(
                'X and y must have the same number of rows, got {} and {}'
                .format(n_samples, y_copy.shape[0]))
        # Apply one shared permutation to both arrays. Stacking X and y
        # into one matrix would force a common dtype on features and
        # labels and would fail for non-numeric labels.
        order = np.random.permutation(n_samples)
        X_copy = X_copy[order]
        y_copy = y_copy[order]

    # Use the array copy for the length so plain lists are accepted too.
    for i in range(0, n_samples, batch_size):
        yield (X_copy[i:i+batch_size, :], y_copy[i:i+batch_size])

In [16]:
epochs = 50

# The session stays open on purpose: the evaluation cell reuses it.
sess = tf.Session(graph=g)
sess.run(init_op)

for epoch in range(epochs):
    # Per-batch losses of the current epoch, averaged for the report below.
    training_costs = []

    # A fresh generator each epoch, so the data is reshuffled every time.
    batch_gen = create_batch_generator(
        X_train_centered,
        y_train,
        batch_size=64,
        shuffle=True,
    )

    for X_batch, y_batch in batch_gen:
        feed = {tf_x: X_batch, tf_y: y_batch}
        # One optimizer step; also fetch the loss for this batch.
        _, batch_cost = sess.run((train_op, cost), feed_dict=feed)
        training_costs.append(batch_cost)

    print('-- EPOCH {} -- Avg. Cost: {}'.format(epoch+1, np.mean(training_costs)))

-- EPOCH 1 -- Avg. Cost: 1.5572493076324463
-- EPOCH 2 -- Avg. Cost: 0.9490127563476562
-- EPOCH 3 -- Avg. Cost: 0.7497641444206238
-- EPOCH 4 -- Avg. Cost: 0.6385995149612427
-- EPOCH 5 -- Avg. Cost: 0.5667569637298584
-- EPOCH 6 -- Avg. Cost: 0.5158420205116272
-- EPOCH 7 -- Avg. Cost: 0.4780220091342926
-- EPOCH 8 -- Avg. Cost: 0.4484705626964569
-- EPOCH 9 -- Avg. Cost: 0.4247126281261444
-- EPOCH 10 -- Avg. Cost: 0.405110239982605
-- EPOCH 11 -- Avg. Cost: 0.3884292244911194
-- EPOCH 12 -- Avg. Cost: 0.3742930293083191
-- EPOCH 13 -- Avg. Cost: 0.36180493235588074
-- EPOCH 14 -- Avg. Cost: 0.3507132828235626
-- EPOCH 15 -- Avg. Cost: 0.3408225476741791
-- EPOCH 16 -- Avg. Cost: 0.331996887922287
-- EPOCH 17 -- Avg. Cost: 0.32398995757102966
-- EPOCH 18 -- Avg. Cost: 0.31648558378219604
-- EPOCH 19 -- Avg. Cost: 0.30970874428749084
-- EPOCH 20 -- Avg. Cost: 0.30342885851860046
-- EPOCH 21 -- Avg. Cost: 0.29759082198143005
-- EPOCH 22 -- Avg. Cost: 0.29222217202186584
-- EPOCH 23 --

In [17]:
# Predicted class index for every test image, from the trained network.
y_pred = sess.run(
    predictions['classes'],
    feed_dict={tf_x: X_test_centered},
)

# Accuracy = fraction of predictions that equal the true label.
print('test accuracy: ',
      (y_pred == y_test).sum() / len(y_test))

test accuracy:  0.9388


### MNIST - multilayer perceptron with Keras

In [18]:
import tensorflow.contrib.keras as keras

In [19]:
# Reseed both RNGs before building the Keras model.
tf.set_random_seed(random_seed)
np.random.seed(random_seed)

In [20]:
# One-hot encode the integer labels; the class count is read from the
# width of the encoded matrix.
y_train_onehot = keras.utils.to_categorical(y_train)
n_classes = y_train_onehot.shape[1]

# Initializer settings shared by every Dense layer.
dense_init = dict(kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')

# Same topology as the TensorFlow-layers model: two tanh hidden layers of
# 50 units, followed by a softmax output layer.
model = keras.models.Sequential()
model.add(keras.layers.Dense(units=50,
                             input_dim=n_features,
                             activation='tanh',
                             **dense_init))
model.add(keras.layers.Dense(units=50,
                             input_dim=50,
                             activation='tanh',
                             **dense_init))
model.add(keras.layers.Dense(units=n_classes,
                             input_dim=50,
                             activation='softmax',
                             **dense_init))

# SGD with momentum and a small learning-rate decay.
sgd_optimizer = keras.optimizers.SGD(lr=0.001,
                                     momentum=.9,
                                     decay=1e-7)

# compile
model.compile(loss='categorical_crossentropy',
              optimizer=sgd_optimizer)

In [21]:
# Train for 50 epochs with mini-batches of 64, holding out 10% of the
# training data for validation.
train_log = model.fit(
    x=X_train_centered,
    y=y_train_onehot,
    batch_size=64,
    epochs=50,
    verbose=True,
    validation_split=0.1,
)

Train on 54000 samples, validate on 6000 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [22]:
# Predicted class per training image, then the fraction that is correct.
y_train_pred = model.predict_classes(X_train_centered, verbose=True)
print('training accuracy: ', (y_train == y_train_pred).sum(axis=0) / len(y_train))

training accuracy:  0.9881333333333333


In [24]:
# Predicted class per test image, then the fraction that is correct.
y_test_pred = model.predict_classes(X_test_centered, verbose=True)
print('test accuracy: ', (y_test == y_test_pred).sum(axis=0) / len(y_test))

test accuracy:  0.9627
