In [1]:
import tensorflow as tf
import numpy as np
from functools import partial

# Number of output classes; sets the width of the final logits layer.
n_class = 10

# Image geometry: 28x28 single-channel images, supplied to the graph as
# flattened vectors of length n_inputs.
height, width, n_channels = (28, 28, 1)
n_inputs = height * width

with tf.name_scope('inputs'):
    # Flat feature vectors, one row per sample.
    X = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs], name='X')
    # Restore the (batch, height, width, channels) layout the conv layers consume.
    X_reshaped = tf.reshape(X, shape=[-1, height, width, n_channels])
    # Integer class labels, one per sample.
    y = tf.placeholder(dtype=tf.int32, shape=[None], name='y')
    # Dropout switch: defaults to False, so dropout is only active when a
    # feed_dict explicitly sets it to True.
    training = tf.placeholder_with_default(False, shape=[], name='training')
    
with tf.name_scope('convolutional_layers'):
    # Shared conv settings: 3x3 kernels, stride 1, 'same' padding (spatial size kept).
    # NOTE(review): no `activation` argument is passed, so conv1/conv2 appear to be
    # purely linear layers -- confirm this is intended.
    conv_layer = partial(tf.layers.conv2d, kernel_size=3, strides=1, padding='same')
    conv1 = conv_layer(inputs=X_reshaped, filters=32)
    conv2 = conv_layer(inputs=conv1, filters=64)
    # 2x2 max pooling with stride 2 halves each spatial dimension (28 -> 14).
    pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=[2, 2], padding='valid')
    # Flatten for the dense layers; 64 * 14 * 14 is hard-coded to match the
    # 64 filters of conv2 and the 14x14 maps produced by pool1.
    flat1 = tf.reshape(pool1, shape=[-1, 64 * 14 * 14])
    drop1 = tf.layers.dropout(flat1, rate=0.25, training=training)

with tf.name_scope('classifier'):
    # Fully connected hidden layer with ELU activation, followed by dropout.
    dense1 = tf.layers.dense(inputs=drop1, units=128, activation=tf.nn.elu, name='dense1')
    drop2 = tf.layers.dropout(dense1, rate=0.5, training=training)

with tf.name_scope('logits'):
    # Unnormalized class scores; y_prob exposes them as softmax probabilities.
    logits = tf.layers.dense(inputs=drop2, units=n_class, name='output')
    y_prob = tf.nn.softmax(logits, name='y_prob')
    
with tf.name_scope('train'):
    # Sparse cross-entropy takes integer labels directly (no one-hot encoding).
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    # Adam with its default hyperparameters.
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # A prediction is correct when the true label is the top-1 logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope('init-and-save'):
    # Created after the optimizer so its variables are also initialized and saved.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

W0819 23:19:57.953969 140388258760512 deprecation.py:323] From <ipython-input-1-efd4b86e82c6>:18: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
W0819 23:19:57.956670 140388258760512 deprecation.py:506] From /home/guhwanbae/anaconda3/envs/gu-keras/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0819 23:19:58.104149 140388258760512 deprecation.py:323] From <ipython-input-1-efd4b86e82c6>:20: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
W0819 23:19:58.182551 14038825

In [2]:
def get_model_params():
    """Snapshot every global variable of the default graph.

    Evaluates all variables in the GLOBAL_VARIABLES collection using the
    default session and returns a dict mapping each variable's op name to
    its current value.
    """
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # A single run() call fetches all values at once.
    values = tf.get_default_session().run(variables)
    names = [variable.op.name for variable in variables]
    return dict(zip(names, values))

def restore_model_params(model_params):
    """Write a snapshot of variable values back into the default graph.

    model_params maps variable op names to values (the structure produced by
    get_model_params). For each name the variable's existing '<name>/Assign'
    op is looked up and re-run, with the snapshot value fed in place of the
    op's second input (the tensor that normally supplies the initial value).
    """
    assign_ops = {}
    feed_dict = {}
    for gvar_name, value in model_params.items():
        assign_op = tf.get_default_graph().get_operation_by_name(gvar_name + '/Assign')
        assign_ops[gvar_name] = assign_op
        # inputs[1] is the value tensor consumed by the assign op.
        feed_dict[assign_op.inputs[1]] = value
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def load_preprocessed_mnist(test_size=10000, random_state=None):
    """Load MNIST, flatten it, re-split it and standardize the features.

    The official train and test parts are pooled, flattened to one float32
    row per image, and split again with ``train_test_split``. The
    ``StandardScaler`` is fit on the training part only and then applied to
    the held-out part, so no statistics of held-out samples leak into the
    preprocessing.

    Args:
        test_size: Size of the held-out part, forwarded to ``train_test_split``.
        random_state: Optional seed forwarded to ``train_test_split``; the
            default ``None`` keeps the split random on every call.

    Returns:
        ``[X_train, X_test, y_train, y_test]`` with float32 features and
        int32 labels.
    """
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
    X = np.r_[X_train, X_test]
    y = np.r_[y_train, y_test]
    # Flatten each image to a single row; the width is derived from the data.
    X = X.astype(np.float32).reshape(len(X), -1)
    y = y.astype(np.int32)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    # Fit scaling statistics on the training rows only, then reuse them.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]

In [4]:
# Load the standardized data, then divide the held-out part: the first 5000
# samples become the validation set and the remainder stays as the test set.
X_train, X_test, y_train, y_test = load_preprocessed_mnist()
X_valid, X_test = X_test[:5000], X_test[5000:]
y_valid, y_test = y_test[:5000], y_test[5000:]

In [5]:
def shuffled_batch(X, y, batch_size):
    """Yield (X_batch, y_batch) pairs covering the data once in random order.

    The samples are permuted and divided into ceil(n_samples / batch_size)
    nearly equal batches, so no batch is larger than ``batch_size``.

    Args:
        X: Array of samples, indexed along its first axis.
        y: Array of targets aligned with ``X`` along the first axis.
        batch_size: Maximum number of samples per batch; must be positive.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected with the same random indices.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))
    n_samples = X.shape[0]
    if n_samples == 0:
        # Nothing to iterate over; np.array_split rejects zero sections.
        return
    # True division: floor division here would make the ceil a no-op, producing
    # oversized batches and zero sections when n_samples < batch_size.
    n_batches = int(np.ceil(n_samples / batch_size))
    random_idx = np.random.permutation(n_samples)
    for idx in np.array_split(random_idx, n_batches):
        yield X[idx], y[idx]

In [6]:
# Upper bound on training epochs; early stopping normally ends training sooner.
n_epochs = 1000
batch_size = 50
# Global count of training steps across all epochs.
iteration = 0

# Early-stopping state: the validation loss is checked every `check_interval`
# training steps, and training stops once more than
# `max_checks_without_progress` consecutive checks fail to improve it.
# np.inf is used because the np.infty alias was removed in NumPy 2.0.
best_loss_val = np.inf
check_interval = 500
checks_since_last_progress = 0
max_checks_without_progress = 20
# Snapshot of the variables at the best validation loss seen so far.
best_model_params = None

# Maps epoch -> (accuracy on the last training batch, validation accuracy).
history = {}

# Train with early stopping on the validation loss, then evaluate the best
# snapshot on the test set and save it.
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffled_batch(X_train, y_train, batch_size):
            iteration += 1
            # training=True enables the dropout layers for this step only.
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                # Validation loss is computed without feeding `training`,
                # so the placeholder default (False) applies.
                loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
                if loss_val < best_loss_val:
                    best_loss_val = loss_val
                    checks_since_last_progress = 0
                    # Keep an in-memory copy of the best variables so far.
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
        
        # End-of-epoch metrics; acc_batch uses the last batch of the epoch.
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        history[epoch] = (acc_batch, acc_val)
        
        print('epoch={:5}\tacc_batch={:3.6f}\tacc_val={:3.6f}'.format(epoch, acc_batch, acc_val))
        
        # The stop condition is only tested once per epoch, so the current
        # epoch always runs to completion before training ends.
        if checks_since_last_progress > max_checks_without_progress:
            # acc_val here comes from the current weights, not the best snapshot.
            print('Early stopping! Final metrics: acc_val={:3.6f}'.format(acc_val))
            break
            
    # Roll the variables back to the best validation-loss snapshot, if any.
    if best_model_params:
        restore_model_params(best_model_params)
        
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    
    print('Test accuracy={:3.6f}'.format(acc_test))
    
    # NOTE(review): 'minist' in the checkpoint name looks like a typo for
    # 'mnist'; left unchanged in case other code loads this path.
    saver.save(sess, './cnn_minist_classifier_best_model')

epoch=    0	acc_batch=0.960000	acc_val=0.966800
epoch=    1	acc_batch=0.980000	acc_val=0.977200
epoch=    2	acc_batch=1.000000	acc_val=0.972600
epoch=    3	acc_batch=0.980000	acc_val=0.979600
epoch=    4	acc_batch=1.000000	acc_val=0.978600
epoch=    5	acc_batch=1.000000	acc_val=0.980400
epoch=    6	acc_batch=0.980000	acc_val=0.983000
epoch=    7	acc_batch=0.960000	acc_val=0.984000
epoch=    8	acc_batch=1.000000	acc_val=0.984400
epoch=    9	acc_batch=1.000000	acc_val=0.985600
epoch=   10	acc_batch=1.000000	acc_val=0.984400
epoch=   11	acc_batch=0.980000	acc_val=0.985600
epoch=   12	acc_batch=1.000000	acc_val=0.981800
epoch=   13	acc_batch=1.000000	acc_val=0.985800
epoch=   14	acc_batch=1.000000	acc_val=0.985800
epoch=   15	acc_batch=1.000000	acc_val=0.986000
epoch=   16	acc_batch=1.000000	acc_val=0.983200
Early stopping! Final metrics: acc_val=0.983200
Test accuracy=0.982200
