In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
%matplotlib inline

In [4]:
# Load data
with np.load(os.path.join('data','mnist-60k.npz'), allow_pickle=False) as npz_file:
    # Load items into a dictionary
    mnist = dict(npz_file.items())
    
X = mnist['data'].astype(np.float32).reshape(-1, 28, 28, 1)
y = mnist['labels']

In [5]:
# scale and split
from sklearn.model_selection import train_test_split

# Create train/test sets
X_tr, X_te, y_tr, y_te = train_test_split(X,y,test_size=0.1, random_state=0)

# Split again into validation/test sets
X_cv, X_te, y_cv, y_te = train_test_split(X_te, y_te,test_size=0.5, random_state=0)

print("X_tr", X_tr.shape)
print("X_te", X_te.shape)
print("X_cv", X_cv.shape)

X_tr (54000, 28, 28, 1)
X_te (3000, 28, 28, 1)
X_cv (3000, 28, 28, 1)


### With an Estimator

In [6]:
## Model Function

def model_fn(features, labels, mode = "PREDICT"):
    input_layer = tf.reshape(features["X"], [-1, 28, 28, 1])
    y = labels
    
    #training = tf.placeholder(dtype=tf.bool)

    # create global step for decaying learning rate
    #global_step = tf.Variable(0, trainable=False)

    # Decay the learning rate - 
    #learning_rate = tf.train.exponential_decay(0.001,               # start at 0.001
    #                                           global_step, 
    #                                           2000,                # 2000 steps
    #                                           0.9,                 # 0.95 increment
    #                                           staircase=True)

    # Convolutional layer 1 
    conv1 = tf.layers.conv2d(
        input_layer,                           # Input data
        filters=32,                  # 32 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0), # Small standard deviation
        name='conv1'                  # Add name
    )

    # Max pooling layer 1
    pool1 = tf.layers.max_pooling2d(
        conv1,                       # Input
        pool_size=(2, 2),            # Pool size: 2x
        strides=(2, 2),              # Stride: 2
        padding='SAME',              # "same" padding
        name='pool1'
    )

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        pool1,                       # Input
        filters=64,                  # 64 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0),    # Small standard deviation
        name='conv2'                 # Add name
    )

    # Max pooling layer 2 (2x2, stride: 2) - TUNED
    pool2 = tf.layers.max_pooling2d(
        conv2,                       # input
        pool_size=(2, 2),            # pool size 2x2
        strides=(2, 2),              # stride 2
        padding='SAME'
    )

    # Flatten output
    flat_output = tf.contrib.layers.flatten(pool2)

    # Fully connected layer
    fc1 = tf.layers.dense(
        flat_output,                 # input
        1024,                         # 256 hidden units
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer()
    )

    if mode == tf.estimator.ModeKeys.TRAIN:
        fc1 = tf.layers.dropout(fc1, rate=0.40, seed=1)

    # logits
    logits = tf.layers.dense(
        fc1,                         # input
        10,                           # One output unit per category
        activation=None,             # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer()
    )

    # predictions
    predictions = {
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss,global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    ## Else if mode == "EVAL"
    
    # Compute predictions and accuracy
    #is_correct = tf.equal(y, predictions['classes'])
    #accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

    eval_metric_ops = {
      "accuracy": accuracy,
      #"cost": loss,  
    }

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [7]:
config = tf.ConfigProto(
    # uncomment to run on CPU
    device_count = {'GPU': 0}
)
estimator_config = tf.estimator.RunConfig(session_config=config)

# create the Estimator
mnist_classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir="./mnist_convnet_model", config=estimator_config)

# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_tr},
    y=y_tr,
    batch_size=128,
    num_epochs=None,
    shuffle=True)

INFO:tensorflow:Using config: {'_model_dir': './mnist_convnet_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': device_count {
  key: "GPU"
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001C0F74F79E8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [35]:
# train 
mnist_classifier.train(input_fn=train_input_fn, steps=5000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-25002
INFO:tensorflow:Saving checkpoints for 25003 into ./mnist_convnet_model\model.ckpt.
INFO:tensorflow:loss = 0.00087020826, step = 25003
INFO:tensorflow:global_step/sec: 3.6752
INFO:tensorflow:loss = 0.00087107084, step = 25103 (27.226 sec)
INFO:tensorflow:global_step/sec: 3.78393
INFO:tensorflow:loss = 0.001524379, step = 25203 (26.426 sec)
INFO:tensorflow:global_step/sec: 3.71335
INFO:tensorflow:loss = 0.0015444546, step = 25303 (26.914 sec)
INFO:tensorflow:global_step/sec: 3.72958
INFO:tensorflow:loss = 0.003645241, step = 25403 (26.814 sec)
INFO:tensorflow:global_step/sec: 3.64249
INFO:tensorflow:loss = 0.0015865283, step = 25503 (27.468 sec)
INFO:tensorflow:global_step/sec: 3.73841
INFO:tensorflow:loss = 0.00087912916, step = 25603 (26.734 sec)
INFO:tensorflow:global_step/sec: 3.804
INFO:tensorflow:loss = 0.0017781884, step = 25703 (26.288 sec)
INFO:tensorflow

<tensorflow.python.estimator.estimator.Estimator at 0x277a0e6bbe0>

In [8]:
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_cv},
    y=y_cv,
    num_epochs=1,
    shuffle=False)

eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)

print(eval_results)

INFO:tensorflow:Starting evaluation at 2018-02-10-13:51:50
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-30002
INFO:tensorflow:Finished evaluation at 2018-02-10-13:51:56
INFO:tensorflow:Saving dict for global step 30002: accuracy = 0.988, global_step = 30002, loss = 0.05056255
{'accuracy': 0.988, 'loss': 0.05056255, 'global_step': 30002}


In [37]:
# Evaluate the model and print results
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_te},
    y=y_te,
    num_epochs=1,
    shuffle=False)

test_results = mnist_classifier.evaluate(input_fn=test_input_fn)

print(test_results)

INFO:tensorflow:Starting evaluation at 2018-02-10-13:37:50
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-30002
INFO:tensorflow:Finished evaluation at 2018-02-10-13:37:53
INFO:tensorflow:Saving dict for global step 30002: accuracy = 0.9853333, global_step = 30002, loss = 0.064221136
{'accuracy': 0.9853333, 'loss': 0.064221136, 'global_step': 30002}
