In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
%matplotlib inline

In [2]:
# Load data
with np.load(os.path.join('data','mnist-60k.npz'), allow_pickle=False) as npz_file:
    # Load items into a dictionary
    mnist = dict(npz_file.items())
    
X = mnist['data'].astype(np.float32).reshape(-1, 28, 28, 1)
y = mnist['labels']

In [3]:
# scale and split
from sklearn.model_selection import train_test_split

# Create train/test sets
X_tr, X_te, y_tr, y_te = train_test_split(X,y,test_size=0.1, random_state=0)

# Split again into validation/test sets
X_cv, X_te, y_cv, y_te = train_test_split(X_te, y_te,test_size=0.5, random_state=0)

print("X_tr", X_tr.shape)
print("X_te", X_te.shape)
print("X_cv", X_cv.shape)

X_tr (54000, 28, 28, 1)
X_te (3000, 28, 28, 1)
X_cv (3000, 28, 28, 1)


### With an Estimator

In [17]:
## Model Function

def model_fn(features, labels, mode = "PREDICT"):
    input_layer = tf.reshape(features["X"], [-1, 28, 28, 1])
    y = labels
    
    # Convolutional layer 1 
    conv1 = tf.layers.conv2d(
        input_layer,                           # Input data
        filters=32,                  # 32 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0), # Small standard deviation
        name='conv1'                  # Add name
    )

    # Max pooling layer 1
    pool1 = tf.layers.max_pooling2d(
        conv1,                       # Input
        pool_size=(2, 2),            # Pool size: 2x
        strides=(2, 2),              # Stride: 2
        padding='SAME',              # "same" padding
        name='pool1'
    )

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        pool1,                       # Input
        filters=64,                  # 64 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0),    # Small standard deviation
        name='conv2'                 # Add name
    )

    # Max pooling layer 2 (2x2, stride: 2) - TUNED
    pool2 = tf.layers.max_pooling2d(
        conv2,                       # input
        pool_size=(2, 2),            # pool size 2x2
        strides=(2, 2),              # stride 2
        padding='SAME'
    )

    # Flatten output
    flat_output = tf.contrib.layers.flatten(pool2)

    # Fully connected layer
    fc1 = tf.layers.dense(
        flat_output,                 # input
        1024,                         # 256 hidden units
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer()
    )

    if mode == tf.estimator.ModeKeys.TRAIN:
        fc1 = tf.layers.dropout(fc1, rate=0.70, seed=1)

    # logits
    logits = tf.layers.dense(
        fc1,                         # input
        10,                           # One output unit per category
        activation=None,             # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer()
    )

    # predictions
    predictions = {
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss,global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    ## Else if mode == "EVAL"
    
    # Compute predictions and accuracy
    #is_correct = tf.equal(y, predictions['classes'])
    #accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

    eval_metric_ops = {
      "accuracy": accuracy,
      #"cost": loss,  
    }

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [18]:
config = tf.ConfigProto(
    # comment to run on GPU
    #device_count = {'GPU': 0}
)

# bring GPU memory usage down to a level where it won't crash
config.gpu_options.per_process_gpu_memory_fraction = 0.6

# create a config
estimator_config = tf.estimator.RunConfig(session_config=config)

# create the Estimator
mnist_classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir="./mnist_convnet_model", config=estimator_config)

# Create a training function
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_tr},
    y=y_tr,
    batch_size=32,       # small batch size so GPU doesn't crash
    num_epochs=None,     
    shuffle=True)

INFO:tensorflow:Using config: {'_model_dir': './mnist_convnet_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.6
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001FD5D017320>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [19]:
# train the model
mnist_classifier.train(input_fn=train_input_fn, steps=10000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-35004
INFO:tensorflow:Saving checkpoints for 35005 into ./mnist_convnet_model\model.ckpt.
INFO:tensorflow:loss = 0.0002528891, step = 35005
INFO:tensorflow:global_step/sec: 37.2378
INFO:tensorflow:loss = 7.2806724e-05, step = 35105 (2.686 sec)
INFO:tensorflow:global_step/sec: 38.8225
INFO:tensorflow:loss = 6.1168685e-05, step = 35205 (2.576 sec)
INFO:tensorflow:global_step/sec: 38.974
INFO:tensorflow:loss = 0.00022460984, step = 35305 (2.566 sec)
INFO:tensorflow:global_step/sec: 38.7172
INFO:tensorflow:loss = 0.0010341842, step = 35405 (2.583 sec)
INFO:tensorflow:global_step/sec: 38.7774
INFO:tensorflow:loss = 0.0016988354, step = 35505 (2.579 sec)
INFO:tensorflow:global_step/sec: 38.7173
INFO:tensorflow:loss = 0.0002186491, step = 35605 (2.582 sec)
INFO:tensorflow:global_step/sec: 38.7621
INFO:tensorflow:loss = 0.00038186388, step = 35705 (2.581 sec)
INFO:tensorflow:g

INFO:tensorflow:global_step/sec: 38.853
INFO:tensorflow:loss = 0.00020217139, step = 42805 (2.574 sec)
INFO:tensorflow:global_step/sec: 38.8674
INFO:tensorflow:loss = 0.00029370555, step = 42905 (2.573 sec)
INFO:tensorflow:global_step/sec: 39.0349
INFO:tensorflow:loss = 0.0019593288, step = 43005 (2.562 sec)
INFO:tensorflow:global_step/sec: 38.7924
INFO:tensorflow:loss = 0.0005245799, step = 43105 (2.578 sec)
INFO:tensorflow:global_step/sec: 38.7323
INFO:tensorflow:loss = 0.00023317344, step = 43205 (2.582 sec)
INFO:tensorflow:global_step/sec: 38.8679
INFO:tensorflow:loss = 0.0020530154, step = 43305 (2.573 sec)
INFO:tensorflow:global_step/sec: 38.7886
INFO:tensorflow:loss = 0.0002627341, step = 43405 (2.587 sec)
INFO:tensorflow:global_step/sec: 38.6522
INFO:tensorflow:loss = 0.0014823888, step = 43505 (2.578 sec)
INFO:tensorflow:global_step/sec: 38.9133
INFO:tensorflow:loss = 0.00013957891, step = 43605 (2.570 sec)
INFO:tensorflow:global_step/sec: 38.8528
INFO:tensorflow:loss = 0.0016

<tensorflow.python.estimator.estimator.Estimator at 0x1fd5d2e0ba8>

In [20]:
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_cv},
    y=y_cv,
    num_epochs=1,
    shuffle=False)

eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)

print(eval_results)

INFO:tensorflow:Starting evaluation at 2018-02-11-11:01:38
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-45004
INFO:tensorflow:Finished evaluation at 2018-02-11-11:01:39
INFO:tensorflow:Saving dict for global step 45004: accuracy = 0.98866665, global_step = 45004, loss = 0.052339565
{'accuracy': 0.98866665, 'loss': 0.052339565, 'global_step': 45004}


In [16]:
# Evaluate the model and print results
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_te},
    y=y_te,
    num_epochs=1,
    shuffle=False)

test_results = mnist_classifier.evaluate(input_fn=test_input_fn)

print(test_results)

INFO:tensorflow:Starting evaluation at 2018-02-11-10:52:50
INFO:tensorflow:Restoring parameters from ./mnist_convnet_model\model.ckpt-35004
INFO:tensorflow:Finished evaluation at 2018-02-11-10:52:50
INFO:tensorflow:Saving dict for global step 35004: accuracy = 0.98466665, global_step = 35004, loss = 0.06955772
{'accuracy': 0.98466665, 'loss': 0.06955772, 'global_step': 35004}
