In [4]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
import numpy as np

tf.logging.set_verbosity(tf.logging.INFO)

## CNN piece by piece

In [None]:
# Reshape the input features into image tensors of shape [batch_size, 28, 28, 1]
# (-1 lets TensorFlow infer the batch dimension).
input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

# Convolutional layer #1: 32 filters of size 5x5 with ReLU activation.
# "same" padding keeps the spatial size -> [batch_size, 28, 28, 32]
conv1 = tf.layers.conv2d(
    inputs = input_layer,
    filters = 32,
    kernel_size = [5, 5],
    padding = "same",
    activation = tf.nn.relu)

# Pooling layer #1: 2x2 max pooling with stride 2 -> [batch_size, 14, 14, 32]
pool1 = tf.layers.max_pooling2d(inputs = conv1,
                                pool_size = [2, 2],
                                strides = 2)


# Convolutional layer #2: 64 filters of size 5x5 with ReLU activation.
# padding = "same" is needed here: with the default "valid" padding the output
# would shrink to 10x10 (5x5 after pooling) and the 7 * 7 * 64 reshape below
# would no longer line up with one example per row.
# Output shape -> [batch_size, 14, 14, 64]
conv2 = tf.layers.conv2d(
    inputs = pool1,
    filters = 64,
    kernel_size = [5, 5],
    padding = "same",
    activation = tf.nn.relu)

# Pooling layer #2: 2x2 max pooling with stride 2 -> [batch_size, 7, 7, 64]
pool2 = tf.layers.max_pooling2d(inputs = conv2,
                                pool_size = [2, 2],
                                strides = 2)

# Now add a dense layer to perform classification on the features extracted
# by the convolution/pooling layers.

# First, flatten each example's feature map (height x width x channels) into a
# vector, giving a 2-D tensor of shape [batch_size, 7 * 7 * 64]
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

# Fully connected layer with 1024 units and ReLU activation
dense = tf.layers.dense(inputs = pool2_flat,
                        units = 1024,
                        activation = tf.nn.relu)

# Add dropout regularization to the dense layer.
# 40% of elements are randomly dropped, but only when `training` is True,
# i.e. when the model runs in TRAIN mode.
dropout = tf.layers.dropout(inputs = dense,
                            rate = 0.4,
                            training = mode == tf.estimator.ModeKeys.TRAIN)

# Resulting tensor, dropout, has shape [batch_size, 1024]

# Logits layer returns the raw values for our predictions.
# A dense layer with 10 units (one per target class, 0-9) and no activation
# argument, i.e. linear activation.
logits = tf.layers.dense(inputs = dropout, units = 10)

# Final output tensor, logits, has shape [batch_size, 10]

### Generate predictions

In [None]:
# Convert the logits layer into two formats:
# (1) predicted class for each example (digit from 0-9)
# (2) probabilities for each possible target class

"""
Predictions

tf.argmax:
- Axis specifies the axis of the input tensor along which to find the greatest value
- Here, we want to find the largest value along the dimension with index 1
  which corresponds to our predictions in logits tensor of shape [batch_size, 10]

tf.nn.softmax:
- We derive probs from logits layer by applying softmax activation
"""

# Compile predictions in a dict, return an EstimatorSpec object
predictions = {
    "classes": tf.argmax(input=logits, axis=1),
    "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)


### Calculate loss
The loss for our CNN is the softmax cross-entropy between the logits and our labels.

In [None]:
# `labels` holds the target class index of each example (e.g. [1, 5, ...]);
# `logits` holds the linear outputs of the last layer.
# tf.losses.sparse_softmax_cross_entropy computes the softmax cross-entropy
# (aka categorical cross-entropy, negative log-likelihood) from these two
# inputs in an efficient, numerically stable way.
loss = tf.losses.sparse_softmax_cross_entropy(
    labels=labels,
    logits=logits)


### Configure training op
Configure the model to optimize this loss value during training. We use a learning rate of 0.001 and stochastic gradient descent as the optimization algorithm.

In [None]:
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.001)
    train_op = optimizer.minimize(
        loss = loss,
        global_step = tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode = mode, loss = loss, train_op = train_op)

### Add evaluation metrics

In [None]:
eval_metric_ops = {
    "accuracy": tf.metrics.accuracy(
        labels = labels, predictions = predictions["class"])
}
return tf.estimator.EstimatorSpec(
    mode = mode, loss = loss, eval_metric_ops = eval_metric_ops)

***************

## Complete CNN

In [24]:
def cnn_model_fn(features, labels, mode):
    """Build the CNN graph and return the EstimatorSpec matching `mode`.

    Args:
        features: dict whose "x" entry holds the input images; it is reshaped
            to [-1, 28, 28, 1].
        labels: target class indices, used for the loss and the accuracy
            metric (not used in PREDICT mode).
        mode: a `tf.estimator.ModeKeys` value selecting which spec to build.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying the predictions (PREDICT),
        the loss and train op (TRAIN), or the loss and evaluation metrics
        (any other mode, i.e. EVAL).
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Input: [batch_size, 28, 28, 1]
    images = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Conv #1 (32 5x5 filters, "same" padding) followed by 2x2 max pooling
    conv1 = tf.layers.conv2d(
        inputs=images,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(
        inputs=conv1,
        pool_size=[2, 2],
        strides=2)

    # Conv #2 (64 5x5 filters, "same" padding) followed by 2x2 max pooling
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(
        inputs=conv2,
        pool_size=[2, 2],
        strides=2)

    # Flatten, then a 1024-unit dense layer; dropout is active only in TRAIN
    flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(
        inputs=flattened,
        units=1024,
        activation=tf.nn.relu)
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.4,
        training=is_training)

    # One raw score per class
    logits = tf.layers.dense(inputs=dropout, units=10)

    # "classes" is used for PREDICT and EVAL; naming the softmax op
    # `softmax_tensor` lets the `logging_hook` look it up by name.
    predicted_classes = tf.argmax(input=logits, axis=1)
    class_probabilities = tf.nn.softmax(logits, name="softmax_tensor")
    predictions = {
        "classes": predicted_classes,
        "probabilities": class_probabilities,
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # The loss is needed by both TRAIN and EVAL
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if is_training:
        # Gradient descent step that also advances the global step
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op)

    # Remaining case (EVAL): report accuracy alongside the loss
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops={"accuracy": accuracy})

In [25]:
# Load the training and evaluation data as NumPy arrays.
# tf.keras.datasets.mnist.load_data() provides the raw pixel values of
# 60,000 training images and 10,000 test images of hand-drawn digits,
# together with labels with values from 0-9.
(train_data, train_labels), (eval_data, eval_labels) = (
    tf.keras.datasets.mnist.load_data())

# Divide the raw pixel values by 255 (as a float32 scalar)
train_data = train_data / np.float32(255)
eval_data = eval_data / np.float32(255)

# Cast the labels to int32 (marked "not required" by the original author)
train_labels = train_labels.astype(np.int32)
eval_labels = eval_labels.astype(np.int32)

In [26]:
# Create an Estimator for our model.
# (Estimators are a TensorFlow class for performing high-level model
# training, evaluation, and inference.)
#
# model_dir is where checkpoints (model data) will be saved.
mnist_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir="/tmp/mnist_covnet_model",
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/mnist_covnet_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002049867F470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [27]:
# Logging hook to track progress while the CNN is training: every 50
# iterations it logs the tensor named "softmax_tensor" under the label
# "probabilities".
tensors_to_log = {"probabilities": "softmax_tensor"}

logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log,
    every_n_iter=50,
)

In [28]:
# Input function for training: shuffled batches of 100 examples;
# num_epochs=None means the number of passes over the data is not limited
# here, so training length is set by `steps` below.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data},
    y=train_labels,
    batch_size=100,
    num_epochs=None,
    shuffle=True,
)

# Train for a single step with the logging hook attached so the
# probabilities are displayed.
mnist_classifier.train(
    input_fn=train_input_fn,
    steps=1,
    hooks=[logging_hook],
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/mnist_covnet_model\model.ckpt-1001
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1001 into /tmp/mnist_covnet_model\model.ckpt.
INFO:tensorflow:probabilities = [[0.07054292 0.11348545 0.09268121 0.06663964 0.13154176 0.09337026
  0.07955404 0.07471107 0.15346894 0.12400474]
 [0.04343146 0.10258989 0.11969012 0.08906882 0.08916412 0.08433247
  0.09387224 0.14944091 0.10355207 0.1248579 ]
 [0.06427707 0.0977134  0.17542368 0.15851511 0.07947324 0.03446219
  0.237709   0.03469205 0.06289919 0.0548351 ]
 [0.04771806 0.04658738 0.06006234 0.11168235 0.10869076 0.03986224
  0.0410329  0.33008516 0.06327008 0.15100871]
 [0.06166292 0.03492869 0.08907524 0.06793495 0.22011842 0.07113232
  0.08781281 0.0975274  0.12543428 0

INFO:tensorflow:loss = 1.745976, step = 1001
INFO:tensorflow:Saving checkpoints for 1002 into /tmp/mnist_covnet_model\model.ckpt.
INFO:tensorflow:Loss for final step: 1.745976.


<tensorflow.python.estimator.estimator.Estimator at 0x2049c995eb8>

In [29]:
# Train for longer, this time without the logging hook, while still finishing
# in a reasonable time.
# Increasing the number of steps (e.g. to 20,000) would increase accuracy.

mnist_classifier.train(
    input_fn=train_input_fn,
    steps=1000,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/mnist_covnet_model\model.ckpt-1002
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1002 into /tmp/mnist_covnet_model\model.ckpt.
INFO:tensorflow:loss = 1.7618825, step = 1002
INFO:tensorflow:global_step/sec: 48.3765
INFO:tensorflow:loss = 1.5846971, step = 1102 (2.069 sec)
INFO:tensorflow:global_step/sec: 51.2172
INFO:tensorflow:loss = 1.3891101, step = 1202 (1.952 sec)
INFO:tensorflow:global_step/sec: 51.4261
INFO:tensorflow:loss = 1.2131184, step = 1302 (1.946 sec)
INFO:tensorflow:global_step/sec: 51.3381
INFO:tensorflow:loss = 1.0572499, step = 1402 (1.947 sec)
INFO:tensorflow:global_step/sec: 51.3549
INFO:tensorflow:loss = 1.0136278, step = 1502 (1.947 sec)
INFO:tensorflow:global_step/sec: 51.3734
INFO:tensorflo

<tensorflow.python.estimator.estimator.Estimator at 0x2049c995eb8>

In [30]:
# Evaluate the model with one unshuffled pass (num_epochs=1) over the
# evaluation data, then print the resulting metrics.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data},
    y=eval_labels,
    num_epochs=1,
    shuffle=False,
)

eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-01-00:46:31
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/mnist_covnet_model\model.ckpt-2002
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-01-00:46:32
INFO:tensorflow:Saving dict for global step 2002: accuracy = 0.8495, global_step = 2002, loss = 0.5839219
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2002: /tmp/mnist_covnet_model\model.ckpt-2002
{'accuracy': 0.8495, 'loss': 0.5839219, 'global_step': 2002}
