# Introduction
Build a convolutional neural network (CNN) model to recognize the handwritten digits in the MNIST dataset.

In [13]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

# Show INFO-level TensorFlow log messages (e.g. the per-step "loss = ..." lines
# printed while the estimator trains).
tf.logging.set_verbosity(tf.logging.INFO)

CNNs apply a series of filters to the raw pixel data of an image to extract and learn higher-level features, which the model can then use for classification.    
A CNN contains three components:

- Convolutional layers
- Pooling layers
- Dense layers, which perform classification on the features

CNN architecture: n × (convolutional layer + pooling layer) + dense layers + a final dense layer (softmax)

The `tf.layers` module contains functions to create each of the three layer types:

- conv2d()
- max_pooling2d()
- dense()

In [25]:

def cnn_model_fn(features, labels, mode):
  """Estimator model function: a two-stage CNN classifier for MNIST digits.

  Graph layout: reshape -> (5x5 conv + 2x2 max-pool) twice -> dense(1024)
  -> dropout -> dense(10) logits.

  Args:
    features: dict whose "x" entry holds the flattened images; it is reshaped
      here to [-1, 28, 28, 1].
    labels: class indices fed to the sparse softmax cross-entropy loss and to
      the accuracy metric. Not read in PREDICT mode.
    mode: a tf.estimator.ModeKeys value selecting which spec is returned.

  Returns:
    A tf.estimator.EstimatorSpec holding the predictions (PREDICT), the loss
    and train_op (TRAIN), or the loss and eval metrics (any other mode).
  """
  in_training = mode == tf.estimator.ModeKeys.TRAIN

  # The leading -1 makes the batch dimension follow however many 784-value
  # rows are fed in (e.g. a (100, 784) batch becomes (100, 28, 28, 1)).
  images = tf.reshape(features["x"], [-1, 28, 28, 1])

  # Stage 1: 32 ReLU feature maps from 5x5 kernels; "same" padding keeps 28x28,
  # then 2x2 pooling with stride 2 halves it.
  # [batch, 28, 28, 1] -> [batch, 28, 28, 32] -> [batch, 14, 14, 32]
  stage1_conv = tf.layers.conv2d(
      inputs=images,
      filters=32,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
  stage1_pool = tf.layers.max_pooling2d(
      inputs=stage1_conv, pool_size=[2, 2], strides=2)

  # Stage 2: same recipe with 64 feature maps.
  # [batch, 14, 14, 32] -> [batch, 14, 14, 64] -> [batch, 7, 7, 64]
  stage2_conv = tf.layers.conv2d(
      inputs=stage1_pool,
      filters=64,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
  stage2_pool = tf.layers.max_pooling2d(
      inputs=stage2_conv, pool_size=[2, 2], strides=2)

  # Collapse each example's feature maps into one vector of 7 * 7 * 64 values.
  flat_features = tf.reshape(stage2_pool, [-1, 7 * 7 * 64])

  # Fully connected hidden layer; `units` is the number of neurons.
  hidden = tf.layers.dense(
      inputs=flat_features, units=1024, activation=tf.nn.relu)

  # rate=0.4 drops 40% of the activations (keep probability 0.6), and only
  # while training.
  hidden_dropped = tf.layers.dropout(
      inputs=hidden, rate=0.4, training=in_training)

  # One raw score per digit class: [batch, 1024] -> [batch, 10]
  logits = tf.layers.dense(inputs=hidden_dropped, units=10)

  # Index of the highest logit along axis 1 (the class axis).
  class_ids = tf.argmax(input=logits, axis=1)
  # The op is named so that a logging hook can look it up as "softmax_tensor".
  class_probs = tf.nn.softmax(logits, name="softmax_tensor")
  predictions = {
      "classes": class_ids,
      "probabilities": class_probs,
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Needed by both TRAIN and EVAL. An equivalent formulation would one-hot the
  # labels (depth=10) and call tf.losses.softmax_cross_entropy instead.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  if not in_training:
    # EVAL: report accuracy of the arg-max class against the labels.
    accuracy = tf.metrics.accuracy(labels=labels, predictions=class_ids)
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops={"accuracy": accuracy})

  # TRAIN: plain gradient descent, advancing the global step on every update.
  sgd = tf.train.GradientDescentOptimizer(learning_rate=0.001)
  train_op = sgd.minimize(
      loss=loss,
      global_step=tf.train.get_global_step())
  return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)


def main(unused_argv):
  """Train the MNIST CNN estimator, then evaluate it and print the metrics.

  Args:
    unused_argv: never read by the body; it only lets this function accept the
      argument list an application runner passes to its entry point.
  """
  # Images come back as numpy arrays; labels are cast to int32 arrays.
  dataset = tf.contrib.learn.datasets.load_dataset("mnist")
  train_images = dataset.train.images
  train_digits = np.asarray(dataset.train.labels, dtype=np.int32)
  test_images = dataset.test.images
  test_digits = np.asarray(dataset.test.labels, dtype=np.int32)

  # The Estimator drives training, evaluation and inference for the model
  # function; checkpoints and other model data are written under model_dir.
  classifier = tf.estimator.Estimator(
      model_fn=cnn_model_fn, model_dir="./mnist_convnet_model")

  # Every 100 training iterations, log the tensor named "softmax_tensor"
  # under the label "probabilities".
  probability_logger = tf.train.LoggingTensorHook(
      tensors={"probabilities": "softmax_tensor"}, every_n_iter=100)

  # Shuffled batches of 100; num_epochs=None keeps the feed going until the
  # requested number of training steps has been reached.
  feed_training_batches = tf.estimator.inputs.numpy_input_fn(
      x={"x": train_images},
      y=train_digits,
      batch_size=100,
      num_epochs=None,
      shuffle=True)
  classifier.train(
      input_fn=feed_training_batches,
      steps=20000,
      hooks=[probability_logger])

  # A single ordered pass over the test split.
  feed_test_split_once = tf.estimator.inputs.numpy_input_fn(
      x={"x": test_images},
      y=test_digits,
      num_epochs=1,
      shuffle=False)
  eval_results = classifier.evaluate(input_fn=feed_test_split_once)
  print('eval_results:\n', eval_results)


In [None]:
if __name__ == "__main__":
  # Hand the entry point to TensorFlow's app runner explicitly instead of
  # relying on its default lookup of `main` in the __main__ module.
  # NOTE(review): the runner is expected to exit the interpreter once `main`
  # returns, which shows up as SystemExit inside a notebook kernel - confirm
  # this is acceptable for how the notebook is run.
  tf.app.run(main=main)

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './mnist_convnet_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c33ce62e8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was 

INFO:tensorflow:loss = 2.3053489, step = 1
INFO:tensorflow:global_step/sec: 4.08832
INFO:tensorflow:probabilities = [[0.10208429 0.09785339 0.10069627 0.09063467 0.09754754 0.0886066
  0.12723622 0.11184037 0.08372994 0.09977079]
 [0.10635014 0.09868658 0.10142502 0.09607047 0.09556601 0.10369857
  0.10403452 0.10732426 0.09806115 0.08878328]
 [0.09923414 0.09722896 0.09975617 0.08970919 0.10079145 0.10442867
  0.0992864  0.11560465 0.08950713 0.10445324]
 [0.10087507 0.10565156 0.09415913 0.08570309 0.10517281 0.09257831
  0.10480505 0.09627762 0.10316929 0.11160802]
 [0.10515556 0.0832479  0.10010336 0.0902073  0.10442088 0.0892093
  0.10187056 0.10109252 0.11405973 0.11063287]
 [0.10261523 0.10565913 0.09693601 0.08561442 0.10270259 0.08988082
  0.10186658 0.11419103 0.10044538 0.10008886]
 [0.10447587 0.09424081 0.09913945 0.10092252 0.10099184 0.08617829
  0.11247186 0.10313421 0.08879234 0.10965282]
 [0.1113504  0.0911716  0.09798446 0.09277473 0.09980988 0.08857284
  0.11156796 

INFO:tensorflow:loss = 2.2989833, step = 101 (24.466 sec)
INFO:tensorflow:global_step/sec: 3.95867
INFO:tensorflow:probabilities = [[0.10534564 0.10283708 0.10417049 0.0977988  0.09430248 0.08951548
  0.1029956  0.11202016 0.08571804 0.10529625]
 [0.12225939 0.09391458 0.09157826 0.08815975 0.10330002 0.07302845
  0.11355528 0.10243523 0.1066423  0.10512682]
 [0.0995443  0.10034095 0.09518736 0.08955342 0.09628465 0.10407316
  0.09940159 0.11694757 0.09788083 0.10078613]
 [0.09948545 0.08649273 0.09370459 0.09208474 0.11074292 0.09694776
  0.10892618 0.09743587 0.10906786 0.10511191]
 [0.1038491  0.09181367 0.10417922 0.09368172 0.10003942 0.0963634
  0.09987885 0.097089   0.10033661 0.11276904]
 [0.10853285 0.07841495 0.10969371 0.10771049 0.09532449 0.10026873
  0.09767724 0.10892267 0.09036704 0.10308784]
 [0.12108215 0.09214576 0.10679898 0.09267533 0.10208093 0.09845873
  0.10759998 0.1098648  0.08584298 0.08345043]
 [0.10081442 0.09076931 0.10365778 0.08815721 0.11880936 0.095480

INFO:tensorflow:loss = 2.2454376, step = 201 (25.264 sec)
INFO:tensorflow:global_step/sec: 4.28514
INFO:tensorflow:probabilities = [[0.09490272 0.09414977 0.09503643 0.10573516 0.10911582 0.08549266
  0.11212755 0.10330006 0.09949011 0.10064971]
 [0.10057664 0.08571897 0.09553038 0.1016348  0.10386359 0.09667819
  0.09463757 0.10430368 0.10031581 0.11674037]
 [0.11561858 0.08847307 0.09973893 0.09124807 0.11236044 0.08451259
  0.12390856 0.08931696 0.10836305 0.08645982]
 [0.1096684  0.09138932 0.08510524 0.09233028 0.10721546 0.08600603
  0.09870054 0.11701687 0.09650801 0.11605977]
 [0.09334125 0.09958957 0.10337885 0.09797359 0.09985621 0.09196943
  0.11350633 0.11406877 0.08508663 0.10122936]
 [0.12289163 0.08281961 0.11711623 0.1028912  0.08184651 0.08968747
  0.1038515  0.08980472 0.10493784 0.10415329]
 [0.10897768 0.08270942 0.11252355 0.12705706 0.09580105 0.08753813
  0.0966235  0.09448282 0.09404937 0.1002374 ]
 [0.10304215 0.09862729 0.08675246 0.08554622 0.11633351 0.08600

INFO:tensorflow:loss = 2.232041, step = 301 (23.347 sec)
INFO:tensorflow:global_step/sec: 4.38671
INFO:tensorflow:probabilities = [[0.09301325 0.09012716 0.10636402 0.09749316 0.11056768 0.09095441
  0.11793566 0.0927904  0.10983396 0.09092029]
 [0.10662663 0.08257006 0.09966649 0.12340669 0.11201507 0.08413188
  0.10695492 0.08778051 0.10238498 0.09446274]
 [0.10621448 0.08720709 0.10095817 0.09620945 0.09897932 0.09380738
  0.09996462 0.10663547 0.10385346 0.10617054]
 [0.10701063 0.09844771 0.10499769 0.11639296 0.09116992 0.09372336
  0.10331727 0.0999255  0.09301199 0.09200305]
 [0.11550618 0.08898613 0.09532398 0.12340835 0.09895236 0.07918943
  0.08012946 0.11103851 0.09767634 0.10978922]
 [0.10284776 0.07346736 0.12738608 0.1020456  0.09393828 0.10045839
  0.09595165 0.10643201 0.09350933 0.10396359]
 [0.11362632 0.08591859 0.12829663 0.09406413 0.08818994 0.10108361
  0.08462393 0.09629572 0.10162836 0.10627287]
 [0.11227253 0.08417068 0.11424708 0.09780052 0.08837222 0.090980

INFO:tensorflow:loss = 2.2009377, step = 401 (22.778 sec)
INFO:tensorflow:global_step/sec: 4.03841
INFO:tensorflow:probabilities = [[0.09090468 0.08790918 0.08108412 0.10558738 0.10945304 0.08977828
  0.10710445 0.09609966 0.1200778  0.11200143]
 [0.1212029  0.07780061 0.09762535 0.09799096 0.09864416 0.11263138
  0.09522726 0.10495077 0.08883175 0.10509486]
 [0.09136581 0.10688281 0.10642631 0.09482498 0.10103195 0.08411068
  0.10221711 0.10362145 0.10104557 0.10847329]
 [0.13216674 0.06726453 0.13136771 0.09241708 0.10480011 0.09130847
  0.10047885 0.0918667  0.10997756 0.07835224]
 [0.10504399 0.09097792 0.10677657 0.08721259 0.1126274  0.09424359
  0.10163677 0.09700119 0.09309865 0.11138131]
 [0.09082693 0.10595936 0.10472024 0.10876268 0.09624985 0.09717963
  0.0955795  0.10354377 0.09883054 0.09834749]
 [0.12439515 0.07572514 0.10564413 0.08788139 0.11618605 0.09145524
  0.10529458 0.08403705 0.09951306 0.10986832]
 [0.10256572 0.08525427 0.10827936 0.08196537 0.09189955 0.09767

INFO:tensorflow:loss = 2.2011507, step = 501 (24.774 sec)
INFO:tensorflow:global_step/sec: 4.13194
INFO:tensorflow:probabilities = [[0.10020874 0.09003697 0.0959001  0.08961888 0.12087274 0.07407615
  0.11117874 0.0996495  0.10068353 0.11777462]
 [0.09665459 0.09132648 0.14203183 0.11582255 0.0720038  0.08441447
  0.11416107 0.08288936 0.09269483 0.10800104]
 [0.10952764 0.07325652 0.1031531  0.09700403 0.1049734  0.09404702
  0.10937023 0.10913545 0.09923518 0.10029749]
 [0.12031826 0.09233885 0.08962598 0.10621658 0.08763099 0.08820304
  0.11943877 0.09642938 0.09937887 0.10041929]
 [0.11184198 0.07129089 0.1163205  0.13415124 0.09334732 0.0859111
  0.09366629 0.07764927 0.12247001 0.0933513 ]
 [0.1185708  0.08020788 0.11909994 0.10854395 0.09117575 0.09389676
  0.11096969 0.09614369 0.08911502 0.0922765 ]
 [0.10010979 0.10322279 0.10050675 0.10109394 0.09596412 0.09314533
  0.0958536  0.10411415 0.1044801  0.10150939]
 [0.12268776 0.08110597 0.10555657 0.09619123 0.08228691 0.092647

INFO:tensorflow:loss = 2.1232848, step = 601 (24.196 sec)
INFO:tensorflow:global_step/sec: 3.93665
INFO:tensorflow:probabilities = [[0.14906062 0.06972332 0.0814106  0.12136851 0.09642592 0.08550358
  0.10523009 0.10435933 0.08934193 0.09757617]
 [0.09691217 0.07750996 0.11761706 0.11956313 0.08556028 0.10192313
  0.10563093 0.09730384 0.09559116 0.10238839]
 [0.10800069 0.08788086 0.08741254 0.10106397 0.09635343 0.09998094
  0.11089451 0.1104049  0.10932028 0.08868783]
 [0.10437553 0.08768126 0.0891265  0.09304947 0.10896578 0.08119275
  0.10174296 0.10776806 0.09867967 0.12741798]
 [0.09743714 0.08493468 0.11385266 0.10373856 0.08972514 0.08647256
  0.12502502 0.09754672 0.09572185 0.10554558]
 [0.07882962 0.08543358 0.10407931 0.10259783 0.1009009  0.087464
  0.08213164 0.14750183 0.10812006 0.10294115]
 [0.11894625 0.08442692 0.10249437 0.14377256 0.07578892 0.09986115
  0.09440803 0.09891295 0.09472571 0.08666319]
 [0.13441728 0.08048958 0.09097381 0.09504724 0.11129048 0.1026339

INFO:tensorflow:loss = 2.092686, step = 701 (25.398 sec)
