In [50]:
import tensorflow as tf
import os
import glob
import pandas as pd
import numpy as np
tf.set_random_seed(42)
tf.logging.set_verbosity(tf.logging.INFO)  # for logging

In [51]:
def get_mnist_data():

    dirname, _ = os.path.split(os.path.dirname(os.path.abspath("__file__")))
    data_dir = os.path.join(dirname, "data")
    MNIST_dir = os.path.join(data_dir, "MNIST")

    if os.path.exists(MNIST_dir):
        all_files = glob.glob(MNIST_dir + "/*.csv")
        train_path = all_files[0] if 'train' in all_files[0] else all_files[1]
        test_path  = all_files[0] if 'test' in all_files[0] else all_files[1]

        train = pd.read_csv(train_path, header=None)
        X_train = train.iloc[:,1:].values/255.0
        y_train = train.iloc[:,0].values

        test = pd.read_csv(test_path, header=None)
        X_test = test.iloc[:, 1:].values/255.0
        y_test = test.iloc[:, 0].values

        return X_train, y_train, X_test, y_test
    else:
        raise IOError("Path: {0} not found!".format(MNIST_dir))

In [52]:
# get the normalized MNIST data
X_train, y_train, X_test, y_test = get_mnist_data()

In [4]:
y_train

array([5, 0, 4, ..., 5, 6, 8])

### Define input function for Training

In [5]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x = {"x": np.array(X_train)},
    y = np.array(y_train),
    num_epochs=None,
    shuffle=True
)

In [6]:
train_input_fn()

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


({'x': <tf.Tensor 'random_shuffle_queue_DequeueMany:1' shape=(128, 784) dtype=float64>},
 <tf.Tensor 'random_shuffle_queue_DequeueMany:2' shape=(128,) dtype=int64>)

### Define input function for Evaluation 

In [7]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x = {"x": np.array(X_test)},
    y = np.array(y_test),
    num_epochs=1,
    shuffle=False
)

### Define neural network 

In [8]:
def dnn_model(inputs, reuse, is_training=False):
    with tf.variable_scope("DNN_NET", reuse=reuse):
        x = inputs["x"]
        fc1 = tf.layers.dense(x, units=512, activation=tf.nn.relu)
        fc1 = tf.layers.dropout(fc1, rate=0.4, training=is_training)
        fc2 = tf.layers.dense(fc1, units=256, activation=tf.nn.relu)
        fc2 = tf.layers.dropout(fc2, rate=0.4, training=is_training)
        fc3 = tf.layers.dense(fc2, units=128, activation=tf.nn.relu)
        fc3 = tf.layers.dropout(fc3, rate=0.4, training=is_training)
        out = tf.layers.dense(fc3, units=10)

        return out

In [9]:
def cnn_model(inputs, reuse, is_training=False):
    with tf.variable_scope("Conv_NET", reuse=reuse):
        ## Input layer
        # MNIST data input is a 1-D vector of 784 features.
        # Reshape each example to match the format: [batch_size X img_height X img_width X channel]

        input_layer = tf.reshape(inputs["x"], shape=[-1, 28, 28, 1])
        
        ## Convolutional Layer and Pooling Layer#1
        # Convolution layer with 32 filters with kernel size [5 X 5] with ReLU activation function.
        conv_layer1 = tf.layers.Conv2D(
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu
        )
        conv1 = conv_layer1.apply(inputs=input_layer)
        # Max pooling with filter size [2 X 2] and stride of 2 (specifies pooled region do not overlap)
        pool1 = tf.layers.max_pooling2d(
            inputs=conv1,
            pool_size=[2, 2],
            strides=2
        )
        
        ## Convolutional Layer and Pooling Layer#2
        # Convolution layer with 64 filters with kernel size [5 X 5] with ReLU activation function.
        conv_layer2 = tf.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu
        )
        conv2 = conv_layer2.apply(inputs=pool1)
        # Max pooling with filter size [2 X 2] and stride of 2 (specifies pooled region do not overlap)
        pool2 = tf.layers.max_pooling2d(
            inputs=conv2,
            pool_size=[2, 2],
            strides=2
        )
            
        # Flatten the data to a 1-D vector for the Dense layer
        fc = tf.layers.flatten(pool2)

        # Dense Layer
        dense = tf.layers.dense(inputs=fc, units=1024, activation=tf.nn.relu)
        dense = tf.layers.dropout(inputs=dense, rate=0.25, training=is_training)

        # Output Layer for MNIST 10 class prediction
        out = tf.layers.dense(inputs=dense, units=10)

        return out

### Define the model function 

In [38]:
def model_funct(features, labels, mode, params):
    # logits = dnn_model(features, is_training=True, reuse=False)
    # logits_ = dnn_model(features, reuse=True)
    
    logits  = cnn_model(features, is_training=True, reuse=False)
    logits_ = cnn_model(features, reuse=True)
    
    # predictions
    prediction = tf.nn.softmax(logits_)
    prediction_classes = tf.argmax(prediction, axis=1)
    
    # prediction mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, 
                                          predictions={'class_ids': prediction_classes[:, tf.newaxis],
                                                       'prediction': prediction_classes})
    
    # define the loss function to be optimized by 
    # 1) first calculating the cross-entropy between theoutput of the neural network and 
    #    the true labels for the input data.
    # 2) then reduce the cross-entropy batch-tensor to a single number which can be used 
    #    in the optimization of the neural network
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, 
                                                                   labels=tf.cast(labels, tf.int32))
    loss_op = tf.reduce_mean(cross_entropy)
    
    # define the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    
    # define train_op
    train_op = optimizer.minimize(loss_op,
                                  global_step=tf.train.get_global_step())
    
    # evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=prediction_classes)
    
    return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss_op,
            train_op=train_op,
            eval_metric_ops={"accuracy": acc_op}
    )

### Define the parameters

In [39]:
learning_rate  = 0.01
params = {"learning_rate": learning_rate}

### Build the estimator  

In [40]:
model = tf.estimator.Estimator(model_funct, model_dir='../saved_models/cnn_estimator', params=params)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '../saved_models/cnn_estimator', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4a75b6d9e8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


### Train the model

In [41]:
model.train(input_fn=train_input_fn, steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../saved_models/cnn_estimator/model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into ../saved_models/cnn_estimator/model.ckpt.
INFO:tensorflow:loss = 2.31209811496876, step = 0
INFO:tensorflow:global_step/sec: 20.0689
INFO:tensorflow:loss = 0.09515441882038242, step = 100 (4.984 sec)
INFO:tensorflow:global_step/sec: 19.7995
INFO:tensorflow:loss = 0.09260443545476586, step = 200 (5.051 sec)
INFO:tensorflow:global_step/sec: 20.1331
INFO:tensorflow:loss = 0.24140160856187484, step = 300 (4.967 sec)
INFO:tensorflow:global_step/sec: 20.2791
INFO:tensorflow:loss = 0.12383507908875759, step = 400 (4.931 sec)
INFO:tensorflow:global_step/sec: 20.0919
INFO:tensorflow:loss = 0.03839802769472121, step = 500 (4.978 sec)


<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7f4a75b6d1d0>

### Evaluate the model 

In [42]:
result = model.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-11-06-21:30:48
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../saved_models/cnn_estimator/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-06-21:30:49
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.981, global_step = 1000, loss = 0.071738005
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: ../saved_models/cnn_estimator/model.ckpt-1000


In [43]:
result

{'accuracy': 0.981, 'loss': 0.071738005, 'global_step': 1000}

In [44]:
print("Classification accuracy: {0:.2%}".format(result["accuracy"]))

Classification accuracy: 98.10%


### Make some predictions 

In [45]:
some_images = X_test[0:9]
expected = y_test[0:9]

In [46]:
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": some_images},
    num_epochs=1,
    shuffle=False
)

In [47]:
predictions = model.predict(input_fn=predict_input_fn)

In [48]:
cls_pred = list(predictions)[:]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../saved_models/cnn_estimator/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [33]:
[cls_pred[i]['class_ids'] for i in range(len(cls_pred))]

[array([7]),
 array([2]),
 array([1]),
 array([0]),
 array([4]),
 array([1]),
 array([4]),
 array([9]),
 array([5])]

In [49]:
template = ('\nPrediction is "{}", expected "{}"')

for pred_dict, expec in zip(cls_pred, expected):
    class_id = pred_dict['class_ids'][0]
    print(template.format(class_id, expec))


Prediction is "7", expected "7"

Prediction is "2", expected "2"

Prediction is "1", expected "1"

Prediction is "0", expected "0"

Prediction is "4", expected "4"

Prediction is "1", expected "1"

Prediction is "4", expected "4"

Prediction is "9", expected "9"

Prediction is "5", expected "5"
