# High-level TF Example - tf.estimator.Estimator()

This is a much more concise way of using TensorFlow and is hopefully the way forward. The multi-GPU example will probably build on this, since `tf.estimator.Estimator` provides a very nice wrapper and good TensorBoard support.

See example: https://github.com/BobLiu20/Classification_Nets/blob/master/tensorflow/training/train_estimator.py

In [1]:
import numpy as np
import os
import sys
import tensorflow as tf
from common.params import *
from common.utils import *
slim = tf.contrib.slim

In [2]:
# Force one-gpu: expose only device 0 through CUDA_VISIBLE_DEVICES
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
def _environment_rows():
    """Lazily yield the fields of each report line, in print order.

    Being a generator, each row is only evaluated right before it is
    printed, so the helper calls and the prints stay interleaved.
    """
    yield "OS: ", sys.platform
    yield "Python: ", sys.version
    yield "Numpy: ", np.__version__
    yield "Tensorflow: ", tf.__version__
    yield "GPU: ", get_gpu_name()
    yield (get_cuda_version(),)
    yield "CuDNN Version ", get_cudnn_version()


# Report the software / hardware stack used for this run
for _row in _environment_rows():
    print(*_row)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
Tensorflow:  1.4.0
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
def create_symbol(X, training, n_classes):
    """Build the convolutional network graph and return the class logits.

    Architecture: two blocks of (conv-relu, conv, max-pool, relu, dropout)
    with 50 and then 100 filters, followed by a 512-unit ReLU dense layer,
    dropout, and a final dense layer named 'output'.

    Args:
        X: mapping whose 'features' entry is the input image tensor; the
            layers are configured with data_format='channels_first'.
        training: flag forwarded to every dropout layer; dropout is only
            active when it is true.
        n_classes: number of units in the final 'output' layer.

    Returns:
        The output tensor of the final dense layer (logits, no softmax).
    """
    print("Training mode: ", training==True)

    # Keyword arguments shared by every conv / pooling layer below
    conv_kwargs = dict(kernel_size=(3, 3), padding='same',
                       data_format='channels_first')
    pool_kwargs = dict(pool_size=(2, 2), strides=(2, 2), padding='valid',
                       data_format='channels_first')

    # Block 1: 50 filters
    net = tf.layers.conv2d(X['features'], filters=50, activation=tf.nn.relu,
                           **conv_kwargs)
    net = tf.layers.conv2d(net, filters=50, **conv_kwargs)
    net = tf.layers.max_pooling2d(net, **pool_kwargs)
    net = tf.nn.relu(net)
    # Tensorflow requires a flag for training in dropout
    net = tf.layers.dropout(net, rate=0.25, training=training)

    # Block 2: 100 filters
    net = tf.layers.conv2d(net, filters=100, activation=tf.nn.relu,
                           **conv_kwargs)
    net = tf.layers.conv2d(net, filters=100, **conv_kwargs)
    net = tf.layers.max_pooling2d(net, **pool_kwargs)
    net = tf.nn.relu(net)
    net = tf.layers.dropout(net, rate=0.25, training=training)

    # Classifier head. The flattened size is hard-coded to 100 maps of 8x8,
    # which is what two 2x2 poolings leave from a 32x32 input.
    net = tf.reshape(net, shape=[-1, 100*8*8])
    net = tf.layers.dense(net, units=512, activation=tf.nn.relu)
    net = tf.layers.dropout(net, rate=0.5, training=training)
    return tf.layers.dense(net, units=n_classes, name='output')

In [5]:
def model_fn(features, labels, mode, params):
    """Estimator model function: builds the graph for the requested mode.

    Only the ops each mode needs are created: the optimizer and training op
    exist only in TRAIN mode and the accuracy metric only in EVAL mode, so
    the evaluation graph carries no gradient ops or momentum slot variables.

    Args:
        features: mapping passed straight to create_symbol, which reads its
            'features' entry.
        labels: integer class ids used by the sparse softmax cross-entropy
            and the accuracy metric; not used in PREDICT mode.
        mode: one of the tf.estimator.ModeKeys values.
        params: dict with 'n_classes', and, for training, 'lr' and
            'momentum'.

    Returns:
        A tf.estimator.EstimatorSpec:
          * PREDICT: predictions={"output": <argmax class ids>}
          * EVAL:    loss and eval_metric_ops={"acc": accuracy}
          * TRAIN:   loss and train_op (momentum SGD step)
    """
    # Create symbol
    logits = create_symbol(X=features,
                           training=(mode == tf.estimator.ModeKeys.TRAIN),
                           n_classes=params['n_classes'])
    # Predicted class id per example (argmax over the class axis)
    predictions = tf.argmax(logits, axis=1)

    # ModeKeys.PREDICT
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions={"output": predictions})

    # Loss (needed for both EVAL and TRAIN)
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=labels)
    loss = tf.reduce_mean(xentropy)

    # ModeKeys.EVAL
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            "acc": tf.metrics.accuracy(labels=labels, predictions=predictions)}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # ModeKeys.TRAIN: optimizer and train op are only built here
    optimizer = tf.train.MomentumOptimizer(learning_rate=params['lr'],
                                           momentum=params['momentum'])
    train_op = optimizer.minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op)

In [6]:
%%time
# Data into format for library (requested with channel_first=True)
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
_splits = (x_train, x_test, y_train, y_test)
# Sanity-check shapes, then dtypes, of the four arrays
print(*(split.shape for split in _splits))
print(*(split.dtype for split in _splits))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 643 ms, sys: 581 ms, total: 1.22 s
Wall time: 1.22 s


In [7]:
%%time
# Create Estimator
# Hyper-parameters handed to model_fn through its `params` argument
estimator_params = {"lr": LR, "momentum": MOMENTUM, "n_classes": N_CLASSES}
# Log the global step rate and save summaries every 1000 steps
run_config = tf.estimator.RunConfig(log_step_count_steps=1000,
                                    save_summary_steps=1000)
nn = tf.estimator.Estimator(model_fn=model_fn,
                            params=estimator_params,
                            config=run_config)

INFO:tensorflow:Using config: {'_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_task_type': 'worker', '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_log_step_count_steps': 1000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f814d269cf8>, '_is_chief': True, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_keep_checkpoint_max': 5, '_task_id': 0, '_save_summary_steps': 1000, '_model_dir': '/tmp/tmpq9y0ghxl', '_session_config': None, '_service': None}
CPU times: user 0 ns, sys: 3.72 ms, total: 3.72 ms
Wall time: 3.18 ms


In [8]:
%%time
# Train Estimator: 60s
# Shuffled mini-batches over the training arrays for 10 epochs
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"features": x_train},
    y=y_train,
    batch_size=BATCHSIZE,
    num_epochs=10,
    shuffle=True)
nn.train(input_fn=train_input_fn)

Training mode:  True
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpq9y0ghxl/model.ckpt.
INFO:tensorflow:loss = 2.3170414, step = 1
INFO:tensorflow:loss = 2.2064767, step = 101 (0.897 sec)
INFO:tensorflow:loss = 2.1269495, step = 201 (0.740 sec)
INFO:tensorflow:loss = 1.786201, step = 301 (0.759 sec)
INFO:tensorflow:loss = 1.8294327, step = 401 (0.734 sec)
INFO:tensorflow:loss = 2.0215316, step = 501 (0.727 sec)
INFO:tensorflow:loss = 1.8027611, step = 601 (0.729 sec)
INFO:tensorflow:loss = 1.655209, step = 701 (0.727 sec)
INFO:tensorflow:loss = 1.5047133, step = 801 (0.779 sec)
INFO:tensorflow:loss = 1.364079, step = 901 (0.759 sec)
INFO:tensorflow:global_step/sec: 131.782
INFO:tensorflow:loss = 1.8369548, step = 1001 (0.737 sec)
INFO:tensorflow:loss = 1.3392849, step = 1101 (0.744 sec)
INFO:tensorflow:loss = 1.44081, step = 1201 (0.732 sec)
INFO:tensorflow:loss = 1.1232271, step = 1301 (0.737 sec)
INFO:tensorflow:loss = 1.246576, ste

<tensorflow.python.estimator.estimator.Estimator at 0x7f814d269978>

In [9]:
%%time
# Evaluate estimator: 1s
# Accuracy: 78%
# Unshuffled input over the test arrays
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"features": x_test},
    y=y_test,
    shuffle=False)
nn.evaluate(input_fn=eval_input_fn)

Training mode:  False
INFO:tensorflow:Starting evaluation at 2018-03-10-02:01:29
INFO:tensorflow:Restoring parameters from /tmp/tmpq9y0ghxl/model.ckpt-7813
INFO:tensorflow:Finished evaluation at 2018-03-10-02:01:30
INFO:tensorflow:Saving dict for global step 7813: acc = 0.7815, global_step = 7813, loss = 0.6418523
CPU times: user 965 ms, sys: 129 ms, total: 1.09 s
Wall time: 1.03 s


{'acc': 0.7815, 'global_step': 7813, 'loss': 0.6418523}