Building input functions with tf.contrib.learn
https://www.tensorflow.org/get_started/input_fn


Preprocess and feed data into tensorflow models

In [5]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import pandas as pd
import tensorflow as tf

# Set TensorFlow's logging threshold to INFO so INFO-level messages are shown.
tf.logging.set_verbosity(tf.logging.INFO)


In [3]:
# Build a constant tensor from a plain Python list of numbers.
feature_column_data = [1, 2.4, 0, 9.9, 3, 120]
feature_tensor = tf.constant(feature_column_data)
# Bare last expression so the notebook displays the tensor's repr.
feature_tensor


<tf.Tensor 'Const_1:0' shape=(6,) dtype=float32>

Practice with the Boston housing dataset

In [6]:
# Locations of the training, test and prediction CSV files.
data_train, data_test, prediction = (
    '/tmp/data/boston_train.csv',
    '/tmp/data/boston_test.csv',
    '/tmp/data/boston_predict.csv',
)

# The label is the last CSV column; every column before it is a feature.
LABEL = "medv"
FEATURES = ["crim", "zn", "indus", "nox", "rm",
            "age", "dis", "tax", "ptratio"]
COLUMNS = FEATURES + [LABEL]

# Read all three files with identical options: skip each file's first row
# and use COLUMNS as the column names.
training_set, test_set, prediction_set = (
    pd.read_csv(csv_path, skipinitialspace=True, skiprows=1, names=COLUMNS)
    for csv_path in (data_train, data_test, prediction)
)



    Create FeatureColumns for the input data

In [9]:
# One real-valued feature column per name in FEATURES.
feature_cols = [tf.contrib.layers.real_valued_column(k) for k in FEATURES]

In [10]:
# DNN regressor over the feature columns with hidden_units=[10, 10].
# Checkpoints are written under model_dir (see the training log output).
regressor = tf.contrib.learn.DNNRegressor(feature_columns=feature_cols,
                                         hidden_units=[10,10],
                                         model_dir='/tmp/boston_model')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11fcbfa90>, '_model_dir': '/tmp/boston_model', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}


    To pass input to the regressor, create a function that accepts a pandas DataFrame and returns the feature columns and label values as Tensors.

In [11]:
def input_fn(data_set):
    """Turn a pandas DataFrame into the (features, labels) pair for the regressor.

    Args:
        data_set: DataFrame containing every column named in FEATURES plus
            the LABEL column.

    Returns:
        A tuple ``(feature_cols, labels)`` where ``feature_cols`` maps each
        feature name to a constant tensor of that column's values and
        ``labels`` is a constant tensor of the LABEL column's values.
    """
    feature_cols = {}
    for name in FEATURES:
        column_values = data_set[name].values
        feature_cols[name] = tf.constant(column_values)
    # Labels are created after the features, matching the original op order.
    label_values = data_set[LABEL].values
    labels = tf.constant(label_values)
    return feature_cols, labels


In [12]:
# Inspect the feature and label tensors input_fn builds for the test set.
input_fn(test_set)

({'age': <tf.Tensor 'Const_7:0' shape=(100,) dtype=float64>,
  'crim': <tf.Tensor 'Const_2:0' shape=(100,) dtype=float64>,
  'dis': <tf.Tensor 'Const_8:0' shape=(100,) dtype=float64>,
  'indus': <tf.Tensor 'Const_4:0' shape=(100,) dtype=float64>,
  'nox': <tf.Tensor 'Const_5:0' shape=(100,) dtype=float64>,
  'ptratio': <tf.Tensor 'Const_10:0' shape=(100,) dtype=float64>,
  'rm': <tf.Tensor 'Const_6:0' shape=(100,) dtype=float64>,
  'tax': <tf.Tensor 'Const_9:0' shape=(100,) dtype=int64>,
  'zn': <tf.Tensor 'Const_3:0' shape=(100,) dtype=float64>},
 <tf.Tensor 'Const_11:0' shape=(100,) dtype=float64>)

In [13]:
# Train for 5000 steps. input_fn is passed as a zero-argument lambda that
# binds training_set.
regressor.fit(input_fn=lambda: input_fn(training_set), steps=5000)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/boston_model/model.ckpt.
INFO:tensorflow:loss = 1613.15, step = 1
INFO:tensorflow:global_step/sec: 1387.58
INFO:tensorflow:loss = 100.575, step = 101 (0.073 sec)
INFO:tensorflow:global_step/sec: 1772.3
INFO:tensorflow:loss = 85.1315, step = 201 (0.056 sec)
INFO:tensorflow:global_step/sec: 1789.07
INFO:tensorflow:loss = 78.1854, step = 301 (0.056 sec)
INFO:tensorflow:global_step/sec: 1587.48
INFO:tensorflow:loss = 73.497, step = 401 (0.063 sec)
INFO:tensorflow:global_step/sec: 1684.32
INFO:tensorflow:loss = 70.4283, step = 501 (0.059 sec)
INFO:tensorflow:global_step/sec:

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x11fcbff90>, 'hidden_units': [10, 10], 'feature_columns': (_RealValuedColumn(column_name='crim', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='zn', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='indus', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='nox', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='rm', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='age', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='dis', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='tax', dimension=1, default_value=None, dtype=tf.float32,

In [15]:
# Evaluate the trained regressor on the test set for one evaluation step.
ev = regressor.evaluate(input_fn=lambda: input_fn(test_set), steps=1)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Starting evaluation at 2017-07-30-00:58:24
INFO:tensorflow:Restoring parameters from /tmp/boston_model/model.ckpt-5000
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-07-30-00:58:25
INFO:tensorflow:Saving dict for global step 5000: global_step = 5000, loss = 13.8922


In [17]:
# Pull the loss out of the evaluation results and print it in fixed-point form.
loss_score = ev['loss']
print("Loss: {0:f}".format(loss_score))

Loss: 13.892195


In [18]:
# Display the raw loss value.
loss_score

13.892195

In [20]:
# Predict using the prediction set.
# predict_scores() is used instead of predict(): calling predict() without
# `outputs` is deprecated and logs "Please switch to predict_scores".
y = regressor.predict_scores(input_fn=lambda: input_fn(prediction_set))
# y is consumed as an iterator; keep only the first six predictions.
predictions = list(itertools.islice(y, 6))
print('predictions: {}'.format(str(predictions)))

Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
INFO:tensorflow:Restoring parameters from /tmp/boston_model/model.ckpt-5000
predictions: [34.460438, 17.228466, 21.150135, 35.438984, 15.545377, 18.189079]
