In [1]:
import itertools
 
import pandas as pd
import tensorflow as tf
 
# Log at INFO level so TensorFlow's INFO messages (e.g. the per-interval loss
# lines during training) are shown in the cell outputs.
tf.logging.set_verbosity(tf.logging.INFO)

  from ._conv import register_converters as _register_converters


## 1. 定义数据集中的列名COLUMNS

为了区分特征（feature）和标签（label），还要定义FEATURES和LABEL。

In [26]:
# Column names of the dataset. FEATURES lists the model inputs and LABEL names
# the target column, so the label can be told apart from the features.
COLUMNS = ["crim", "zn", "indus", "nox", "rm", "age",
           "dis", "tax", "ptratio", "medv"]
FEATURES = ["crim", "zn", "indus", "nox", "rm",
            "age", "dis", "tax", "ptratio"]
LABEL = "medv"


def _read_boston_csv(path):
    """Read one Boston-housing CSV file into a DataFrame.

    The first line of the file is skipped and the columns are named from
    COLUMNS; whitespace following a delimiter is ignored.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A pandas DataFrame whose columns are named according to COLUMNS.
    """
    return pd.read_csv(path, skipinitialspace=True, skiprows=1, names=COLUMNS)


training_set = _read_boston_csv("boston_train.csv")
test_set = _read_boston_csv("boston_test.csv")
prediction_set = _read_boston_csv("boston_predict.csv")

print(training_set.head(2))

# Last expression of the cell: display the label column wrapped as a constant
# tensor, which is the form input_fn hands to the estimator.
tf.constant(training_set[LABEL].values)

      crim   zn  indus    nox     rm   age     dis  tax  ptratio  medv
0   2.3004  0.0  19.58  0.605  6.319  96.1  2.1000  403     14.7  23.8
1  13.3598  0.0  18.10  0.693  5.887  94.7  1.7821  666     20.2  12.7


<tf.Tensor 'Const_3:0' shape=(400,) dtype=float64>

## 2. 定义FeatureColumns并创建回归

接下来，为输入数据创建一个FeatureColumns列表，该列表正式指定用于训练的特征集。

因为数据集中的所有特征都包含连续值，所以您可以使用tf.contrib.layers.real_valued_column()函数创建FeatureColumn ：

In [16]:
# Build one real-valued feature column per name in FEATURES, then display the
# resulting list as the cell output.
feature_cols = [
    tf.contrib.layers.real_valued_column(feature_name)
    for feature_name in FEATURES
]
feature_cols

[_RealValuedColumn(column_name='crim', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='zn', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='indus', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='nox', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='rm', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='age', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='dis', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='tax', dimension=1, default_value=None, dtype=tf.float32, normalizer=None),
 _RealValuedColumn(column_name='ptratio', dimension=1, default_value=None, dtype=tf.float32, normalizer=None)]

实例化一个DNNRegressor神经网络回归模型。

您需要在这里提供两个参数：

    hidden_units，一个指定每个隐藏层中的节点数的超参数，（本例使用两个隐藏层，每个层具有10个节点），
    feature_columns包含刚刚定义的FeatureColumns列表

In [20]:
# DNN regressor with two hidden layers of 10 units each, fed by feature_cols.
# NOTE(review): model_dir is a fixed path, so checkpoints persist between runs;
# the fit log in this notebook shows parameters being restored from an earlier
# checkpoint there, i.e. re-running continues training instead of starting
# fresh. Clear the directory first if a from-scratch run is intended.
regressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feature_cols,
    hidden_units=[10, 10],
    model_dir="/tmp/boston_model",
)

Instructions for updating:
Please switch to tf.contrib.estimator.*_head.
Instructions for updating:
Please replace uses of any Estimator from tf.contrib.learn with an Estimator from tf.estimator.*
Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c2df16c18>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_train_distribute': None, '_eval_distribute': None, '_device_fn': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_protocol': None, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_ke

## 3. 构建input_fn

要将输入数据传入regressor，需要创建一个输入函数，它接受一个pandas DataFrame，并以Tensors的形式返回特征列和标签值：

In [22]:
def input_fn(data_set):
    """Turn a DataFrame into the (features, labels) pair of constant tensors.

    Args:
        data_set: A pandas DataFrame containing every column named in
            FEATURES as well as the LABEL column.

    Returns:
        A tuple (features, labels): features is a dict mapping each name in
        FEATURES to a tf.constant built from that column's values, and labels
        is a tf.constant built from the LABEL column's values.
    """
    feature_tensors = {}
    for column_name in FEATURES:
        feature_tensors[column_name] = tf.constant(data_set[column_name].values)
    label_tensor = tf.constant(data_set[LABEL].values)
    return feature_tensors, label_tensor

## 4. 训练回归器

训练神经网络回归器，运行fit，其中training_set传递给input_fn，如下所示：

In [23]:
# Train for 5000 steps on the training set. The lambda defers the call to
# input_fn(training_set) until the estimator invokes it.
# NOTE(review): the recorded log starts at step 5001 after restoring
# model.ckpt-5000, so this run resumed from a checkpoint left in model_dir by
# an earlier execution — confirm whether that is intended.
regressor.fit(input_fn=lambda: input_fn(training_set), steps=5000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/boston_model/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 5000 into /tmp/boston_model/model.ckpt.
INFO:tensorflow:loss = 27.67445, step = 5001
INFO:tensorflow:global_step/sec: 819.504
INFO:tensorflow:loss = 27.651707, step = 5101 (0.123 sec)
INFO:tensorflow:global_step/sec: 976.553
INFO:tensorflow:loss = 27.802027, step = 5201 (0.102 sec)
INFO:tensorflow:global_step/sec: 1114.95
INFO:tensorflow:loss = 27.646128, step = 5301 (0.090 sec)
INFO:tensorflow:global_step/sec: 990.964
INFO:tensorflow:loss = 27.76053, step = 5401 (0.101 sec)
INFO:tensorflow:global_step/sec: 1124.95
INFO:tensorflow:loss = 27.888899, step = 5501 (0.089 sec)
INFO:tensorflow:global_step/sec: 951.348
INFO:tensorflow:loss = 27.523384, step = 5601 (0.105 sec)
INFO:tensorflow:global_step/sec: 935.997
IN

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x182b1edfd0>, 'hidden_units': [10, 10], 'feature_columns': (_RealValuedColumn(column_name='crim', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='zn', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='indus', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='nox', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='rm', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='age', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='dis', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='tax', dimension=1, default_value=None, dtype=tf.float32

## 5. 评估模型

接下来，看看训练好的模型在测试数据集上的表现。运行 evaluate，这次将 test_set 传递给 input_fn：

In [27]:
# Evaluate the trained model on the test set for a single evaluation step.
# The result is kept in `ev`, which is later indexed by metric name ("loss").
ev = regressor.evaluate(input_fn=lambda: input_fn(test_set), steps=1)

INFO:tensorflow:Starting evaluation at 2018-11-03-00:05:46
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/boston_model/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2018-11-03-00:05:46
INFO:tensorflow:Saving dict for global step 10000: global_step = 10000, loss = 11.02855


In [28]:
# Extract the loss from the evaluation results and print it in fixed-point
# notation.
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

Loss: 11.028550


## 6. 预测

最后，您可以使用该模型对 prediction_set 预测房价中值；prediction_set 包含六个只有特征数据、没有标签的示例：

In [34]:
# Score the unlabeled prediction set with the trained regressor.
y = regressor.predict_scores(input_fn=lambda: input_fn(prediction_set), batch_size=None)
# predict_scores() returns an iterable here; take at most the first six items,
# collect them into a list and print them.
predictions = list(itertools.islice(y, 6))
print("Predictions: {}".format(str(predictions)))

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/boston_model/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Predictions: [33.973972, 18.967258, 23.540922, 35.306618, 17.051012, 18.789053]
