# 预测房价
本 notebook 以波士顿房价预测为例，讲解如何为 Estimator 编写自定义的输入函数（input_fn）来提供输入数据。

In [1]:
import itertools
import os

import pandas as pd
import requests
import tensorflow as tf

## 1.下载数据

In [None]:
# Download locations of the Boston housing CSV files used below for
# training, evaluation and prediction respectively.
TRAIN_URL = "http://download.tensorflow.org/data/boston_train.csv"
TEST_URL = "http://download.tensorflow.org/data/boston_test.csv"
PREDICT_URL = "http://download.tensorflow.org/data/boston_predict.csv"

def download(url, data_dir='data', timeout=30):
    """Download ``url`` and store it in ``data_dir``.

    The local file name is the part of ``url`` after its last "/".

    Args:
        url: Address of the file to fetch.
        data_dir: Directory the file is written to; created when missing.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        ValueError: If ``url`` ends with "/" so no file name can be derived.
        requests.HTTPError: If the server responds with an error status.
    """
    name = url[url.rfind("/") + 1:]
    if not name:
        raise ValueError("cannot derive a file name from URL: {!r}".format(url))

    resp = requests.get(url, timeout=timeout)
    # Fail loudly rather than saving an HTTP error page as if it were the CSV.
    resp.raise_for_status()

    # A fresh checkout has no data directory yet; open() would fail without it.
    os.makedirs(data_dir, exist_ok=True)
    with open(os.path.join(data_dir, name), 'wb') as f:
        f.write(resp.content)

# Fetch the training, test and prediction CSV files one after another.
for csv_url in (TRAIN_URL, TEST_URL, PREDICT_URL):
    download(csv_url)


In [6]:
# Parse the data

# Names of the input feature columns.
FEATURES = [
    "crim", "zn", "indus", "nox", "rm",
    "age", "dis", "tax", "ptratio",
]
# Name of the target column the model is trained to predict.
LABEL = "medv"
# Full list of column names: every feature followed by the label.
COLUMNS = FEATURES + [LABEL]

# Options shared by all three reads:
#   skipinitialspace=True ignores whitespace right after a delimiter
#   (the pandas default, False, keeps it);
#   skiprows=1 skips the first line of each file, and names=COLUMNS
#   supplies the column labels instead.
csv_options = dict(skipinitialspace=True, skiprows=1, names=COLUMNS)

training_set = pd.read_csv('data/boston_train.csv', **csv_options)
print(training_set.head(3))

test_set = pd.read_csv('data/boston_test.csv', **csv_options)
print(test_set.head(3))

predict_set = pd.read_csv('data/boston_predict.csv', **csv_options)
print(predict_set.head(3))

       crim   zn  indus    nox     rm   age     dis  tax  ptratio  medv
0   2.30040  0.0  19.58  0.605  6.319  96.1  2.1000  403     14.7  23.8
1  13.35980  0.0  18.10  0.693  5.887  94.7  1.7821  666     20.2  12.7
2   0.12744  0.0   6.91  0.448  6.770   2.9  5.7209  233     17.9  26.6
      crim    zn  indus    nox     rm   age     dis  tax  ptratio  medv
0  0.13587   0.0  10.59  0.489  6.064  59.1  4.2392  277     18.6  24.4
1  0.08664  45.0   3.44  0.437  7.178  26.3  6.4798  398     15.2  36.4
2  0.26938   0.0   9.90  0.544  6.266  82.8  3.2628  304     18.4  21.6
      crim    zn  indus    nox     rm   age     dis  tax  ptratio  medv
0  0.03359  75.0   2.95  0.428  7.024  15.8  5.4011  252     18.3   NaN
1  5.09017   0.0  18.10  0.713  6.297  91.8  2.3682  666     20.2   NaN
2  0.12650  25.0   5.13  0.453  6.762  43.4  7.9809  284     19.7   NaN


In [7]:
# Build the feature columns: one numeric column per name in FEATURES.
# For more detail on feature columns, see: https://www.tensorflow.org/tutorials/linear?hl=zh-cn#feature_columns_and_transformations
feature_cols = list(map(tf.feature_column.numeric_column, FEATURES))

In [8]:
# Initialise the regression model.
# Checkpoints are saved under model_dir; when that directory already holds
# checkpoints from an earlier run, training resumes from them instead of
# starting from scratch.
regressor = tf.estimator.DNNRegressor(
    feature_columns=feature_cols,
    hidden_units=[10, 10],
    model_dir='/tmp/house_model',
)

INFO:tensorflow:Using default config.


INFO:tensorflow:Using config: {'_model_dir': '/tmp/house_model', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}


In [9]:
# Build our input function

def my_input_fn(data_set, num_epochs=None, shuffle=True):
    """Wrap a pandas DataFrame in an input function for an Estimator.

    Args:
        data_set: DataFrame holding every column named in FEATURES and,
            optionally, the LABEL column.
        num_epochs: Forwarded to ``pandas_input_fn``.
        shuffle: Forwarded to ``pandas_input_fn``.

    Returns:
        The callable produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # Drop the original index so features and labels share the same fresh
    # 0..n-1 index regardless of how data_set was indexed.
    features = data_set[FEATURES].reset_index(drop=True)

    # Prediction data need not carry a label column; pass y=None in that case
    # instead of failing with a KeyError.
    labels = None
    if LABEL in data_set:
        labels = pd.Series(data_set[LABEL].values)

    return tf.estimator.inputs.pandas_input_fn(
        x=features,
        y=labels,
        num_epochs=num_epochs,
        shuffle=shuffle)


In [12]:
# Train the model on the training set for 1000 steps.

train_input_fn = my_input_fn(training_set)
regressor.train(input_fn=train_input_fn, steps=1000)

INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Restoring parameters from /tmp/house_model/model.ckpt-6000


INFO:tensorflow:Saving checkpoints for 6001 into /tmp/house_model/model.ckpt.


INFO:tensorflow:loss = 2619.21, step = 6001


INFO:tensorflow:global_step/sec: 363.648


INFO:tensorflow:loss = 4094.67, step = 6101 (0.278 sec)


INFO:tensorflow:global_step/sec: 352.822


INFO:tensorflow:loss = 3853.81, step = 6201 (0.280 sec)


INFO:tensorflow:global_step/sec: 405.259


INFO:tensorflow:loss = 4074.71, step = 6301 (0.246 sec)


INFO:tensorflow:global_step/sec: 466.653


INFO:tensorflow:loss = 3825.88, step = 6401 (0.217 sec)


INFO:tensorflow:global_step/sec: 471.589


INFO:tensorflow:loss = 4001.2, step = 6501 (0.209 sec)


INFO:tensorflow:global_step/sec: 466.856


INFO:tensorflow:loss = 6056.1, step = 6601 (0.213 sec)


INFO:tensorflow:global_step/sec: 484.405


INFO:tensorflow:loss = 2652.07, step = 6701 (0.208 sec)


INFO:tensorflow:global_step/sec: 461.722


INFO:tensorflow:loss = 3416.54, step = 6801 (0.216 sec)


INFO:tensorflow:global_step/sec: 471.628


INFO:tensorflow:loss = 3187.12, step = 6901 (0.212 sec)


INFO:tensorflow:Saving checkpoints for 7000 into /tmp/house_model/model.ckpt.


INFO:tensorflow:Loss for final step: 2781.42.


<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7feddd2d77f0>

In [13]:
# Evaluate the model: a single, unshuffled pass over the test set.

eval_input_fn = my_input_fn(test_set, num_epochs=1, shuffle=False)
ev = regressor.evaluate(input_fn=eval_input_fn)
loss = ev["loss"]
print("Loss:{0:f}".format(loss))


INFO:tensorflow:Starting evaluation at 2017-12-08-12:07:27


INFO:tensorflow:Restoring parameters from /tmp/house_model/model.ckpt-7000


INFO:tensorflow:Finished evaluation at 2017-12-08-12:07:27


INFO:tensorflow:Saving dict for global step 7000: average_loss = 11.6127, global_step = 7000, loss = 1161.27


Loss:1161.270752


In [14]:
# Make predictions: a single, unshuffled pass over the prediction set.

predict_input_fn = my_input_fn(predict_set, num_epochs=1, shuffle=False)
y = regressor.predict(input_fn=predict_input_fn)
# Take at most the first six results that predict() yields.
predictions = [p["predictions"] for p in itertools.islice(y, 6)]
print("Predictions: {}".format(str(predictions)))

INFO:tensorflow:Restoring parameters from /tmp/house_model/model.ckpt-7000


Predictions: [array([ 33.66023254], dtype=float32), array([ 17.50827408], dtype=float32), array([ 23.53103447], dtype=float32), array([ 35.09741211], dtype=float32), array([ 15.20833969], dtype=float32), array([ 18.53084373], dtype=float32)]


# 总结

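评估得到的 loss 与官方教程相比看起来大了约 100 倍，但预测结果相近。从上面的评估日志可以看到 average_loss = 11.6127，而 loss = 1161.27，两者正好相差 100 倍：这很可能是因为 loss 是一个 batch 内所有样本损失之和，而 average_loss 才是每个样本的平均损失，所以与官方结果比较时应当使用 average_loss。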