# Boston Housing Prices dataset: regression with a TensorFlow Estimator (`DNNRegressor`)

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from IPython import display
import pandas as pd
import tensorflow as tf

# Keep DataFrame output compact: at most 10 rows, floats shown with one decimal.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", '{:.1f}'.format)
# Log TensorFlow messages at INFO level; the "INFO:tensorflow:..." lines in the
# cell outputs below (config, loss per 100 steps, checkpoints) come from this.
tf.logging.set_verbosity(tf.logging.INFO)

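The original dataset describes the 14 columns listed below. The struck-through columns are not used in this notebook; the remaining ten are the ones named in `COLUMNS`.

1.	CRIM - per capita crime rate by town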
2.	ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
3.	INDUS - proportion of non-retail business acres per town.
4.	~~CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)~~
5.	NOX - nitric oxides concentration (parts per 10 million)
6.	RM - average number of rooms per dwelling
7.	AGE - proportion of owner-occupied units built prior to 1940
8.	DIS - weighted distances to five Boston employment centres
9.	~~RAD - index of accessibility to radial highways~~
10.	TAX - full-value property-tax rate per 10,000 USD 
11.	PTRATIO - pupil-teacher ratio by town
12.	~~B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town~~
13.	~~LSTAT - % lower status of the population~~
14.	MEDV - median value of owner-occupied homes in thousands of USD (the regression target)



In [2]:
# Column names applied to every CSV file, in file order.
COLUMNS = [
    "CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "TAX", "PTRATIO",
    "MEDV",
]

# Every column except the last is a model input; the last one is the label.
FEATURES, TARGET = COLUMNS[:-1], COLUMNS[-1]

print("Features: {}".format(FEATURES))
print("Target: {}".format(TARGET))

Features: ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO']
Target: MEDV


In [0]:
def my_input_fn(data_set, num_epochs=None, shuffle=True):
    """Wrap a pandas DataFrame as an input function for a tf.estimator model.

    Args:
        data_set: DataFrame containing every column named in FEATURES and the
            TARGET column.
        num_epochs: Passed through unchanged to pandas_input_fn.
        shuffle: Passed through unchanged to pandas_input_fn.

    Returns:
        Whatever tf.estimator.inputs.pandas_input_fn returns for the given
        features (x) and labels (y).
    """
    # Rebuild features and labels from the raw value arrays, so only the
    # FEATURES / TARGET columns of data_set are handed to TensorFlow.
    feature_arrays = {}
    for name in FEATURES:
        feature_arrays[name] = data_set[name].values
    features_frame = pd.DataFrame(feature_arrays)
    labels = pd.Series(data_set[TARGET].values)

    return tf.estimator.inputs.pandas_input_fn(
        x=features_frame,
        y=labels,
        num_epochs=num_epochs,
        shuffle=shuffle)

In [4]:
# NOTE(review): this URL tracks the `master` branch, so the files can move or
# disappear upstream -- consider pinning a release branch or commit.
URL_PREFIX = "https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/tutorials/input_fn/"


def load_boston_csv(file_name):
    """Read one Boston housing CSV located under URL_PREFIX.

    Args:
        file_name: File name appended to URL_PREFIX, e.g. "boston_train.csv".

    Returns:
        A DataFrame whose columns are named after COLUMNS.
    """
    return pd.read_csv(
        URL_PREFIX + file_name,
        skipinitialspace=True,
        skiprows=1,  # Skip the file's first row; column names come from COLUMNS.
        names=COLUMNS)


training_set = load_boston_csv("boston_train.csv")
test_set = load_boston_csv("boston_test.csv")
prediction_set = load_boston_csv("boston_predict.csv")

# Show summary statistics for each data set under its own heading.
for summary_title, data_frame in (
        ("Training set summary:", training_set),
        ("Test set summary:", test_set),
        ("Prediction set summary:", prediction_set)):
    print(summary_title)
    display.display(data_frame.describe())

Training set summary:


Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,TAX,PTRATIO,MEDV
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,3.9,11.3,11.4,0.6,6.3,69.1,3.8,411.7,18.5,22.6
std,9.4,23.5,7.0,0.1,0.7,28.5,2.1,171.6,2.2,9.6
min,0.0,0.0,0.5,0.4,3.6,2.9,1.1,187.0,12.6,5.0
25%,0.1,0.0,5.2,0.4,5.9,44.9,2.1,277.0,17.4,16.6
50%,0.3,0.0,9.7,0.5,6.2,78.8,3.2,332.0,19.1,21.4
75%,3.7,12.5,18.1,0.6,6.6,94.6,5.1,666.0,20.2,25.0
max,89.0,100.0,27.7,0.9,8.8,100.0,12.1,711.0,22.0,50.0


Test set summary:


Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,TAX,PTRATIO,MEDV
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,2.4,10.8,10.3,0.5,6.3,66.9,3.9,395.1,18.3,22.1
std,4.6,22.3,6.2,0.1,0.6,26.5,2.1,154.1,2.2,7.5
min,0.0,0.0,1.5,0.4,5.0,8.4,1.4,188.0,12.6,7.4
25%,0.1,0.0,5.9,0.5,5.9,48.0,2.2,286.2,16.8,18.3
50%,0.2,0.0,8.6,0.5,6.2,71.6,3.2,330.0,18.6,20.9
75%,2.2,12.5,18.1,0.6,6.6,89.9,5.5,432.0,20.2,24.4
max,22.6,82.5,25.6,0.9,8.7,100.0,10.6,666.0,22.0,50.0


Prediction set summary:


Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,TAX,PTRATIO,MEDV
count,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,0.0
mean,2.3,22.2,9.4,0.6,6.4,60.4,4.3,399.0,19.2,
std,3.5,29.6,7.3,0.1,0.7,32.4,2.2,208.7,0.9,
min,0.0,0.0,2.2,0.4,5.4,15.8,1.7,222.0,18.3,
25%,0.1,0.0,3.5,0.5,5.9,41.7,2.8,260.0,18.4,
50%,0.2,12.5,7.5,0.5,6.5,57.5,4.0,294.0,19.0,
75%,3.9,31.0,16.1,0.7,7.0,86.8,5.1,575.5,20.1,
max,8.2,75.0,18.1,0.7,7.2,98.9,8.0,666.0,20.2,


In [0]:
# One numeric feature column per name in FEATURES.
my_feature_columns = [
    tf.feature_column.numeric_column(feature_name)
    for feature_name in FEATURES
]

In [6]:
# Two hidden layers with 10 units each.
hidden_layer_sizes = [10, 10]

# NOTE(review): the checkpoint directory is a fixed path. Checkpoints left there
# by an earlier run are presumably picked up again on re-run -- confirm, and
# clear the directory when a fresh training run is wanted.
checkpoint_dir = "/tmp/boston_housing"

regressor = tf.estimator.DNNRegressor(
    feature_columns=my_feature_columns,
    hidden_units=hidden_layer_sizes,
    model_dir=checkpoint_dir)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2015f9c590>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_device_fn': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/boston_housing', '_train_distribute': None, '_save_summary_steps': 100}


In [7]:
# Train on the training set for 5000 steps, using my_input_fn's defaults
# (num_epochs=None, shuffle=True). The call stays the last expression so the
# cell still displays the returned estimator.
training_input_fn = my_input_fn(training_set)
regressor.train(input_fn=training_input_fn, steps=5000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/boston_housing/model.ckpt.
INFO:tensorflow:loss = 2643046.0, step = 0
INFO:tensorflow:global_step/sec: 240.963
INFO:tensorflow:loss = 10134.285, step = 100 (0.420 sec)
INFO:tensorflow:global_step/sec: 260.15
INFO:tensorflow:loss = 10495.291, step = 200 (0.380 sec)
INFO:tensorflow:global_step/sec: 218.485
INFO:tensorflow:loss = 9703.773, step = 300 (0.466 sec)
INFO:tensorflow:global_step/sec: 200.956
INFO:tensorflow:loss = 10489.892, step = 400 (0.496 sec)
INFO:tensorflow:global_step/sec: 197.106
INFO:tensorflow:loss = 7748.753, step = 500 (0.505 sec)
INFO:tensorflow:global_step/sec: 200.143
INFO:tensorflow:loss = 7558.133, step = 600 (0.498 sec)
INFO:tensorflow:global_step/sec: 202.209
INFO:tensorflow:l

INFO:tensorflow:global_step/sec: 192.954
INFO:tensorflow:loss = 4058.0725, step = 2900 (0.519 sec)
INFO:tensorflow:global_step/sec: 203.425
INFO:tensorflow:loss = 6692.744, step = 3000 (0.491 sec)
INFO:tensorflow:global_step/sec: 198.678
INFO:tensorflow:loss = 6284.3066, step = 3100 (0.509 sec)
INFO:tensorflow:global_step/sec: 200.162
INFO:tensorflow:loss = 7045.482, step = 3200 (0.501 sec)
INFO:tensorflow:global_step/sec: 192.722
INFO:tensorflow:loss = 7414.207, step = 3300 (0.518 sec)
INFO:tensorflow:global_step/sec: 191.716
INFO:tensorflow:loss = 7274.093, step = 3400 (0.515 sec)
INFO:tensorflow:global_step/sec: 206.222
INFO:tensorflow:loss = 3871.4172, step = 3500 (0.490 sec)
INFO:tensorflow:global_step/sec: 200.548
INFO:tensorflow:loss = 5158.381, step = 3600 (0.494 sec)
INFO:tensorflow:global_step/sec: 192.115
INFO:tensorflow:loss = 6055.6665, step = 3700 (0.521 sec)
INFO:tensorflow:global_step/sec: 193.492
INFO:tensorflow:loss = 6360.423, step = 3800 (0.518 sec)
INFO:tensorflow:

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f2015f9c790>

In [8]:
# Evaluate with one unshuffled pass over the test set.
test_input_fn = my_input_fn(test_set, num_epochs=1, shuffle=False)
evaluation = regressor.evaluate(input_fn=test_input_fn)

# Print the full metrics dict, then the value stored under its "loss" key.
print("Evaluation: {}".format(evaluation))
print("Loss: {0:f}".format(evaluation["loss"]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-11-17:27:56
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/boston_housing/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-11-17:27:56
INFO:tensorflow:Saving dict for global step 5000: average_loss = 21.283829, global_step = 5000, loss = 2128.3828
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/boston_housing/model.ckpt-5000
Evaluation: {'average_loss': 21.283829, 'global_step': 5000, 'loss': 2128.3828}
Loss: 2128.382812


In [9]:
# Predict with one unshuffled pass over the prediction set, so results come
# back in row order.
prediction_input_fn = my_input_fn(prediction_set, num_epochs=1, shuffle=False)
predictions = regressor.predict(input_fn=prediction_input_fn)

# Print each result as it is yielded.
for prediction in predictions:
    print("prediction: {}".format(prediction))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/boston_housing/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
prediction: {'predictions': array([35.15348], dtype=float32)}
prediction: {'predictions': array([20.169031], dtype=float32)}
prediction: {'predictions': array([24.054123], dtype=float32)}
prediction: {'predictions': array([34.626022], dtype=float32)}
prediction: {'predictions': array([15.523567], dtype=float32)}
prediction: {'predictions': array([20.708855], dtype=float32)}
