# Boston Housing Dataset

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf

# Set TensorFlow's logging verbosity threshold to ERROR (presumably to keep
# the training/evaluation cell output short).
tf.logging.set_verbosity(tf.logging.ERROR)

The Boston Housing dataset describes each record with the following attributes:

1.	CRIM - per capita crime rate by town
2.	ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
3.	INDUS - proportion of non-retail business acres per town.
4.	CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
5.	NOX - nitric oxides concentration (parts per 10 million)
6.	RM - average number of rooms per dwelling
7.	AGE - proportion of owner-occupied units built prior to 1940
8.	DIS - weighted distances to five Boston employment centres
9.	RAD - index of accessibility to radial highways
10.	TAX - full-value property-tax rate per USD 10,000
11.	PTRATIO - pupil-teacher ratio by town
12.	B - $1000(Bk - 0.63)^2$, where $Bk$ is the proportion of Black residents by town
13.	LSTAT - % lower status of the population
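14.	MEDV - median value of owner-occupied homes, in thousands of USD

Note: this notebook names only ten of these columns when reading the CSV files: CRIM, ZN, INDUS, NOX, RM, AGE, DIS, TAX and PTRATIO as features, and MEDV as the label.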



In [11]:
# Name of the regression target column.
LABEL = "medv"

# Names of the model input columns, in the order they appear in COLUMNS.
FEATURES = ["crim", "zn", "indus", "nox", "rm", "age", "dis", "tax", "ptratio"]

# Full list of column names (features first, label last); passed as `names=`
# to every pd.read_csv call in this notebook.
COLUMNS = FEATURES + [LABEL]

print("Features: {}".format(FEATURES))
print("Label: {}".format(LABEL))

Features: ['crim', 'zn', 'indus', 'nox', 'rm', 'age', 'dis', 'tax', 'ptratio']
Label: medv


In [0]:
def my_input_fn(data_set, num_epochs=None, shuffle=True):
    """Wrap a data set in a ``pandas_input_fn`` for use with the estimator.

    Args:
        data_set: Column-indexable table (the notebook passes pandas
            DataFrames) containing every column named in FEATURES plus the
            LABEL column.
        num_epochs: Forwarded unchanged to ``pandas_input_fn``.
        shuffle: Forwarded unchanged to ``pandas_input_fn``.

    Returns:
        The result of ``tf.estimator.inputs.pandas_input_fn`` called with the
        feature frame and label series built from ``data_set``.
    """
    # Features and labels are rebuilt from the raw ``.values`` arrays rather
    # than passed as slices of ``data_set`` itself.
    feature_frame = pd.DataFrame(
        {name: data_set[name].values for name in FEATURES})
    label_series = pd.Series(data_set[LABEL].values)

    return tf.estimator.inputs.pandas_input_fn(
        x=feature_frame,
        y=label_series,
        num_epochs=num_epochs,
        shuffle=shuffle)

In [0]:
# NOTE(review): this URL tracks the `master` branch, so the files may move or
# disappear over time - consider pinning to a tag or commit; confirm the files
# are still reachable.
URL_PREFIX = "https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/tutorials/input_fn/"


def _read_boston_csv(file_name):
    """Read one CSV located under URL_PREFIX into a DataFrame.

    The first row of the file is skipped and the columns are named after
    COLUMNS; whitespace following each delimiter is ignored.
    """
    return pd.read_csv(
        URL_PREFIX + file_name,
        skipinitialspace=True,
        skiprows=1,
        names=COLUMNS)


training_set = _read_boston_csv("boston_train.csv")
test_set = _read_boston_csv("boston_test.csv")
prediction_set = _read_boston_csv("boston_predict.csv")

In [0]:
# One numeric feature column per name in FEATURES, in the same order.
my_feature_columns = list(map(tf.feature_column.numeric_column, FEATURES))

In [0]:
# NOTE(review): the model directory is a fixed path, so checkpoints from an
# earlier run are presumably picked up again on the next run (the recorded
# evaluation reports global_step 10000 after a single 5000-step train call).
# Clear this directory before re-running if a fresh model is wanted - confirm.
MODEL_DIR = "/tmp/boston_housing"

# Sizes of the hidden layers handed to the DNN regressor.
HIDDEN_UNITS = [10, 10]

regressor = tf.estimator.DNNRegressor(
    feature_columns=my_feature_columns,
    hidden_units=HIDDEN_UNITS,
    model_dir=MODEL_DIR)

In [16]:
# Training uses my_input_fn's defaults (num_epochs=None, shuffle=True) and is
# bounded by the `steps` argument instead.
train_input_fn = my_input_fn(training_set)
regressor.train(input_fn=train_input_fn, steps=5000)

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7febab7341d0>

In [17]:
# Evaluate with a single, unshuffled pass over the test set.
eval_input_fn = my_input_fn(test_set, num_epochs=1, shuffle=False)
evaluation = regressor.evaluate(input_fn=eval_input_fn)

# `evaluation` is indexed like a dict; print all of it, then the "loss" entry.
print("Evaluation: {}".format(evaluation))
print("Loss: {0:f}".format(evaluation["loss"]))

Evaluation: {'average_loss': 15.496426, 'global_step': 10000, 'loss': 1549.6426}
Loss: 1549.642578


In [18]:
# Predict with a single, unshuffled pass over the prediction set.
predict_input_fn = my_input_fn(prediction_set, num_epochs=1, shuffle=False)
predictions = regressor.predict(input_fn=predict_input_fn)

# Print each prediction as it is yielded.
for predicted in predictions:
    print("Prediction {}".format(predicted))

Prediction {'predictions': array([35.5695], dtype=float32)}
Prediction {'predictions': array([18.927225], dtype=float32)}
Prediction {'predictions': array([24.989763], dtype=float32)}
Prediction {'predictions': array([36.304363], dtype=float32)}
Prediction {'predictions': array([17.109018], dtype=float32)}
Prediction {'predictions': array([19.793098], dtype=float32)}
