# Boston Housing Dataset

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf

# Set TensorFlow's logging verbosity threshold to ERROR so the cells below
# are not flooded with lower-severity log output.
tf.logging.set_verbosity(tf.logging.ERROR)

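The full Boston Housing dataset describes each town with the 14 attributes listed below. This notebook uses 10 of them: CHAS, RAD, B and LSTAT do not appear in `COLUMNS`.

1.	CRIM - per capita crime rate by town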
2.	ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
3.	INDUS - proportion of non-retail business acres per town.
4.	CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
5.	NOX - nitric oxides concentration (parts per 10 million)
6.	RM - average number of rooms per dwelling
7.	AGE - proportion of owner-occupied units built prior to 1940
8.	DIS - weighted distances to five Boston employment centres
9.	RAD - index of accessibility to radial highways
10.	TAX - full-value property-tax rate per \$10,000
11.	PTRATIO - pupil-teacher ratio by town
12.	B - $1000(B_k - 0.63)^2$, where $B_k$ is the proportion of blacks by town
13.	LSTAT - % lower status of the population
14.	MEDV - Median value of owner-occupied homes in \$1000's



In [0]:
# Names of the model's input columns.
FEATURES = ["crim", "zn", "indus", "nox", "rm", "age", "dis", "tax", "ptratio"]

# Name of the column the model learns to predict.
LABEL = "medv"

# Every column name: all features first, then the label last.
COLUMNS = FEATURES + [LABEL]

print("Features: {}".format(FEATURES))
print("Label: {}".format(LABEL))

In [0]:
def get_input_fn(data_set, num_epochs=None, shuffle=True):
  """Wrap a data set in a pandas-backed input function.

  Args:
    data_set: Object indexable by column name (e.g. a DataFrame) that holds
      every column named in FEATURES plus the LABEL column.
    num_epochs: Forwarded unchanged to `pandas_input_fn`.
    shuffle: Forwarded unchanged to `pandas_input_fn`.

  Returns:
    Whatever `tf.estimator.inputs.pandas_input_fn` returns for the selected
    feature columns and label column.
  """
  # Rebuild both inputs from raw `.values`, so each gets a fresh default
  # index instead of carrying over the index of `data_set`.
  feature_frame = pd.DataFrame(
      dict((name, data_set[name].values) for name in FEATURES))
  label_series = pd.Series(data_set[LABEL].values)

  return tf.estimator.inputs.pandas_input_fn(
      x=feature_frame,
      y=label_series,
      num_epochs=num_epochs,
      shuffle=shuffle)

In [0]:
# NOTE(review): this URL tracks the `master` branch, so the files may move or
# disappear over time; consider pinning to a release branch or tag once you
# have confirmed which one hosts them.
URL_PREFIX = "https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/tutorials/input_fn/"


def _read_boston_csv(filename):
  """Load one Boston housing CSV file from URL_PREFIX.

  Args:
    filename: File name appended to URL_PREFIX, e.g. "boston_train.csv".

  Returns:
    The DataFrame produced by `pd.read_csv`, with columns named by COLUMNS.
  """
  return pd.read_csv(
      URL_PREFIX + filename,
      skipinitialspace=True,
      skiprows=1,  # Skip the file's first row; names come from COLUMNS.
      names=COLUMNS)


# The three splits are read with identical parsing options.
training_set = _read_boston_csv("boston_train.csv")
test_set = _read_boston_csv("boston_test.csv")
prediction_set = _read_boston_csv("boston_predict.csv")

In [0]:
# One numeric feature column per name in FEATURES, in the same order.
feature_columns = list(map(tf.feature_column.numeric_column, FEATURES))

In [0]:
# Hyperparameters for the regressor, named so they are easy to find and tune.
HIDDEN_UNITS = [10, 10]
# NOTE(review): fixed path; Estimators are documented to reuse checkpoints
# found in model_dir, so a re-run may resume earlier training - confirm.
MODEL_DIR = "/tmp/boston_housing"

regressor = tf.estimator.DNNRegressor(
    model_dir=MODEL_DIR,
    hidden_units=HIDDEN_UNITS,
    feature_columns=feature_columns)

In [0]:
# Train on the training set using get_input_fn's defaults
# (num_epochs=None, shuffle=True) for 5000 steps.
train_input_fn = get_input_fn(training_set)
regressor.train(input_fn=train_input_fn, steps=5000)

In [0]:
# Evaluate on the test set: a single epoch with shuffling disabled.
eval_input_fn = get_input_fn(test_set, num_epochs=1, shuffle=False)
evaluation = regressor.evaluate(input_fn=eval_input_fn)
print("Evaluation: {}".format(evaluation))

# Pull the "loss" entry out of the evaluation results and report it alone.
loss = evaluation["loss"]
print("Loss: {0:f}".format(loss))

In [0]:
# Predict over the prediction set: a single epoch with shuffling disabled.
predict_input_fn = get_input_fn(prediction_set, num_epochs=1, shuffle=False)
predictions = regressor.predict(input_fn=predict_input_fn)

# Print each prediction as it is produced.
for prediction in predictions:
  print("Prediction {}".format(prediction))