# California Housing Dataset: NN Regression

## Setup


In [0]:
from __future__ import print_function

from IPython import display
import math
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Only surface TensorFlow errors, and keep pandas output compact: at most
# 10 rows per frame and one decimal place for floats.
tf.logging.set_verbosity(tf.logging.ERROR)
pd.set_option('display.max_rows', 10)
pd.set_option('display.float_format', '{:.1f}'.format)

In [0]:
# Load the training CSV and put its rows in random order, so that the
# head/tail split used later does not depend on the file's row order.
training_file_url = "https://storage.googleapis.com/mledu-datasets/california_housing_train.csv"
training_dataframe = pd.read_csv(training_file_url, sep=",")
shuffled_index = np.random.permutation(training_dataframe.index)
training_dataframe = training_dataframe.reindex(shuffled_index)

In [0]:
def preprocess_features(dataframe):
  """Build the model's input features from a raw housing DataFrame.

  Args:
    dataframe: DataFrame containing the raw housing columns listed below.

  Returns:
    A new DataFrame holding the selected raw columns plus a synthetic
    `rooms_per_person` column (`total_rooms` divided by `population`).
    The input DataFrame is not modified.
  """
  raw_feature_names = [
      "latitude",
      "longitude",
      "housing_median_age",
      "total_rooms",
      "total_bedrooms",
      "population",
      "households",
      "median_income",
  ]
  # Work on a copy so that adding the synthetic column never touches the input.
  features = dataframe[raw_feature_names].copy()
  rooms = dataframe["total_rooms"]
  people = dataframe["population"]
  features["rooms_per_person"] = rooms / people
  return features

def preprocess_targets(dataframe):
  """Build the regression target from a raw housing DataFrame.

  Args:
    dataframe: DataFrame containing a `median_house_value` column.

  Returns:
    A single-column DataFrame named `median_house_value` whose values are
    the input values divided by 1000 (i.e. expressed in thousands).
  """
  value_in_thousands = dataframe["median_house_value"] / 1000.0
  return pd.DataFrame({"median_house_value": value_in_thousands})

In [0]:
# Name of the label column; matches the column created by preprocess_targets
# and is used to select the label series passed to the input functions.
TARGET_NAME = "median_house_value"

In [0]:
# The first 12000 rows are used for training and the last 5000 rows for
# validation.
training_rows = training_dataframe.head(12000)
validation_rows = training_dataframe.tail(5000)

training_examples = preprocess_features(training_rows)
training_targets = preprocess_targets(training_rows)

validation_examples = preprocess_features(validation_rows)
validation_targets = preprocess_targets(validation_rows)

# Print summary statistics for each split so the two can be compared by eye.
for heading, frame in (
    ("Training examples summary:", training_examples),
    ("Validation examples summary:", validation_examples),
    ("Training targets summary:", training_targets),
    ("Validation targets summary:", validation_targets),
):
  print(heading)
  display.display(frame.describe())

## Building a Neural Network


In [0]:
def create_feature_columns(input_features):
  """Create one numeric TensorFlow feature column per input feature.

  Args:
    input_features: iterable of feature names (iterating a DataFrame yields
      its column names).

  Returns:
    A set of numeric feature columns, one for each name.
  """
  return {tf.feature_column.numeric_column(name) for name in input_features}

In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build a tf.data input pipeline and return its next batch tensors.

    Args:
      features: mapping of feature name to values (e.g. a pandas DataFrame).
      targets: label values aligned with `features`.
      batch_size: number of examples per batch.
      shuffle: whether to shuffle the examples.
      num_epochs: how many times to repeat the data; `None` repeats
        indefinitely.

    Returns:
      A `(features, labels)` tuple for the next batch, taken from a one-shot
      iterator over the dataset.
    """
    # Convert every feature column to a NumPy array keyed by feature name.
    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))

    # Shuffle individual examples *before* batching and repeating. Shuffling
    # afterwards only reorders whole batches (their contents stay fixed) and
    # mixes batches from different epochs.
    if shuffle:
        ds = ds.shuffle(10000)

    ds = ds.batch(batch_size).repeat(num_epochs)

    batch_features, batch_targets = ds.make_one_shot_iterator().get_next()
    return batch_features, batch_targets

In [0]:
def train_nn_regression_model(
    learning_rate,
    steps,
    batch_size,
    hidden_units,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
  """Train a neural-network regression model and plot its RMSE over time.

  Training is split into 10 periods. After each period the model predicts on
  both the training and the validation set, the training RMSE is printed, and
  both RMSE values are recorded. When training finishes, the two RMSE curves
  are plotted and the final values are printed.

  Args:
    learning_rate: learning rate for the gradient descent optimizer.
    steps: total number of training steps, spread evenly over the periods.
    batch_size: number of examples per training batch.
    hidden_units: hidden layer sizes passed to `tf.estimator.DNNRegressor`.
    training_examples: DataFrame of training features.
    training_targets: DataFrame with a `TARGET_NAME` column of training labels.
    validation_examples: DataFrame of validation features.
    validation_targets: DataFrame with a `TARGET_NAME` column of validation
      labels.

  Returns:
    The trained `tf.estimator.DNNRegressor`.
  """
  periods = 10
  # A step count must be a whole number. Round up so that at least `steps`
  # training steps run in total even when `steps` is not a multiple of
  # `periods`, instead of handing the estimator a fractional value.
  steps_per_period = int(math.ceil(steps / float(periods)))

  # Plain gradient descent, with gradients clipped to a norm of 5.0.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
  optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, 5.0)

  dnn_regressor = tf.estimator.DNNRegressor(
      feature_columns=create_feature_columns(training_examples),
      hidden_units=hidden_units,
      optimizer=optimizer,
  )

  # Training feeds shuffled batches indefinitely; the two prediction input
  # functions make a single ordered pass so predictions line up with targets.
  training_input_fn = lambda: input_fn(
      training_examples,
      training_targets[TARGET_NAME],
      batch_size=batch_size)

  predict_training_input_fn = lambda: input_fn(
      training_examples,
      training_targets[TARGET_NAME],
      num_epochs=1,
      shuffle=False)

  predict_validation_input_fn = lambda: input_fn(
      validation_examples,
      validation_targets[TARGET_NAME],
      num_epochs=1,
      shuffle=False)

  def compute_rmse(predict_input_fn, targets):
    """Predict with the current model and return the RMSE against `targets`."""
    predictions = dnn_regressor.predict(input_fn=predict_input_fn)
    predictions = np.array([item['predictions'][0] for item in predictions])
    # scikit-learn's documented argument order is (y_true, y_pred).
    return math.sqrt(metrics.mean_squared_error(targets, predictions))

  print("Model training started.")
  print("RMSE (on training data):")

  training_rmse_periods = []
  validation_rmse_periods = []

  for period in range(periods):
    # Each call continues training from where the previous period stopped.
    dnn_regressor.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )

    training_rmse = compute_rmse(predict_training_input_fn, training_targets)
    validation_rmse = compute_rmse(
        predict_validation_input_fn, validation_targets)

    print("  period %02d : %0.2f" % (period, training_rmse))

    training_rmse_periods.append(training_rmse)
    validation_rmse_periods.append(validation_rmse)

  print("Model training finished.")

  # Plot how both error curves evolve across the periods.
  plt.ylabel("RMSE")
  plt.xlabel("Periods")
  plt.title("Root Mean Squared Error vs. Periods")
  plt.tight_layout()
  plt.plot(training_rmse_periods, label="training")
  plt.plot(validation_rmse_periods, label="validation")
  plt.legend()

  print("Final RMSE (on training data):   %0.2f" % training_rmse)
  print("Final RMSE (on validation data): %0.2f" % validation_rmse)

  return dnn_regressor

## Train a NN Model



In [0]:
# Hyperparameters for this training run.
hyperparameters = dict(
    learning_rate=0.001,
    steps=2000,
    batch_size=100,
    hidden_units=[10, 10],
)

dnn_regressor = train_nn_regression_model(
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets,
    **hyperparameters)

## Evaluate on Test Data



In [0]:
# Load the separate test CSV and preprocess it the same way as the training data.
test_file_url = "https://storage.googleapis.com/mledu-datasets/california_housing_test.csv"
test_dataframe = pd.read_csv(test_file_url, sep=",")

test_examples = preprocess_features(test_dataframe)
test_targets = preprocess_targets(test_dataframe)


def predict_test_input_fn():
  """Feed the test set once, unshuffled, so predictions align with targets."""
  return input_fn(
      test_examples,
      test_targets[TARGET_NAME],
      num_epochs=1,
      shuffle=False)


# Collect the scalar prediction for every test example.
test_predictions = np.array([
    prediction['predictions'][0]
    for prediction in dnn_regressor.predict(input_fn=predict_test_input_fn)])

test_mse = metrics.mean_squared_error(test_predictions, test_targets)
test_rmse = math.sqrt(test_mse)

print("Final RMSE (on test data): %0.2f" % test_rmse)