# Boston Housing Prices dataset: regression

In [0]:
from IPython import display
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras

Each record in the dataset has 14 attributes (13 input features plus the MEDV target):
1.	CRIM - per capita crime rate by town
2.	ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
3.	INDUS - proportion of non-retail business acres per town.
4.	CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
5.	NOX - nitric oxides concentration (parts per 10 million)
6.	RM - average number of rooms per dwelling
7.	AGE - proportion of owner-occupied units built prior to 1940
8.	DIS - weighted distances to five Boston employment centres
9.	RAD - index of accessibility to radial highways
10.	TAX - full-value property-tax rate per 10,000 USD
11.	PTRATIO - pupil-teacher ratio by town
12.	B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
13.	LSTAT - % lower status of the population
14.	MEDV - Median value of owner-occupied homes, in thousands of USD (the prediction target)



In [0]:
boston_housing = keras.datasets.boston_housing

(train_data, train_labels), (test_data, test_labels) = boston_housing.load_data()

# Shuffle the training examples with a fixed seed. model.fit(validation_split=...)
# takes its validation samples from the end of the arrays it is given, so the
# row order decides which examples are held out; seeding keeps that split the
# same on every Restart & Run All. A private RandomState is used so the global
# NumPy random state is left untouched.
SHUFFLE_SEED = 42
order = np.random.RandomState(SHUFFLE_SEED).permutation(len(train_labels))
train_data = train_data[order]
train_labels = train_labels[order]

### Examples and features 



In [0]:
# Each shape is (number of examples, number of features).
print("Training set: {}".format(np.shape(train_data)))  # 404 examples, 13 features
print("Testing set:  {}".format(np.shape(test_data)))   # 102 examples, 13 features

In [0]:
# Show the raw (un-normalized) feature values of the first training row.
print("First training sample: {}".format(train_data[0, :]))

In [0]:
# Labels applied positionally to the 13 feature columns of train_data.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]

# Wrap the feature matrix in a DataFrame to get a labelled preview of the
# first few rows.
df = pd.DataFrame(data=train_data, columns=column_names)
df.head()

### Labels

The labels are the median house prices (MEDV), in thousands of dollars.

In [0]:
# Peek at the first ten training labels.
print(train_labels[:10])

## Normalize features



In [0]:
# Per-feature statistics are computed from the training split only; the test
# split is scaled with those same values rather than its own.
mean = np.mean(train_data, axis=0)
std = np.std(train_data, axis=0)

# Standardize both splits: subtract the training mean, divide by the training
# standard deviation.
test_data = (test_data - mean) / std
train_data = (train_data - mean) / std

print("First training sample (normalized): {}".format(train_data[0]))

## Create the model



In [0]:
def build_model():
  """Build and compile a small fully connected regression network.

  The network has two 64-unit ReLU hidden layers followed by a single linear
  output unit. The input width is read from the global `train_data`
  (`train_data.shape[1]`), so `train_data` must be defined before calling.

  Returns:
    A compiled `keras.Sequential` model that minimizes mean squared error and
    reports mean absolute error as a metric.
  """
  model = keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_shape=(train_data.shape[1],)),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(1)
  ])

  # Use the Keras optimizer instead of tf.train.RMSPropOptimizer: the tf.train
  # optimizer classes exist only in TensorFlow 1.x, while
  # keras.optimizers.RMSprop is available in both 1.x and 2.x. The learning
  # rate is passed positionally because its keyword name differs between
  # releases.
  optimizer = keras.optimizers.RMSprop(0.001)

  model.compile(
      loss='mse',
      optimizer=optimizer,
      metrics=['mae'])
  return model

model = build_model()
model.summary()

## Train the model


In [0]:
class print_dot_callback(keras.callbacks.Callback):
  """Minimal progress indicator for training runs with `verbose=0`.

  Prints one dot per finished epoch and starts a new line whenever the epoch
  index is a multiple of 100.
  """

  def on_epoch_end(self, epoch, logs=None):
    # `logs` is not used here; it defaults to None so the override matches the
    # hook signature on keras.callbacks.Callback and can be called without it.
    if epoch % 100 == 0:
      print('')
    print('.', end='')

# Upper bound on the number of training epochs.
epochs = 500

# Train with 20% of the training arrays held out for validation. Keras' own
# per-epoch logging is off (verbose=0); the callback prints progress dots.
history = model.fit(
    x=train_data,
    y=train_labels,
    validation_split=0.2,
    epochs=epochs,
    callbacks=[print_dot_callback()],
    verbose=0)

In [0]:
import matplotlib.pyplot as plt

def plot_history(history):
  """Plot training (and, if present, validation) mean absolute error per epoch.

  Args:
    history: The object returned by `model.fit`. Its `epoch` list supplies the
      x-axis and its `history` dict supplies the metric series.

  Raises:
    KeyError: If `history.history` contains no mean-absolute-error series.
  """
  logged = history.history

  # The metric compiled as 'mae' is logged under 'mean_absolute_error' by older
  # tf.keras releases and under 'mae' by newer ones; accept whichever exists.
  mae_key = next(
      (key for key in ('mean_absolute_error', 'mae') if key in logged), None)
  if mae_key is None:
    raise KeyError(
        "no mean absolute error series in history.history; "
        "available keys: {}".format(sorted(logged)))
  val_key = 'val_' + mae_key

  plt.figure()
  plt.xlabel('Epoch')
  plt.ylabel('Mean Abs Error [1000$]')
  plt.plot(history.epoch, np.array(logged[mae_key]), label='Train Loss')
  # The validation series is only logged when fit() was given validation data.
  if val_key in logged:
    plt.plot(history.epoch, np.array(logged[val_key]), label = 'Val loss')
  plt.legend()
  plt.ylim([0,5])

plot_history(history)

In [0]:
# evaluate() returns the loss followed by the compiled metrics.
loss, mae = model.evaluate(x=test_data, y=test_labels, verbose=0)

# The metric is in thousands of dollars; scale it to dollars for display.
print("Testing set Mean Absolute Error: ${:7.2f}".format(1000 * mae))

In [0]:
# Start again from a freshly initialized model.
model = build_model()

# Monitor the validation loss with a patience of 20 epochs.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20)

history = model.fit(
    x=train_data,
    y=train_labels,
    validation_split=0.2,
    epochs=epochs,
    callbacks=[early_stopping_callback, print_dot_callback()],
    verbose=0)

plot_history(history)

In [0]:
# Score the early-stopped model on the held-out test split.
loss, mae = model.evaluate(x=test_data, y=test_labels, verbose=0)

# Convert from thousands of dollars to dollars before printing.
print("Testing set Mean Absolute Error: ${:7.2f}".format(1000 * mae))

## Predict



In [0]:
# Predict on the test split and flatten the result to a 1-D array.
test_predictions = model.predict(x=test_data).flatten()

print(test_predictions)