In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from run_notebook import execute_notebook
from time import time

Using TensorFlow backend.


In [2]:
execute_notebook('./data/dataset-boston-house-prices.ipynb')

Shape:
(506, 14)
_array:
[[  6.32000000e-03   1.80000000e+01   2.31000000e+00   0.00000000e+00
    5.38000000e-01   6.57500000e+00   6.52000000e+01   4.09000000e+00
    1.00000000e+00   2.96000000e+02   1.53000000e+01   3.96900000e+02
    4.98000000e+00   2.40000000e+01]
 [  2.73100000e-02   0.00000000e+00   7.07000000e+00   0.00000000e+00
    4.69000000e-01   6.42100000e+00   7.89000000e+01   4.96710000e+00
    2.00000000e+00   2.42000000e+02   1.78000000e+01   3.96900000e+02
    9.14000000e+00   2.16000000e+01]
 [  2.72900000e-02   0.00000000e+00   7.07000000e+00   0.00000000e+00
    4.69000000e-01   7.18500000e+00   6.11000000e+01   4.96710000e+00
    2.00000000e+00   2.42000000e+02   1.78000000e+01   3.92830000e+02
    4.03000000e+00   3.47000000e+01]
 [  3.23700000e-02   0.00000000e+00   2.18000000e+00   0.00000000e+00
    4.58000000e-01   6.99800000e+00   4.58000000e+01   6.06220000e+00
    3.00000000e+00   2.22000000e+02   1.87000000e+01   3.94630000e+02
    2.94000000e+00   3.3

In [3]:
def create_baseline():
    _model = Sequential()
    _model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    _model.add(Dense(1, kernel_initializer='normal'))
    _model.compile(loss='mean_squared_error', optimizer='adam')
    return _model

def create_deeper():
    _model = Sequential()
    _model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    _model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    _model.add(Dense(1, kernel_initializer='normal'))
    _model.compile(loss='mean_squared_error', optimizer='adam')
    return _model    

def create_wider():
    _model = Sequential()
    _model.add(Dense(20, input_dim=13, kernel_initializer='normal', activation='relu'))
    _model.add(Dense(1, kernel_initializer='normal'))
    _model.compile(loss='mean_squared_error', optimizer='adam')
    return _model

In [4]:
_estimator_baseline = KerasRegressor(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)

In [5]:
_kfold = KFold(n_splits=10, random_state=_seed)

In [6]:
_start = time()
_results = cross_val_score(_estimator_baseline, _X, _Y, cv=_kfold)
_end = time()
print('time: {:.2f} minutes'.format((_end-_start)/60))

time: 1.66 minutes


In [7]:
print('Baseline: {} ({}) MSE'.format(_results.mean(), _results.std()))

Baseline: 31.909550037617777 (21.72701182706889) MSE


In [8]:
# An important concern with the Boston house price dataset is that the input attributes all vary in their 
# scales because they measure different quantities. 
# We can use scikit-learn’s Pipeline framework3 to perform the standardization during the model evaluation 
# process, within each fold of the cross-validation. 
# This ensures that there is no data leakage from each testset cross-validation fold into the training data.

In [9]:
_estimators = []
_estimators.append(('Standardize', StandardScaler()))
_estimators.append(('mlp', KerasRegressor(build_fn=create_baseline, epochs=100, batch_size=10, verbose=0)))

In [10]:
_pipeline = Pipeline(_estimators)

In [11]:
_start = time()
_results = cross_val_score(_pipeline, _X, _Y, cv=_kfold)
_end = time()
print('time: {:.2f} minutes'.format((_end-_start)/60))

time: 0.91 minutes


In [12]:
print('Baseline: {} ({}) MSE'.format(_results.mean(), _results.std()))

Baseline: 27.378453547025423 (30.618107830217973) MSE


In [13]:
# A further extension of this section would be to similarly apply a rescaling to the output variable such 
# as normalizing it to the range of 0 to 1 and use a Sigmoid or similar activation function on the output 
# layer to narrow output predictions to the same range.

In [14]:
# Evaluate a deeper network topology:

# One way to improve the performance of a neural network is to add more layers. 
# This might allow the model to extract and recombine higher order features embedded in the data. 
# In this section we will evaluate the effect of adding one more hidden layer to the model.

In [15]:
_estimators_deeper = []
_estimators_deeper.append(('Standardize', StandardScaler()))
_estimators_deeper.append(('mlp', KerasRegressor(build_fn=create_deeper, epochs=100, batch_size=10, verbose=0)))

In [16]:
_pipeline_deeper = Pipeline(_estimators_deeper)

In [17]:
_start = time()
_results = cross_val_score(_pipeline_deeper, _X, _Y, cv=_kfold)
_end = time()
print('time: {:.2f} minutes'.format((_end-_start)/60))

time: 1.10 minutes


In [18]:
print('Baseline: {} ({}) MSE'.format(_results.mean(), _results.std()))

Baseline: 21.368724352470508 (25.9216013776058) MSE


In [19]:
# Evaluate a wider network topology

# In this section we evaluate the effect of keeping a shallow network architecture and nearly doubling the 
# number of neurons in the one hidden layer.

In [20]:
_estimators_wider = []
_estimators_wider.append(('Standardize', StandardScaler()))
_estimators_wider.append(('mlp', KerasRegressor(build_fn=create_wider, epochs=100, batch_size=10, verbose=0)))

In [21]:
_pipeline_wider = Pipeline(_estimators_wider)

In [22]:
_start = time()
_results = cross_val_score(_pipeline_wider, _X, _Y, cv=_kfold)
_end = time()
print('time: {:.2f} minutes'.format((_end-_start)/60))

time: 1.00 minutes


In [23]:
print('Baseline: {} ({}) MSE'.format(_results.mean(), _results.std()))

Baseline: 24.636608265607673 (27.555178708535756) MSE


In [24]:
# The results demonstrate the importance of empirical testing when it comes to developing neural network models.