In [1]:
# regression predictive modelling problem using the boston housing dataset

import numpy as np

from keras.models import Sequential
from keras.layers import Dense

from keras.datasets import boston_housing

# load the Boston housing data: (X, Y) is the training portion used for
# cross-validation below; test_split=0.2 reserves 20% as (x_test, y_test)
(X,Y), (x_test, y_test) = boston_housing.load_data(test_split=0.2)

# import sklearn tools for validation
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


Using TensorFlow backend.


In [2]:
# global settings:
# g_standardize_data is handed to create_pipeline() as its `standardize`
# flag for the baseline run; False means the raw features are used.
g_standardize_data = False

In [8]:
def baseline_model():
    """Build and compile the baseline regression network.

    Architecture: 13 input features -> Dense(13, relu) -> Dense(1) with no
    output activation. Both layers use the 'normal' kernel initializer.

    Returns:
        A Sequential model compiled with mean squared error loss and the
        adam optimizer.
    """
    hidden = Dense(13, kernel_initializer='normal', input_dim=13, activation="relu")
    output = Dense(1, kernel_initializer='normal')

    # Assemble the stack in one go; layers are added in list order.
    network = Sequential([hidden, output])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


In [9]:
# including the scikit-learn modules for analysis gives greater flexibility
# use fixed random seed to allow for reproducible results
# NOTE(review): this seeds NumPy's global RNG only; no backend (TensorFlow)
# seed is set in this cell, so runs may still differ — confirm if exact
# reproducibility is required.
seed=7
np.random.seed(seed)

def train_and_output(estimator, x_data, y_data):
    """Cross-validate `estimator` on the given data with 10 seeded, shuffled folds.

    Args:
        estimator: scikit-learn compatible estimator (a KerasRegressor or a
            Pipeline ending in one, as produced by create_pipeline).
        x_data: feature matrix.
        y_data: target values aligned with `x_data`.

    Returns:
        The array of per-fold scores returned by cross_val_score. No
        `scoring` argument is passed, so the estimator's own score method
        decides the metric and its sign.
    """
    # shuffle=True is what makes random_state meaningful: without it the
    # folds are consecutive slices, the seed is ignored, and recent
    # scikit-learn releases raise ValueError for that combination.
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    return cross_val_score(estimator, x_data, y_data, cv=kfold)

# using scikit-learn's pipeline tools to put a standardization step in front of the estimator
# (rescaling the input features should improve model output):
def create_pipeline(model, standardize=False, epochs=None, batch_size=5):
    """Wrap a Keras model factory in a scikit-learn compatible estimator.

    Args:
        model: zero-argument callable that returns a compiled Keras model;
            it is passed to KerasRegressor as `build_fn`.
        standardize: when truthy, the regressor is placed behind a
            StandardScaler step inside a scikit-learn Pipeline.
        epochs: number of training epochs. None (the default) keeps the
            historical values: 50 with standardization, 100 without.
        batch_size: mini-batch size used for training (default 5).

    Returns:
        A Pipeline with steps ('standardize', 'mlp') when `standardize` is
        truthy, otherwise the bare KerasRegressor.
    """
    if epochs is None:
        # Preserve the per-branch defaults that existing callers rely on.
        epochs = 50 if standardize else 100

    regressor = KerasRegressor(build_fn=model, epochs=epochs, batch_size=batch_size, verbose=0)
    if not standardize:
        return regressor

    # Run with standardized data: the scaler is fitted inside each CV fold
    # because it is part of the pipeline being cross-validated.
    return Pipeline([
        ('standardize', StandardScaler()),
        ('mlp', regressor),
    ])
    


In [10]:
# cross-validate the baseline network; whether the inputs are standardized
# is controlled by the notebook-level g_standardize_data flag
pipeline = create_pipeline(baseline_model, g_standardize_data)
results = train_and_output(pipeline, X, Y)
# report mean and spread of the per-fold scores
# NOTE(review): abs() suggests the wrapper's score is a negated loss — confirm
print("Baseline model: %.2f (%.2f) MSE" % (abs(results.mean()), results.std()))


Baseline model: 22.78 (9.01) MSE


In [6]:
# testing with a larger model:
# should improve model output:

def larger_model():
    """Build and compile a deeper regression network: 13 -> 13 -> 6 -> 1.

    Two relu hidden layers (13 and 6 units) feed a single linear output
    unit. All layers use the 'normal' kernel initializer.

    Returns:
        A Sequential model compiled with mean squared error loss and the
        adam optimizer.
    """
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    # Only the first layer declares input_dim; the output layer is fed by the
    # 6-unit layer above, so an input_dim of 13 here would be misleading.
    model.add(Dense(1, kernel_initializer='normal'))

    # compile:
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# standardize=True: the regressor runs behind a StandardScaler step
# (create_pipeline uses 50 epochs on this branch instead of 100)
pipeline = create_pipeline(larger_model, True)
results = train_and_output(pipeline, X, Y)
# report mean and spread of the per-fold scores
print("2 Layer-model: %.2f (%.2f) MSE" % (abs(results.mean()), results.std()))


2 Layer-model: 17.84 (8.86) MSE


In [7]:
# testing with a 'wider' model:
def wider_model():
    """Build and compile a single-hidden-layer network with 20 hidden units.

    Architecture: 13 input features -> Dense(20, relu) -> Dense(1) with no
    output activation. Both layers use the 'normal' kernel initializer.

    Returns:
        A Sequential model compiled with mean squared error loss and the
        adam optimizer.
    """
    layer_stack = (
        Dense(20, input_dim=13, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    )

    net = Sequential()
    for layer in layer_stack:
        net.add(layer)

    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

# standardize=True: the regressor runs behind a StandardScaler step
# (create_pipeline uses 50 epochs on this branch instead of 100)
pipeline = create_pipeline(wider_model, True)
results = train_and_output(pipeline, X, Y)
# report mean and spread of the per-fold scores
print("20 neuron wide-model: %.2f, (%.2f) MSE" % (abs(results.mean()), results.std()))

20 neuron wide-model: 20.08, (9.67) MSE


In [5]:
# the built in keras trainer:

model = baseline_model()
# NOTE(review): Model.fit returns a training History object rather than a
# fitted estimator, so despite its name `estimator` cannot be used for
# predict(); use `model` for inference.
estimator = model.fit(X, Y, epochs=100, batch_size=5, verbose=0)

# evaluation:
# baseline_model compiles with a loss and no extra metrics, so evaluate()
# reports the mean squared error on the held-out (x_test, y_test) split
loss_and_metrics=model.evaluate(x_test, y_test, batch_size=5)
print("loss: {}".format(loss_and_metrics)) # a single value; no stddev is reported here



In [None]:
# optional: prediction
# NOTE: despite the name, `classes` holds the network's predicted target
# values for x_test (this is a regression model), not class labels.
classes = model.predict(x_test, batch_size=5)


# predict using the scikit-learn setup:
def mse(prediction, actual):
    """Return the mean squared error between predictions and true values.

    Both inputs are flattened to 1-D first, so a column vector of shape
    (n, 1) and a flat vector of shape (n,) are compared element by element
    instead of being broadcast against each other.

    Args:
        prediction: array-like of predicted values.
        actual: array-like of true values with the same number of elements.

    Returns:
        The mean of the squared differences, as a Python float.

    Raises:
        ValueError: if the two inputs hold a different number of elements.
    """
    predicted = np.asarray(prediction, dtype=float).ravel()
    expected = np.asarray(actual, dtype=float).ravel()
    if predicted.shape != expected.shape:
        raise ValueError(
            "prediction and actual must have the same number of elements "
            "(got %d and %d)" % (predicted.size, expected.size)
        )
    return float(np.mean((predicted - expected) ** 2))


# NOTE:: uses the pipeline object (the most recently created one), fitted
# on the full training split and scored on the held-out test split
pipeline.fit(X, Y)
prediction = pipeline.predict(x_test)

print("prediction: %.2f" % mse(prediction, y_test))