In [58]:
#Loading the Boston housing dataset
from keras.datasets import boston_housing

#load_data() is unpacked as a (train, test) pair in which each element is itself a
#(data, targets) pair, giving the four names below
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

In [59]:
#Report the dimensions of the training and the test inputs
print("The shape of train_data is {}.".format(train_data.shape))
print("The shape of test_data is {}.".format(test_data.shape))

The shape of train_data is (404, 13).
The shape of test_data is (102, 13).


In [57]:
#Feature-wise normalization of the inputs
#Every column of the input data holds one of the 13 features (the crime rate, for example)
#Each feature is shifted by its mean and divided by its standard deviation, so that it ends up
#centered around 0 with a unit standard deviation
#Both statistics are computed on the training data only and reused for the test data:
#a quantity computed on the test data must never be used in the workflow
print(train_data.shape)

#Centering (in place): subtract the per-feature training mean from both splits
mean = train_data.mean(axis=0)
train_data -= mean
test_data -= mean

#Scaling (in place): std is taken from the already centered training data
std = train_data.std(axis=0)
train_data /= std
test_data /= std

(404, 13)


In [52]:
#Building the network
#We have very few samples and thus use a very small network with two hidden layers, each with 64 units
#In general: the less training data, the worse overfitting will be
#A small network is one way to mitigate overfitting
from keras import models
from keras import layers

def build_model(input_dim=None):
    """Build and compile a small densely connected network for scalar regression.

    The model is wrapped in a function because the same architecture has to
    be instantiated multiple times.

    Args:
        input_dim: Number of input features per sample. When None (the
            default), the value is read from the module-level ``train_data``
            array as ``train_data.shape[1]``, which is the original behaviour.

    Returns:
        The compiled Sequential model: two 64-unit ReLU hidden layers followed
        by a single output unit without activation, compiled with the
        'rmsprop' optimizer, 'mse' loss and 'mae' as a metric.
    """
    if input_dim is None:
        # Backward-compatible default: fall back to the notebook-level training
        # array, so existing build_model() calls keep working unchanged.
        input_dim = train_data.shape[1]

    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(input_dim,)))
    model.add(layers.Dense(64, activation='relu'))
    # Single unit and no activation, i.e. a linear layer: the typical setup
    # for scalar regression.
    model.add(layers.Dense(1))
    # Mean squared error is a widely used loss for regression problems.
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model