In [1]:
from keras.datasets import boston_housing

# Load the Boston housing dataset; load_data() returns a (features, targets)
# pair for the training split followed by one for the test split.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

Using TensorFlow backend.


Let's look at the data

In [2]:
# Dimensions of the training feature matrix: (number of samples, number of features).
train_data.shape

(404, 13)

It has 404 training examples, each with 13 numerical features. The targets are the median values of owner-occupied homes, in thousands of dollars (1970s prices).

In [3]:
# Display the regression targets of the training split.
train_targets

array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
       17.9, 23.1, 19.9, 15.7,  8.8, 50. , 22.5, 24.1, 27.5, 10.9, 30.8,
       32.9, 24. , 18.5, 13.3, 22.9, 34.7, 16.6, 17.5, 22.3, 16.1, 14.9,
       23.1, 34.9, 25. , 13.9, 13.1, 20.4, 20. , 15.2, 24.7, 22.2, 16.7,
       12.7, 15.6, 18.4, 21. , 30.1, 15.1, 18.7,  9.6, 31.5, 24.8, 19.1,
       22. , 14.5, 11. , 32. , 29.4, 20.3, 24.4, 14.6, 19.5, 14.1, 14.3,
       15.6, 10.5,  6.3, 19.3, 19.3, 13.4, 36.4, 17.8, 13.5, 16.5,  8.3,
       14.3, 16. , 13.4, 28.6, 43.5, 20.2, 22. , 23. , 20.7, 12.5, 48.5,
       14.6, 13.4, 23.7, 50. , 21.7, 39.8, 38.7, 22.2, 34.9, 22.5, 31.1,
       28.7, 46. , 41.7, 21. , 26.6, 15. , 24.4, 13.3, 21.2, 11.7, 21.7,
       19.4, 50. , 22.8, 19.7, 24.7, 36.2, 14.2, 18.9, 18.3, 20.6, 24.6,
       18.2,  8.7, 44. , 10.4, 13.2, 21.2, 37. , 30.7, 22.9, 20. , 19.3,
       31.7, 32. , 23.1, 18.8, 10.9, 50. , 19.6,  5. , 14.4, 19.8, 13.8,
       19.6, 23.9, 24.5, 25. , 19.9, 17.2, 24.6, 13

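Normalize the data feature-wise. The mean and standard deviation are computed from the training data only, and those same statistics are then applied to both the training and the test data.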

In [4]:
# Standardize every feature (column) in place: subtract the per-feature mean
# and divide by the per-feature standard deviation.
mean = train_data.mean(axis=0)
train_data -= mean
# Taken after centering; shifting by the mean does not change the spread.
std = train_data.std(axis=0)
train_data /= std

# Scale the test split with the statistics of the *training* split so that
# nothing computed from the test data is used in preprocessing.
test_data -= mean
test_data /= std

Build the model. Because so few samples are available, we use a small network: with a small dataset, a larger network would overfit more easily.

In [5]:
from keras import models
from keras import layers

def build_model():
    """Create and compile a fresh regression network.

    The model is rebuilt from scratch on every call so that each
    cross-validation fold starts from newly initialised weights.

    Architecture: two hidden ``Dense`` layers of 64 ReLU units followed by a
    single output unit with no activation (a linear output suited to
    predicting an unbounded scalar). The input width is read from the
    notebook-level ``train_data`` array.

    Returns:
        A compiled Keras ``Sequential`` model using the RMSprop optimizer,
        mean-squared-error loss, and mean absolute error as a tracked metric.
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),  # linear output: scalar regression
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

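Validation. Use K-fold cross-validation to get a more reliable estimate of the model's performance: with so little data, the score from a single small validation split would depend heavily on which samples happened to land in it.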

In [8]:
import numpy as np

# K-fold cross-validation: split the training data into k contiguous folds,
# train a fresh model on k-1 of them, and score it on the held-out fold.
k = 4
num_epochs = 100
num_val_samples = len(train_data) // k
all_scores = []

for fold in range(k):
    print('processing fold #:', fold)

    # Row range of the fold that is held out for validation this round.
    start = fold * num_val_samples
    stop = start + num_val_samples
    val_data, val_targets = train_data[start:stop], train_targets[start:stop]

    # Everything before and after the held-out fold forms the training set.
    partial_train_data = np.concatenate(
        (train_data[:start], train_data[stop:]), axis=0)
    partial_train_targets = np.concatenate(
        (train_targets[:start], train_targets[stop:]), axis=0)

    # Train silently (verbose=0) on a newly built model, then score the fold.
    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              batch_size=1, epochs=num_epochs, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores += [val_mae]

# Mean validation MAE over the k folds (displayed as the cell output).
np.mean(all_scores)

processing fold #: 0
processing fold #: 1
processing fold #: 2
processing fold #: 3


2.5290173455925276

Save validation logs

In [16]:
# Repeat the K-fold procedure with a longer training run, this time keeping
# the validation MAE measured after *every* epoch for each fold, so that the
# per-epoch curves can be averaged across folds.
#
# Fixes relative to the previous version of this cell:
#   * `fit` is now given `validation_data`, and the validation MAE series is
#     stored. Previously the training `loss` (MSE) was stored under the
#     "mae history" names.
#   * The cell no longer appends to `all_scores`, which belongs to the earlier
#     100-epoch experiment and was being mixed with 500-epoch scores.
num_epochs = 500
all_mae_histories = []
for i in range(k):
    print('processing fold #:',i)
    # Row range of the fold held out for validation in this round.
    start = i * num_val_samples
    stop = (i + 1) * num_val_samples
    val_data = train_data[start:stop]
    val_targets = train_targets[start:stop]

    # All remaining folds are used for training.
    partial_train_data = np.concatenate(
        [train_data[:start],
        train_data[stop:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:start],
        train_targets[stop:]],
        axis=0)

    model = build_model()
    # Passing validation_data makes Keras evaluate the held-out fold at the
    # end of each epoch and record the results under `val_*` keys.
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=1, verbose=0)
    # The history key for the 'mae' metric differs between Keras releases
    # ('val_mae' in newer ones, 'val_mean_absolute_error' in older ones).
    mae_key = ('val_mae' if 'val_mae' in history.history
               else 'val_mean_absolute_error')
    mae_history = history.history[mae_key]
    all_mae_histories.append(mae_history)

# Epoch-wise mean of the validation MAE across the k folds.
average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
                               

processing fold #: 0
processing fold #: 1
processing fold #: 2
processing fold #: 3


In [15]:
# Per-epoch values recorded by the most recent `fit` call (the last fold trained).
history.history

{'loss': [204.96942994465562,
  27.720018062338873,
  20.092320034440636,
  18.00172369419139,
  16.232026837918763,
  14.724075685020779,
  14.811171092799647,
  13.933324619902532,
  13.092354880027884,
  12.606559157871617,
  12.145556812303232,
  11.496067804051995,
  11.425390268221008,
  10.521366592981902,
  10.744913286789975,
  10.870848525722641,
  10.18227132874878,
  9.509842392849707,
  9.334167071722932,
  9.120116521604167,
  9.610651278723399,
  8.396625614271686,
  8.481400185854323,
  8.712018029307915,
  7.938324480494269,
  8.392363509745577,
  8.14176541653411,
  8.0656972012568,
  7.7839630515219165,
  7.887144624530163,
  7.465407553788969,
  7.8478327941983395,
  7.337184432036485,
  7.352232382617712,
  6.778808736728138,
  7.317040846131553,
  6.788784026192876,
  6.408363686832849,
  6.7846315199728355,
  6.662596533164591,
  6.443900448127216,
  6.613211045222271,
  6.529972696525148,
  6.4380846950501205,
  6.233866493355962,
  6.473022004312615,
  6.299438