In [None]:
# In this program we will train a network to predict the median price of 
# homes in a given Boston suburb in the mid 1970s. The dataset has different features
# (for example crime rate) and each feature has its own scale.

# This is a regression problem because the value we predict is continuous rather
# than a discrete class label.

import numpy as np
from keras.datasets import boston_housing
from keras import layers
from keras import models

# Fetch the Boston housing dataset. load_data() hands back a training split and
# a test split, each as a (features, targets) pair.
train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

# Show the dimensions of each feature matrix.
print('Train data: ', train_data.shape, sep='')
print('Test data: ', test_data.shape, sep='')

Train data: (404, 13)
Test data: (102, 13)


In [None]:
# The training split holds 404 samples and the test split 102. Every sample is
# a 13-dimensional vector in which each dimension is one feature (for example
# the crime rate).

# The train/test targets are the median home values, in thousands of dollars.
print('Train targets: ', train_targets, sep='')

Train targets: [15.2 42.3 50.  21.1 17.7 18.5 11.3 15.6 15.6 14.4 12.1 17.9 23.1 19.9
 15.7  8.8 50.  22.5 24.1 27.5 10.9 30.8 32.9 24.  18.5 13.3 22.9 34.7
 16.6 17.5 22.3 16.1 14.9 23.1 34.9 25.  13.9 13.1 20.4 20.  15.2 24.7
 22.2 16.7 12.7 15.6 18.4 21.  30.1 15.1 18.7  9.6 31.5 24.8 19.1 22.
 14.5 11.  32.  29.4 20.3 24.4 14.6 19.5 14.1 14.3 15.6 10.5  6.3 19.3
 19.3 13.4 36.4 17.8 13.5 16.5  8.3 14.3 16.  13.4 28.6 43.5 20.2 22.
 23.  20.7 12.5 48.5 14.6 13.4 23.7 50.  21.7 39.8 38.7 22.2 34.9 22.5
 31.1 28.7 46.  41.7 21.  26.6 15.  24.4 13.3 21.2 11.7 21.7 19.4 50.
 22.8 19.7 24.7 36.2 14.2 18.9 18.3 20.6 24.6 18.2  8.7 44.  10.4 13.2
 21.2 37.  30.7 22.9 20.  19.3 31.7 32.  23.1 18.8 10.9 50.  19.6  5.
 14.4 19.8 13.8 19.6 23.9 24.5 25.  19.9 17.2 24.6 13.5 26.6 21.4 11.9
 22.6 19.6  8.5 23.7 23.1 22.4 20.5 23.6 18.4 35.2 23.1 27.9 20.6 23.7
 28.  13.6 27.1 23.6 20.6 18.2 21.7 17.1  8.4 25.3 13.8 22.2 18.4 20.7
 31.6 30.5 20.3  8.8 19.2 19.4 23.1 23.  14.8 48.8 22.6 33.4 21.1 

In [None]:
# The features of the 13D vectors are on different scales, so each feature is
# normalized on its own: subtract the feature's mean, then divide by its
# standard deviation. Afterwards every feature is centered around 0 and has
# unit standard deviation.
#
# Never use any quantity computed on the test data: the mean and standard
# deviation both come from the training data and are reused for the test data.
mean = train_data.mean(axis=0)
np.subtract(train_data, mean, out=train_data)   # center in place
std = train_data.std(axis=0)                    # measured on the centered data
np.divide(train_data, std, out=train_data)      # scale in place

np.subtract(test_data, mean, out=test_data)
np.divide(test_data, std, out=test_data)

# Since we have so few samples, we use a small network with only two hidden
# layers to limit overfitting. In general, using a smaller network is one way
# to avoid overfitting.

# This builds the typical model for regression problem.
def build_model():
  """Create and compile a fresh regression network.

  The network is two 64-unit ReLU layers followed by a single-unit output
  layer. The output layer has no activation, so it is linear and the network
  is free to output any value. The input width is taken from the notebook-level
  `train_data` (its number of columns).

  Returns:
    The compiled model. It uses the 'rmsprop' optimizer, 'mse' (mean squared
    error: the square of the difference between targets and predictions) as
    the loss, and reports 'mae' (mean absolute error) as a metric.
  """
  num_features = train_data.shape[1]
  network = models.Sequential([
      layers.Dense(64, activation='relu', input_shape=(num_features,)),
      layers.Dense(64, activation='relu'),
      layers.Dense(1),   # linear output
  ])
  network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
  return network

In [None]:
# We could split our data into a training set and a validation set, but we don't
# have many samples, so the validation score would have high variance: it could
# vary a lot depending on which data points ended up in training and which in
# validation. Instead we use a K-fold approach, which consists of splitting the
# dataset into K partitions, making K identical models and training each one on
# K-1 partitions while evaluating it on the remaining partition. The validation
# score is the mean of the K validation scores.

import numpy as np

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []

for i in range(k):
  print('Processing fold #', i)
  # Row range of the partition held out as this fold's validation set.
  val_start = i * num_val_samples
  val_stop = val_start + num_val_samples
  val_data = train_data[val_start:val_stop]
  val_targets = train_targets[val_start:val_stop]

  # The training data is every row outside the held-out range.
  held_out = np.s_[val_start:val_stop]
  partial_train_data = np.delete(train_data, held_out, axis=0)
  partial_train_targets = np.delete(train_targets, held_out, axis=0)

  # Each fold trains its own freshly built model; verbose=0 trains silently.
  model = build_model()
  model.fit(partial_train_data, partial_train_targets,
            epochs=num_epochs, batch_size=1, verbose=0)

  # Score the fold on its held-out partition and keep the MAE.
  val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
  all_scores.append(val_mae)

print('All scores: ', all_scores, sep='')
print('Mean: ', np.mean(all_scores), sep='')

Processing fold # 0
Processing fold # 1
Processing fold # 2
Processing fold # 3
All scores: [2.1424367427825928, 2.566755533218384, 2.522475242614746, 2.6809005737304688]
Mean: 2.478142023086548


In [None]:
# As you can see, the average validation MAE is about 2.5. Since the targets are in thousands of dollars,
# this means the model is off by about 2500 dollars on average. This isn't good enough, so let's retrain with 500 epochs.

# Repeat the K-fold run with more epochs, this time keeping the validation MAE
# measured after every epoch of every fold.
num_epochs = 500
all_mae_histories = []

for i in range(k):
  print('Processing fold #', i)
  # Partition held out as this fold's validation set.
  val_data = train_data[i * num_val_samples : (i + 1) * num_val_samples]
  val_targets = train_targets[i * num_val_samples : (i + 1) * num_val_samples]

  # Training data: everything except the held-out partition.
  partial_train_data = np.concatenate(
      [train_data[:i * num_val_samples],
       train_data[(i + 1) * num_val_samples:]],
       axis=0)
  partial_train_targets = np.concatenate(
      [train_targets[:i * num_val_samples],
       train_targets[(i + 1) * num_val_samples:]],
       axis=0)

  model = build_model()
  # validation_data must be passed: without it Keras evaluates nothing on the
  # held-out partition, the history contains no 'val_*' entries, and the
  # lookup below fails with KeyError.
  history = model.fit(partial_train_data, partial_train_targets,
                      validation_data=(val_data, val_targets),
                      epochs=num_epochs, batch_size=1, verbose=0)

  # The history key for the 'mae' metric differs between Keras releases:
  # current versions use 'val_mae', older ones 'val_mean_absolute_error'.
  if 'val_mae' in history.history:
    mae_key = 'val_mae'
  else:
    mae_key = 'val_mean_absolute_error'
  mae_history = history.history[mae_key]
  all_mae_histories.append(mae_history)

# For each epoch, average the validation MAE over the folds.
average_mae_history = [np.mean(epoch_maes) for epoch_maes in zip(*all_mae_histories)]

Processing fold # 0


KeyError: ignored

In [None]:
import matplotlib.pyplot as plt

# Draw the averaged validation MAE against the epoch number, counting from 1.
fig, ax = plt.subplots()
epochs = range(1, len(average_mae_history) + 1)
ax.plot(epochs, average_mae_history)
ax.set_xlabel('Epochs')
ax.set_ylabel('Validation MAE')
plt.show()