In [None]:
# Loading the Boston housing dataset
from keras.datasets import boston_housing

# load_data() hands back a (train, test) pair, each itself a (features, targets) pair.
train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [None]:
# Check which container type the loaded training features come in.
type(train_data)

numpy.ndarray

In [None]:
# Dimensions of the training features: (number of samples, number of features).
train_data.shape

(404, 13)

In [None]:
# Dimensions of the test features: (number of samples, number of features).
test_data.shape

(102, 13)

In [None]:
# Peek at the first 20 training targets to get a feel for their range.
train_targets[:20]

array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
       17.9, 23.1, 19.9, 15.7,  8.8, 50. , 22.5, 24.1, 27.5])

In [None]:
# Feature-wise normalization: centre every column on the training mean and
# scale it by the training standard deviation.
mean = train_data.mean(axis=0)
centered_train_data = train_data - mean
std = centered_train_data.std(axis=0)
train_data = centered_train_data / std

# The test set is transformed with the statistics computed on the training
# set; nothing is ever computed from the test data itself.
test_data = (test_data - mean) / std

In [None]:
# Model definition
from keras import models
from keras import layers

def build_model():
  """Construct and compile a new regression network.

  The network stacks two 64-unit ReLU Dense layers followed by a single-unit
  Dense layer with no activation specified. The input width is read from the
  second dimension of the global `train_data`.

  Returns:
    A newly built Sequential model compiled with the 'rmsprop' optimizer,
    'mse' loss and 'mae' as the tracked metric.
  """
  num_features = train_data.shape[1]
  model = models.Sequential([
      layers.Dense(64, activation='relu', input_shape=(num_features,)),
      layers.Dense(64, activation='relu'),
      layers.Dense(1),
  ])
  model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
  return model

In [None]:
# K-fold validation
import numpy as np
k = 4              # number of folds
num_epochs = 100   # training epochs per fold
# Size of each validation fold (integer division of the training set by k).
num_val_samples = train_data.shape[0] // k
all_scores = []    # collects one validation MAE per fold

In [None]:
# Number of samples held out for validation in each fold.
num_val_samples

101

In [None]:
for i in range(k):
  print('processing fold #', i)

  # Rows [fold_start, fold_end) are held out as this fold's validation set.
  fold_start = i * num_val_samples
  fold_end = fold_start + num_val_samples
  val_data = train_data[fold_start:fold_end]
  val_targets = train_targets[fold_start:fold_end]

  # Training set for this fold: everything before and after the held-out rows.
  partial_train_data = np.concatenate(
      (train_data[:fold_start], train_data[fold_end:]), axis=0)
  partial_train_targets = np.concatenate(
      (train_targets[:fold_start], train_targets[fold_end:]), axis=0)

  # Each fold gets a newly built model so no weights carry over between folds.
  model = build_model()
  model.fit(partial_train_data, partial_train_targets,
            epochs=num_epochs, batch_size=1, verbose=0)

  # evaluate() yields the loss (MSE) followed by the compiled metric (MAE).
  val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
  all_scores.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [None]:
# Validation MAE obtained on each of the k folds.
all_scores

[2.1559641361236572, 2.6864688396453857, 2.7355077266693115, 2.361189842224121]

In [None]:
# Average validation MAE across the folds.
np.mean(all_scores)

2.484782636165619

In [None]:
# Saving the validation logs at each fold
num_epochs = 500
all_mae_histories = []  # one list of per-epoch validation MAE values per fold
for i in range(k):
  print('processing fold #', i)

  # Rows [val_start, val_end) are held out as this fold's validation set.
  val_start = i * num_val_samples
  val_end = (i + 1) * num_val_samples
  val_data = train_data[val_start:val_end]
  val_targets = train_targets[val_start:val_end]

  # Training set for this fold: everything outside the held-out rows.
  partial_train_data = np.concatenate(
      [train_data[:val_start], train_data[val_end:]],
      axis=0)
  partial_train_targets = np.concatenate(
      [train_targets[:val_start], train_targets[val_end:]],
      axis=0)

  # Passing validation_data makes fit() log validation metrics every epoch.
  model = build_model()
  history = model.fit(
      partial_train_data,
      partial_train_targets,
      validation_data=(val_data, val_targets),
      epochs=num_epochs,
      batch_size=1,
      verbose=0)

  # Store this fold's curve before `history` is overwritten by the next fold;
  # otherwise only the last fold would be available for averaging.
  # The model is compiled with metrics=['mae'], which is logged as 'val_mae'.
  all_mae_histories.append(history.history['val_mae'])

processing fold # 0


KeyError: ignored

In [None]:
# List the names under which the most recent fit() call logged its per-epoch values.
history.history.keys()

dict_keys(['loss', 'mae', 'val_loss', 'val_mae'])

In [None]:
 mae_history = history.history['val_mean_absolute_error']
all_mae_histories.append(mae_history)

KeyError: ignored

In [None]:
# Building the history of successive mean K-fold validation scores:
# entry `epoch` is the validation MAE at that epoch, averaged over all folds.
average_mae_history = []
for epoch in range(num_epochs):
  maes_at_epoch = [fold_history[epoch] for fold_history in all_mae_histories]
  average_mae_history.append(np.mean(maes_at_epoch))

In [None]:
# Plotting validation scores
import matplotlib.pyplot as plt

# Epochs are numbered from 1 on the x-axis.
epoch_numbers = range(1, len(average_mae_history) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, average_mae_history)
ax.set_xlabel('Epochs')
ax.set_ylabel('Validation MAE')
plt.show()

In [None]:
# Plotting validation scores, excluding the first 10 data points

def smooth_curve(points, factor=0.9):
  """Smooth a sequence with an exponential moving average.

  The first point is kept unchanged; every later output is
  `previous_output * factor + point * (1 - factor)`.

  Args:
    points: Iterable of numeric values to smooth.
    factor: Weight given to the previous smoothed value.

  Returns:
    A new list of smoothed values, the same length as `points`.
  """
  smoothed = []
  for position, value in enumerate(points):
    if position == 0:
      # Nothing to blend with yet: seed the average with the raw value.
      smoothed.append(value)
      continue
    smoothed.append(smoothed[-1] * factor + value * (1 - factor))
  return smoothed

# Number of leading epochs left out of the smoothed plot.
num_skipped_epochs = 10
smooth_mae_history = smooth_curve(average_mae_history[num_skipped_epochs:])

# The first plotted value belongs to epoch `num_skipped_epochs + 1`, so the
# x-axis is offset accordingly; starting it at 1 would show every point
# 10 epochs too early.
first_epoch = num_skipped_epochs + 1
plt.plot(range(first_epoch, first_epoch + len(smooth_mae_history)), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

In [None]:
# Training the final model on the complete training set (no validation split).
final_fit_options = dict(epochs=80, batch_size=16, verbose=0)

model = build_model()
model.fit(train_data, train_targets, **final_fit_options)

# Score on the held-out test set; evaluate() yields the loss (MSE) followed by
# the compiled metric (MAE).
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)

