## Boston Housing ##

Let's build a regressor and train it using a k-fold cross-validation approach.

In [None]:
from tensorflow.keras.datasets import boston_housing

(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Quick look at the data: array shapes, then the first sample and its target.
print(train_data.shape, train_targets.shape)
print(train_data[0, :], train_targets[0])

# Work on independent copies rather than aliases, so that any in-place
# preprocessing applied to train_X / test_X cannot silently modify the raw
# arrays loaded above.
train_X = train_data.copy()
train_Y = train_targets.copy()

test_X = test_data.copy()
test_Y = test_targets.copy()

Just for grins, we first build and train a model on the raw, un-normalized features.

In [None]:
from keras import models
from keras import layers
import numpy as np

# Baseline network: two 64-unit ReLU hidden layers followed by one output unit
# with no activation. The input width is the number of columns in train_data.
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
# Short training run; validation_split=0.2 sets aside 20% of the training data for validation.
model.fit(
    train_X,
    train_Y,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
)
  

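One thing we can do is normalize the input: subtract each feature's training-set mean and divide by its training-set standard deviation. The test set is scaled with the same training-set statistics.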

In [None]:
# Standardize each feature using statistics computed on the training set only;
# the test set is scaled with those same training statistics.
mean = train_X.mean(axis=0)
std = train_X.std(axis=0)
# Guard against constant features: a zero std would produce NaN/inf on division.
std = np.where(std == 0, 1.0, std)

# Rebind instead of using `-=` / `/=`: in-place arithmetic would also overwrite
# any other name bound to the same arrays (such as train_data / test_data when
# train_X / test_X are aliases of them) and would fail on integer-typed arrays.
train_X = (train_X - mean) / std
test_X = (test_X - mean) / std

# NOTE(review): `model` is whatever model already exists in the kernel, so this
# call continues training from its current weights rather than from scratch.
model.fit(train_X, train_Y, epochs=25, batch_size=32, validation_split=0.2)

model.evaluate(test_X, test_Y)

Now we will train using k-fold cross-validation.

In [None]:
def build_model():
    """Return a newly constructed, compiled regression network.

    Construction lives in a function because the same model has to be
    instantiated several times. The network has two 64-unit ReLU layers
    followed by a single output unit with no activation; its input width is
    read from the module-level ``train_data``. It is compiled with the
    RMSprop optimizer, MSE loss and an MAE metric.
    """
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
    return network

# Imported before the loop so a missing package fails before the training runs.
from sklearn.metrics import mean_absolute_error

# You could combine training and test datasets here ...

k = 4
# Fold size is derived from the array that is actually sliced below.
num_val_samples = len(train_X) // k
num_epochs = 2
all_val_maes = []
all_preds = []
all_targets = []
for i in range(k):
    print('processing fold #', i)
    # Validation data: partition i. The last fold also takes any remainder left
    # over when the sample count is not divisible by k, so every sample is
    # used for validation exactly once.
    val_start = i * num_val_samples
    val_stop = (i + 1) * num_val_samples if i < k - 1 else len(train_X)
    val_X = train_X[val_start:val_stop]
    val_Y = train_Y[val_start:val_stop]

    # Training data: everything outside the validation partition.
    partial_train_X = np.concatenate(
        [train_X[:val_start], train_X[val_stop:]],
        axis=0)
    partial_train_Y = np.concatenate(
        [train_Y[:val_start], train_Y[val_stop:]],
        axis=0)

    # Build a fresh Keras model (already compiled) for this fold.
    model = build_model()
    # Train with progress output enabled (verbose=1); batch_size=1 performs
    # one weight update per sample.
    history = model.fit(partial_train_X, partial_train_Y,
                        validation_data=(val_X, val_Y),
                        epochs=num_epochs, batch_size=1, verbose=1)
    # Evaluate the model on the validation data.
    val_mse, val_mae = model.evaluate(val_X, val_Y, verbose=0)
    all_val_maes.append(val_mae)

    # Keep per-fold predictions and targets to compute a pooled MAE afterwards.
    all_preds.append(model.predict(val_X).flatten().tolist())
    all_targets.append(val_Y.flatten().tolist())

# Unweighted mean of the per-fold validation MAEs.
print("Average mae on validation across folds: ", sum(all_val_maes) / len(all_val_maes))


def flatten(nested):
    """Concatenate a list of lists into a single flat list."""
    return [item for sublist in nested for item in sublist]


all_preds = flatten(all_preds)
all_targets = flatten(all_targets)

# MAE over all validation predictions pooled across folds.
print("mae on validation set: ", mean_absolute_error(all_targets, all_preds))

