First, set the environment variables and initialize the Spark context:

In [None]:
# Environment for the Spark driver; these must be set before the context
# is created at the bottom of this cell.
# SPARK_DRIVER_MEMORY=8g requests 8 GB for the driver process.
%env SPARK_DRIVER_MEMORY=8g
# Use the same Python interpreter for PySpark workers and for the driver.
# NOTE(review): the /usr/bin/python3.5 path is machine-specific -- confirm it
# exists on the host running this notebook.
%env PYSPARK_PYTHON=/usr/bin/python3.5
%env PYSPARK_DRIVER_PYTHON=/usr/bin/python3.5

# NOTE(review): star import -- presumably the source of init_nncontext and
# init_spark_conf used below; it may also pull other names into the
# notebook namespace that later cells rely on. Confirm before narrowing it.
from zoo.common.nncontext import *
# Build a Spark conf with master "local[4]" and initialize the context from it.
sc = init_nncontext(init_spark_conf().setMaster("local[4]"))

This dataset is packaged in Keras 2.0.8 but not in Keras 1.2.2, so we need to use the following code to download the data. We then normalize the data:

In [None]:
from keras.utils.data_utils import get_file
def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
    """Loads the Boston Housing dataset and splits it into train/test sets.

    The archive is fetched with `get_file`, then the samples are shuffled
    with a seeded permutation and split: the first
    `int(n * (1 - test_split))` samples form the training set and the
    remainder the test set.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).
        test_split: fraction of the data to reserve as test set;
            must satisfy `0 <= test_split < 1`.
        seed: Random seed for shuffling the data
            before computing the test split.
    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    # Raises
        ValueError: if `test_split` is outside the range [0, 1).
    """
    # Imported here so the function works regardless of which notebook cell
    # (if any) has already imported numpy.
    import numpy as np

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not 0 <= test_split < 1:
        raise ValueError(
            'test_split must satisfy 0 <= test_split < 1, got %r'
            % (test_split,))

    # Upstream Keras location of the archive; the bucket is `keras-datasets`.
    path = get_file(
        path,
        origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz'
        )
    with np.load(path) as f:
        x = f['x']
        y = f['y']

    # NOTE: this reseeds numpy's global random state, so the shuffle (and
    # therefore the split) is identical for a given seed.
    np.random.seed(seed)
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    x = x[indices]
    y = y[indices]

    # Index of the first test sample; everything before it is training data.
    split_at = int(len(x) * (1 - test_split))
    x_train = np.array(x[:split_at])
    y_train = np.array(y[:split_at])
    x_test = np.array(x[split_at:])
    y_test = np.array(y[split_at:])
    return (x_train, y_train), (x_test, y_test)

# Fetch the shuffled train/test split.
(train_data, train_targets), (test_data, test_targets) = load_data()

# Feature-wise standardization, done in place. Both statistics come from the
# training set only and are then applied to the test set as well, so no
# information from the test set is used.
mean = train_data.mean(axis=0)
for split in (train_data, test_data):
    split -= mean

# Computed after centering, from the training features only.
std = train_data.std(axis=0)
for split in (train_data, test_data):
    split /= std

In [None]:
import numpy as np

# K-fold cross-validation: the training set is cut into k equal-sized
# partitions; each fold holds one partition out for validation and trains
# on the rest. Samples beyond k * num_val_samples are never held out.
k = 4
num_val_samples = len(train_data) // k
num_nb_epoch = 50
all_scores = []
for i in range(k):
    print('processing fold #', i)
    # Rows [fold_start, fold_stop) are partition i, held out for validation.
    fold_start = i * num_val_samples
    fold_stop = fold_start + num_val_samples
    val_data = train_data[fold_start:fold_stop]
    val_targets = train_targets[fold_start:fold_stop]

    # Everything outside the held-out window is this fold's training data.
    partial_train_data = np.concatenate(
        (train_data[:fold_start], train_data[fold_stop:]), axis=0)
    partial_train_targets = np.concatenate(
        (train_targets[:fold_start], train_targets[fold_stop:]), axis=0)

    # build_model() is defined outside this cell; a new model is requested
    # for every fold.
    model = build_model()
    # NOTE: an earlier variant of this call used batch_size=1 and verbose=0.
    model.fit(partial_train_data, partial_train_targets,
              batch_size=16, nb_epoch=num_nb_epoch)

    # Record the `.result` of the first evaluation entry for this fold.
    # NOTE(review): despite the name `val_mae`, which metric sits at index 0
    # depends on how build_model() configures the model -- confirm.
    val_mae = model.evaluate(val_data, val_targets)
    all_scores.append(val_mae[0].result)

In [None]:
# Final run: request a new model, fit it on the full training set, then
# evaluate on the test set.
model = build_model()
model.fit(train_data, train_targets, batch_size=16, nb_epoch=80)

# Report the `.result` of the first evaluation entry.
test_result = model.evaluate(test_data, test_targets)
final_score = test_result[0].result
print('test result:', final_score)