### Regression on Boston house prices with a small neural network

In [16]:
from keras import models
from keras import layers
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the Boston housing data: a (features, targets) pair for the training
# split and another for the test split.
from keras.datasets import boston_housing

train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

In [9]:
# Look at the dataset: array shapes first, then one example row and its target.
# print() is called with a single argument so the cell produces the same
# output on Python 2 and Python 3.

print(train_data.shape)  # 404 samples, 13 features
print(train_targets.shape)  # 404 targets

print(test_data.shape)  # 102 samples, 13 features
print(test_targets.shape)  # 102 targets

print(train_data[10])  # Each datapoint is an array with 13 entries (features)
print(train_targets[10])  # Each target is the price of the house (in k$)


(404, 13)
(404,)
(102, 13)
(102,)
[  9.59571   0.       18.1       0.        0.693     6.404   100.
   1.639    24.      666.       20.2     376.11     20.31   ]
12.1


In [13]:
# Feature-wise normalization: subtract each column's mean and divide by its
# standard deviation. Both statistics are computed on the training data only
# and then reused for the test data, so the test set never influences the
# preprocessing.

mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

train_data = (train_data - mean) / std
test_data = (test_data - mean) / std

# Single-argument print() behaves identically on Python 2 and Python 3.
print(train_data[10])

[ 0.63391647 -0.48361547  1.0283258  -0.25683275  1.15788777  0.19313958
  1.11048828 -1.03628262  1.67588577  1.5652875   0.78447637  0.22689422
  1.04466491]


In [14]:
def build_model():
    """Create and compile a fresh regression network.

    The network has two hidden Dense layers of 64 ReLU units followed by a
    single Dense output unit with no activation argument. The input width is
    read from the notebook-level ``train_data`` array, so that array must
    exist before this function is called.

    Returns:
        A Sequential model compiled with the 'rmsprop' optimizer, 'mse' loss
        and 'mae' as the reported metric.
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

In [17]:
# Use k-fold validation because the dataset is small: each fold is held out
# once for validation while a fresh model is trained on the remaining folds.

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []

for i in range(k):
    # One formatted string prints the same text on Python 2 and Python 3
    # (a two-argument print(...) shows a tuple repr on Python 2).
    print('processing fold # %d' % i)

    # Bounds of the slice held out for validation in this fold.
    val_start = i * num_val_samples
    val_end = (i + 1) * num_val_samples
    val_data = train_data[val_start:val_end]
    val_targets = train_targets[val_start:val_end]

    # Training data for this fold is everything outside the validation slice.
    partial_train_data = np.concatenate(
        [train_data[:val_start], train_data[val_end:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:val_start], train_targets[val_end:]], axis=0)

    # Build a new model per fold so no trained weights carry over.
    model = build_model()

    # fit() returns the History object. `model.history` is that object, not a
    # method, so the previous `model.history()` call raised a TypeError.
    history = model.fit(partial_train_data, partial_train_targets,
                        epochs=num_epochs, batch_size=1, verbose=0)

    # evaluate() yields the loss (MSE) followed by the MAE metric, matching
    # the compile settings in build_model().
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)

    all_scores.append(val_mae)


('processing fold #', 0)
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
('processing fold #', 1)
('processing fold #', 2)
('processing fold #', 3)


In [18]:
# Validation MAE collected for each fold of the k-fold loop.
# Single-argument print() works on both Python 2 and Python 3.
print(all_scores)

[2.061478056529961, 2.1541910690836388, 2.944315316653488, 2.414095924632384]


In [24]:
# Train the final model on the full training set, then score it once on the
# held-out test set.

model = build_model()
model.fit(
    train_data,
    train_targets,
    epochs=80,
    batch_size=8,
    verbose=0
)

# evaluate() yields the loss (MSE) followed by the MAE metric.
test_metrics = model.evaluate(test_data, test_targets)
test_mse_score, test_mae_score = test_metrics



In [25]:
# Mean absolute error of the final model on the test set.
# Single-argument print() works on both Python 2 and Python 3.
print(test_mae_score)

2.8874634387446383
