In [1]:
from __future__ import absolute_import, division, print_function

In [2]:
import tensorflow as tf

# Import Boston Housing dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.boston_housing.load_data()

In [3]:
# Shape of data
print(X_train.shape)
print(X_test.shape)

(404, 13)
(102, 13)


In [4]:
# Display the first line of training dataset
print(X_train[0])
print(y_train[0])

[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]
15.2


### Normalizing the data

In [5]:
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

X_train -= mean
X_train /= std

X_test -= mean
X_test /= std

### Model

In [6]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense

def reg_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

### As the dataset is too small. We shall use K-fold validation

In [7]:
import numpy as np
k=4
num_val_samples = len(X_train)//k
num_epochs = 100
all_scores =[]

for i in range(k):
    
    #validation data from partition #k
    val_data = X_train[i * num_val_samples: (i+1)*num_val_samples]
    val_labels = y_train[i* num_val_samples : (i+1)*num_val_samples]
    
    #training data from other partitions
    partial_train_data = np.concatenate([X_train[:i*num_val_samples], 
                                         X_train[(i+1)*num_val_samples:]], axis=0)
    partial_train_labels = np.concatenate([y_train[:i*num_val_samples], 
                                            y_train[(i+1)*num_val_samples:]], axis=0)
    
    model = reg_model()
    model.fit(partial_train_data, partial_train_labels, 
              epochs= num_epochs, batch_size=10, verbose=0)
    
    val_mse, val_mae = model.evaluate(val_data, val_labels, verbose=0)
    print('processing fold #', i, 'with mean absolute error:', val_mae)
    all_scores.append(val_mae)

W0830 01:47:00.579808 4394878400 deprecation.py:506] From /Users/zohebabai/anaconda/envs/TensorflowProjects/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


processing fold # 0 with mean absolute error: 2.3971307
processing fold # 1 with mean absolute error: 2.38109
processing fold # 2 with mean absolute error: 2.6459222
processing fold # 3 with mean absolute error: 2.5862603


In [8]:
# Mean MAE of all folds
np.mean(all_scores)

2.5026007

**We are off by $2500 on average from house prices**

### Test the model

In [9]:
test_mse_score, test_mae_score = model.evaluate(X_test, y_test)
test_mae_score



3.060397