# Boston house prices example model
### https://predictivemodeler.com/2019/10/19/tensorflow-boston-house-prices/

In [62]:
from sklearn.datasets import load_boston
import pandas as pd
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs scikit-learn < 1.2 — confirm the pinned version.
boston = load_boston()

In [63]:
# Wrap the feature matrix in a labelled frame and append the target as "MEDV".
data = pd.DataFrame(
    boston.data,
    columns=boston.feature_names,
).assign(MEDV=boston.target)

In [64]:
# Sample 80% of the rows for training (fixed seed for a reproducible split);
# the rows that were not sampled become the test set.
train_dataset = data.sample(frac=0.8, random_state=0)
test_dataset = data.drop(index=train_dataset.index)
train_dataset.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'MEDV'],
      dtype='object')

In [65]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def build_model():
  """Build and compile the regression network used in this notebook.

  The network is three 50-unit ReLU hidden layers followed by a single linear
  output unit. It is compiled with MSE loss and SGD (learning rate 0.001,
  momentum 0.9), and reports MAE and MSE as metrics.

  The input width is taken from the notebook-level ``train_dataset`` frame by
  counting every column except the ``'MEDV'`` label. Counting by name (rather
  than subtracting one from the column count) keeps the width correct whether
  or not the label column has already been popped from ``train_dataset``.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_features = sum(1 for column in train_dataset.keys() if column != 'MEDV')

  model = keras.Sequential([
    layers.Dense(50, activation='relu', input_shape=[n_features]),
    layers.Dense(50, activation='relu'),
    layers.Dense(50, activation='relu'),
    layers.Dense(1)
  ])

  # `learning_rate` is the supported keyword; the legacy `lr` alias is
  # deprecated in TF 2.x and rejected by newer Keras releases.
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [66]:
# Create the compiled network; an assignment displays nothing, so no
# output-suppressing semicolon is needed.
model = build_model()

In [67]:
# Summary statistics of the training rows with the "MEDV" label excluded,
# transposed so that each feature is a row and each statistic a column.
train_stats = train_dataset.describe().drop(columns="MEDV").transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CRIM,405.0,3.670339,8.977954,0.00632,0.08265,0.26838,3.47428,88.9762
ZN,405.0,11.067901,22.688396,0.0,0.0,0.0,12.5,95.0
INDUS,405.0,11.097951,6.699157,0.74,5.32,9.69,18.1,27.74
CHAS,405.0,0.069136,0.253999,0.0,0.0,0.0,0.0,1.0
NOX,405.0,0.553098,0.115564,0.385,0.449,0.538,0.624,0.871
RM,405.0,6.269768,0.690552,3.561,5.878,6.193,6.619,8.78
AGE,405.0,68.241975,27.908527,2.9,45.0,76.5,94.1,100.0
DIS,405.0,3.834098,2.162173,1.1296,2.0788,3.2721,5.2146,12.1265
RAD,405.0,9.474074,8.618564,1.0,4.0,5.0,24.0,24.0
TAX,405.0,405.839506,164.057951,187.0,284.0,334.0,666.0,711.0


In [68]:
# Split the "MEDV" target off each frame. DataFrame.pop removes the column in
# place, so train_dataset/test_dataset hold only feature columns afterwards,
# and re-running this cell without rebuilding them raises KeyError.
train_labels = train_dataset.pop('MEDV')
test_labels = test_dataset.pop('MEDV')

In [69]:
def norm(x):
  """Standardise ``x`` with the training-set statistics.

  Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``,
  both read from the notebook-level ``train_stats`` frame.
  """
  centered = x - train_stats['mean']
  return centered / train_stats['std']

# Both splits are scaled with the statistics of the training rows.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [70]:
# Stop training once val_loss has gone 15 epochs without improving.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)

# Train for up to 1000 epochs, with 20% of the training data used for validation.
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stop],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000


In [16]:
# Persist the current state of `model` to disk. The path is relative to the
# working directory — TODO confirm a "files/" directory is available there.
model.save("files/boston_model.h5")

In [71]:
# Sanity check: run the model on the first ten normalised training rows.
example_batch = normed_train_data.iloc[:10]
example_result = model.predict(example_batch)
example_result

array([[21.670946],
       [47.278618],
       [24.05969 ],
       [ 8.908265],
       [20.650394],
       [19.37328 ],
       [21.825243],
       [19.240065],
       [16.636889],
       [17.801283]], dtype=float32)