In [3]:
# loading the libraries

import pandas as pd
from keras.models import Sequential
from keras.layers import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

try:
    # Newer scikit-learn releases no longer ship a vendored joblib, so prefer the standalone package.
    import joblib
except ImportError:
    # Fallback for old environments where joblib is only available through scikit-learn.
    from sklearn.externals import joblib

# Turn off pandas' chained-assignment warning so it does not clutter the cell outputs
pd.set_option("mode.chained_assignment", None)

Using TensorFlow backend.


In [4]:
# Load our data set
df = pd.read_csv("house_data.csv")

# Create the X (inputs) and y (target) frames.
# Both are kept as two-dimensional DataFrames (note the double brackets).
# .copy() makes them independent of df, so modifying them later is an ordinary
# assignment on their own data rather than a write into a selection of df
# (which is what pandas' chained-assignment warning is about).
X = df[["sq_feet", "num_bedrooms", "num_bathrooms"]].copy()
y = df[["sale_price"]].copy()

In [7]:
# Data needs to be scaled to the 0-1 range for the neural network to train correctly.

# Create separate scaler objects for the inputs and the output, so that each one can be
# used on its own later when we make real predictions on unseen/new data
# (X_scaler to pre-process new inputs, y_scaler to convert predictions back).
X_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))

# Always fit on the raw values taken from df and build NEW frames, instead of overwriting
# X and y in place with values derived from themselves. That keeps this cell idempotent:
# re-running it used to refit the scalers on already-scaled data, which silently broke
# y_scaler.inverse_transform() further down.
# The float cast is harmless for numeric columns; presumably it also avoids the dtype
# conversion warnings this cell printed (confirm on re-run).
# NOTE(review): the scalers are fitted on the full data set, before the train/test split
# in the next cell, so the min/max of the test rows influence the scaling. Consider
# fitting on the training rows only.
X = pd.DataFrame(
    X_scaler.fit_transform(df[X.columns].astype("float64")),
    columns=X.columns,
    index=X.index,
)
y = pd.DataFrame(
    y_scaler.fit_transform(df[y.columns].astype("float64")),
    columns=y.columns,
    index=y.index,
)

  return self.partial_fit(X, y)
  return self.partial_fit(X, y)


In [18]:
# Split the data set into a training set (75%) and a test set (25%).
# A fixed random_state makes the split the same on every run, so the rows shown below
# and the errors reported later do not change on each run merely because the rows were
# partitioned differently.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

X_train.head()

Unnamed: 0,sq_feet,num_bedrooms,num_bathrooms
399,0.371535,0.333333,0.0
3733,0.710489,0.666667,0.0
8185,0.107745,0.333333,0.5
6060,0.22635,0.333333,0.5
1083,0.935124,0.666667,0.5


In [None]:
# Create a Neural Network model: a plain stack of fully connected layers.
# - The first layer declares input_dim=3 because X_train has 3 columns.
# - The last layer has a single linear node because we predict one number, the house value.
model = Sequential([
    Dense(50, input_dim=3, activation='relu'),
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(1, activation='linear'),
])

# Keras uses TensorFlow behind the scenes to do all the math. With the layers declared,
# compile() asks Keras to construct the network inside TensorFlow.
#
# loss: the cost function that measures how wrong the network currently is. Since we are
# predicting continuous values, mean squared error is a good choice.
#
# optimizer: the numerical optimization algorithm used for training. We use stochastic
# gradient descent, which is the normal gradient descent algorithm except that it works
# with batches of training data instead of processing all the training data at once.
model.compile(optimizer='SGD', loss='mean_squared_error')

In [33]:
# Train the model.
# - epochs: how many times we loop through the entire training data set before the
#   gradient descent training process ends.
# - batch_size: how many training examples are considered at once during each gradient
#   descent update pass.
# - shuffle=True: tells Keras to randomize the order of the input data it sees.
# - verbose: how much Keras prints during training; 2 prints less (one line per epoch).
model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=50,
    verbose=2,
    shuffle=True,
)

Epoch 1/50
 - 1s - loss: 3.1814e-04
Epoch 2/50
 - 1s - loss: 3.1817e-04
Epoch 3/50
 - 1s - loss: 3.1850e-04
Epoch 4/50
 - 1s - loss: 3.1740e-04
Epoch 5/50
 - 1s - loss: 3.1762e-04
Epoch 6/50
 - 1s - loss: 3.1850e-04
Epoch 7/50
 - 1s - loss: 3.1916e-04
Epoch 8/50
 - 1s - loss: 3.1758e-04
Epoch 9/50
 - 1s - loss: 3.1840e-04
Epoch 10/50
 - 1s - loss: 3.1852e-04
Epoch 11/50
 - 1s - loss: 3.1786e-04
Epoch 12/50
 - 1s - loss: 3.1791e-04
Epoch 13/50
 - 1s - loss: 3.1736e-04
Epoch 14/50
 - 1s - loss: 3.1830e-04
Epoch 15/50
 - 1s - loss: 3.1787e-04
Epoch 16/50
 - 1s - loss: 3.1794e-04
Epoch 17/50
 - 1s - loss: 3.1871e-04
Epoch 18/50
 - 1s - loss: 3.1806e-04
Epoch 19/50
 - 1s - loss: 3.1826e-04
Epoch 20/50
 - 1s - loss: 3.1769e-04
Epoch 21/50
 - 1s - loss: 3.1729e-04
Epoch 22/50
 - 1s - loss: 3.1825e-04
Epoch 23/50
 - 1s - loss: 3.1811e-04
Epoch 24/50
 - 1s - loss: 3.1853e-04
Epoch 25/50
 - 1s - loss: 3.1853e-04
Epoch 26/50
 - 1s - loss: 3.1804e-04
Epoch 27/50
 - 1s - loss: 3.1806e-04
Epoch 28/5

<keras.callbacks.History at 0x12c57c160>

In [23]:
# Persist both scalers so new data can be pre-processed the same way later on
for scaler, path in ((X_scaler, "X_scaler.pkl"), (y_scaler, "y_scaler.pkl")):
    joblib.dump(scaler, path)

# Persist the trained network as well, so predictions can be made without retraining
model.save("house_value_model.h5")

In [31]:
# Report how well the model is performing
print("Model training results:")

# Report mean absolute error on the training set, with both predictions and targets
# scaled back to the original sale-price units so the number is easier to understand.
predictions_train = model.predict(X_train, verbose=0)

# scikit-learn metrics take their arguments as (y_true, y_pred), in that order.
mae_train = mean_absolute_error(
    y_scaler.inverse_transform(y_train),
    y_scaler.inverse_transform(predictions_train)
)
# The value is a mean ABSOLUTE error, not a mean squared error; the old, misleading
# name is kept as an alias in case other cells still refer to it.
mse_train = mae_train
print(f" - Training Set Error: {mae_train}")

Model training results:
 - Training Set Error: 9112.727251041666


In [32]:
# Report mean absolute error on the test set, with both predictions and targets
# scaled back to the original sale-price units so the number is easier to understand.
predictions_test = model.predict(X_test, verbose=0)

# scikit-learn metrics take their arguments as (y_true, y_pred), in that order.
mae_test = mean_absolute_error(
    y_scaler.inverse_transform(y_test),
    y_scaler.inverse_transform(predictions_test)
)
# The value is a mean ABSOLUTE error, not a mean squared error; the old, misleading
# name is kept as an alias in case other cells still refer to it.
mse_test = mae_test
print(f" - Test Set Error: {mae_test}")


 - Test Set Error: 9326.21158125


In [None]:
# How to judge how well your model is working:
#• If Test Set Error is reasonably low and only a little higher than Training Set Error, then everything is working correctly!
#• If Training Set Error and Test Set Error are both really high, the model is failing to learn at all. This is called underfitting.
#• If Training Set Error is low but Test Set Error is high, the model is memorizing the training data and not really learning anything. This is called overfitting.