In [2]:
# Set the seed values for the notebook so the results are reproducible.
# NumPy's global seed alone does not cover TensorFlow's own random generator,
# which Keras uses for weight initialisation, so both are seeded here.
from numpy.random import seed
import tensorflow

seed(1)
tensorflow.random.set_seed(1)

In [3]:
# Dependencies
import numpy as np
import pandas as pd
import datetime as dt

In [4]:
import tensorflow
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

# Keep the version lookup as the LAST expression of the cell: a notebook only
# renders the value of the final expression, so placing it mid-cell discarded it.
tensorflow.keras.__version__

In [5]:
# Load the raw housing data; every later cell derives its columns from `houses`.
houses = pd.read_csv('Resources/home_data.csv')

# Twenty price-range labels: three irregular low-end brackets and a 175k-200k
# bracket, fifteen 20k-wide brackets from 200k to 500k, and one open-ended top bracket.
cut_labels_20 = (
    ['<125k', '125-150k', '150k-175k', '175k-200k']
    + [f"{low_k}k-{low_k + 20}k" for low_k in range(200, 500, 20)]
    + ['500k+']
)

# Matching bin edges (one more edge than labels). The final edge of 10,000,000
# acts as the ceiling of the "500k+" bracket.
cut_bins = (
    [0, 125000, 150000, 175000]
    + [edge_k * 1000 for edge_k in range(200, 501, 20)]
    + [10000000]
)


## Data Pre-Processing

In [6]:
# Bucket every price into its labelled range and store it as a new column.
price_range_labels = pd.cut(houses['price'], bins=cut_bins, labels=cut_labels_20)
houses["price_range"] = price_range_labels

In [7]:
# Add a label-encoded copy of the zipcode column (each distinct zipcode -> integer id).
label_encoder = LabelEncoder()
houses["labeled_zipcode"] = label_encoder.fit_transform(houses["zipcode"])

# Express the date as a proleptic Gregorian ordinal so it can be used as a numeric feature.
sale_dates = pd.to_datetime(houses["date"])
houses["day_sold"] = sale_dates.apply(lambda sold_on: sold_on.toordinal())

In [8]:
# Candidate feature sets. Each one is `houses` minus a list of columns; the
# lists are composed from shared pieces instead of being copied by hand, so the
# variants cannot drift apart.

# Dropped from every variant: identifiers, the raw date string, the target
# ("price") and its binned form, and features excluded from all variants.
common_dropped = ["id","date","price","waterfront","view","yr_renovated","zipcode","price_range"]
sqft15_cols = ["sqft_living15","sqft_lot15"]

# lat/long kept, *15 columns dropped
X_coords = houses.drop(common_dropped + sqft15_cols + ["labeled_zipcode"], axis=1)
# encoded zipcode kept instead of lat/long
X_zipcodes = houses.drop(common_dropped + ["lat","long"] + sqft15_cols, axis=1)
# lat/long plus the *15 columns
X_coords_15 = houses.drop(common_dropped + ["labeled_zipcode"], axis=1)
# lat/long with only the *15 size columns (sqft_living / sqft_lot dropped)
X_coords_15_only = houses.drop(common_dropped + ["sqft_living","sqft_lot","labeled_zipcode"], axis=1)
# same as X_coords but without day_sold
X_coords_ss = houses.drop(common_dropped + sqft15_cols + ["labeled_zipcode","day_sold"], axis=1)

# Active feature set. A previous `X_trimmed = X_coords_15` assignment was
# immediately overwritten by this one, so it has been removed.
# Alternatives: X_zipcodes, X_coords, X_coords_15, X_coords_15_only
X_trimmed = X_coords_ss

# Targets: raw price for regression, binned price range for classification.
y_prices = houses["price"]
y_ranges = houses["price_range"]

## Data Exploration

In [None]:
# A notebook cell only renders its LAST expression, so the summary statistics
# must be displayed explicitly or they are silently discarded.
# `display` is provided by the IPython/Jupyter kernel.
display(houses.drop(["id","date","price","waterfront","view"], axis=1).describe())

# First ten rows of the remaining columns.
houses.drop(["id","date","waterfront","view","condition","grade","zipcode","price_range","labeled_zipcode","lat","long"], axis=1).head(10)


In [None]:
# Inspect the feature matrix currently selected for modelling.
X_trimmed

In [None]:
# Summary statistics of the target. `describe` must be CALLED; without the
# parentheses the cell only shows the bound-method object.
y_prices.describe()

In [None]:
# Histogram of all prices.
y_prices.plot.hist()

In [None]:
# Optional view of the data without expensive houses: keep only prices up to 1,000,000.
is_affordable = houses["price"] <= 1000000
affordable = houses.loc[is_affordable, "price"]
affordable.plot.hist()

In [None]:
# Summary statistics of the prices kept by the <= 1,000,000 filter.
affordable.describe()

In [None]:
# (rows, columns) of the full frame, including the derived columns added above.
houses.shape

In [None]:
# Target as an (n, 1) column vector, printed next to the feature-matrix shape
# so the row counts can be compared at a glance.
y = np.reshape(houses["price"].values, (-1, 1))
print(X_trimmed.shape, y.shape)


In [None]:
# Regression target: the `price` column of `houses` (re-assigned here so this
# cell can be run on its own), followed by a preview of its first rows.
y_prices = houses["price"]
y_prices.head()

## Encoding and Splitting Data

In [9]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model


In [None]:
# We use this code only when we are trying to use binning of the y data
# Step 1: Label-encode data set
#label_encoder = LabelEncoder()
#label_encoder.fit(y_ranges)
#encoded_y_train = label_encoder.transform(y_train)
#encoded_y_test = label_encoder.transform(y_test)


In [None]:
# We use this code only when we are trying to use binning of the y data
# Step 2: Convert encoded labels to one-hot-encoding
#y_train_categorical = to_categorical(encoded_y_train)
#y_test_categorical = to_categorical(encoded_y_test)
#y_train_categorical

In [None]:
# LabelEncode the zipcode data


In [10]:
# Hold out a test partition; the fixed random_state makes the split repeatable.
split_parts = train_test_split(X_trimmed, y_prices, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Inspect the training targets produced by the split.
y_train

In [None]:
# Inspect the held-out test targets produced by the split.
y_test

In [11]:
# Standardise the features. The scaler is fitted on the training partition only
# and then applied unchanged to the test partition.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a Deep Learning Model

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score

In [14]:
# Create model and add layers
multiplier = 4       # hidden-layer width as a multiple of the number of input features

# Number of hidden layers. This used to be declared as 5 but was ignored: three
# hidden layers were hard-coded below. The layers are now driven by this value,
# which is set to 3 so the resulting architecture is unchanged.
num_of_layers = 3

mid_activation = "selu"       # alternative: "relu"
final_activation = "linear"   # regression output; "softmax" was used for classification

num_inputs = X_train_scaled.shape[1]   # one input per feature column
num_units = multiplier * num_inputs

model = Sequential()
# The first hidden layer also declares the input size.
model.add(Dense(units=num_units, activation=mid_activation, input_dim=num_inputs))
for _ in range(num_of_layers - 1):
    model.add(Dense(units=num_units, activation=mid_activation))

# Single-unit output layer: one predicted price per row.
model.add(Dense(units=1, activation=final_activation))

In [None]:
# Compile the model for regression: Adam optimizer, mean squared error loss.
# No extra metrics are requested (the classification variant of this notebook
# used categorical cross-entropy with an accuracy metric instead).
compile_options = {"optimizer": "adam", "loss": "mean_squared_error"}
model.compile(**compile_options)

In [None]:
# Print the layer-by-layer structure of the current model.
model.summary()

In [None]:
# Train on the scaled training features against the raw prices for 10 epochs,
# reshuffling the samples each epoch and logging one line per epoch.
model.fit(x=X_train_scaled, y=y_train, epochs=10, shuffle=True, verbose=2)


## Quantify our Trained Model

In [None]:
# Score the current model with R^2 on both partitions. R^2 is used instead of
# model.evaluate() because the model is compiled without any metrics.
y_train_pred, y_test_pred = (
    model.predict(features) for features in (X_train_scaled, X_test_scaled)
)

train_value = r2_score(y_train, y_train_pred)
test_value = r2_score(y_test, y_test_pred)

# Report the scores as percentages with two decimals.
str_train_value = f"{train_value * 100:.2f}"
str_test_value = f"{test_value * 100:.2f}"

print("This model accounts for", str_train_value, "% of the training data forces on price.")
print("This model accounts for", str_test_value, "% of the test data forces on price.")


## Save our Trained Model

In [None]:
# Write the current model to disk so it can be reloaded without retraining.
model.save("Trained_Models/chris_trained_model.h5")

## Load our Trained Model

In [16]:
# Replace the in-memory model with a previously saved one. Note that this is
# "chris_best_model.h5", not the "chris_trained_model.h5" written by the save
# cell, so everything below evaluates the stored model rather than the one just trained.
model = load_model("Trained_Models/chris_best_model.h5")

In [17]:
# Score whichever model is currently bound to `model` with R^2 on both
# partitions, using the already-scaled feature matrices.
y_train_pred = model.predict(X_train_scaled)
train_value = r2_score(y_train, y_train_pred)

y_test_pred = model.predict(X_test_scaled)
test_value = r2_score(y_test, y_test_pred)

# Percentages with two decimals for the printed report.
str_train_value = format(train_value * 100, ".2f")
str_test_value = format(test_value * 100, ".2f")

print("This model accounts for", str_train_value, "% of the training data forces on price.")
print("This model accounts for", str_test_value, "% of the test data forces on price.")


This model accounts for 87.82 % of the training data forces on price.
This model accounts for 85.43 % of the test data forces on price.


## Make Predictions

In [None]:
# The model is a single-output regressor, so there are no classes to predict
# (`predict_classes` is also gone from recent TensorFlow releases) and the
# zipcode `label_encoder` cannot decode prices. Predict prices from the SCALED
# features the model was trained on, then bucket them into the price ranges.
predicted_prices = model.predict(X_test_scaled[:100]).ravel()
prediction_labels = pd.cut(predicted_prices, bins=cut_bins, labels=cut_labels_20)
prediction_labels

In [None]:
# Same as the 100-row preview but for the whole test partition: predict prices
# from the scaled features (the regressor has no classes to predict), then
# bucket them into the labelled price ranges.
predicted_prices = model.predict(X_test_scaled).ravel()
prediction_labels = pd.cut(predicted_prices, bins=cut_bins, labels=cut_labels_20)
prediction_labels

In [None]:
# `y_test_categorical` is never defined (the code that built it is commented
# out), so take the actual price ranges from `y_ranges`, aligned to the test
# rows through their index. Both lists are cut to the first 20 rows so they line up.
actual_ranges = y_ranges.loc[y_test.index]
print(f"Predicted classes: {list(prediction_labels[:20])}")
print(f"Actual Labels: {list(actual_ranges.iloc[:20])}")

In [None]:
# One-hot encode the actual price ranges of the first 20 test rows. The original
# line was missing the call parentheses (a syntax error) and referenced the
# undefined `y_test_categorical`.
# NOTE(review): a price outside `cut_bins` gets category code -1, which
# to_categorical would treat as the last class - confirm no such rows exist.
actual_range_codes = y_ranges.loc[y_test.index].cat.codes.iloc[:20]
to_categorical(actual_range_codes, num_classes=len(cut_labels_20))

In [None]:
# Actual price-range labels of the first 20 test rows. `label_encoder` was
# fitted on zipcodes, so it cannot decode price ranges, and `y_test_categorical`
# is never defined; the labels are read directly from `y_ranges` instead.
y_ranges.loc[y_test.index].iloc[:20]

## WARNING - Parametric study: searching for a better-performing model

In [18]:
###############################################
### WARNING! This is for parametric study!  ###
### This will take a very long time to run! ###
###############################################
#
# Grid search over hidden-layer activation, layer width (as a multiple of the
# number of inputs) and number of hidden layers. Every configuration is trained
# incrementally; after each increment the model is scored with R^2 on the
# training and test partitions and saved to a file whose name embeds the
# configuration, the cumulative epoch count and both scores.
#
# NOTE(review): the file names are meant to be scanned for the best TEST score,
# so the test partition doubles as a model-selection set. A separate validation
# split would give an unbiased final estimate.
import os

# Search grid. (Earlier, immediately-overwritten values of the first two lists
# and an unused list of final activations have been removed.)
list_of_multipliers = [4, 5, 6]
list_of_num_of_layers = [4, 5, 6, 7]
list_of_middle_layer_activations = ["relu","selu"]

num_inputs = X_train_scaled.shape[1]   # one input per feature column
epoch_start = 20    # epochs trained before the first checkpoint
epoch_inc = 10      # additional epochs between subsequent checkpoints
epoch_stop = 200    # last cumulative epoch count to checkpoint

verbosity = 0

# True  -> really compile, train, score and save every model.
# False -> dry run: build the models, print their summaries and the file names
#          that would be written (with placeholder scores), but train nothing.
this_is_not_a_drill = True
base_file_name = "Parametric_Study_Models/retest_trained_linear_coords_"

if this_is_not_a_drill:
    # Fail fast: make sure the output directory exists before spending hours training.
    os.makedirs(os.path.dirname(base_file_name), exist_ok=True)

for mid_activation in list_of_middle_layer_activations:

    activation_file_name = f"{mid_activation}_"
    for multiplier in list_of_multipliers:

        num_units = multiplier * num_inputs

        for num_of_layers in list_of_num_of_layers:

            # Many models are created in this loop; release the Keras global
            # state left behind by the previous one so memory does not grow.
            tensorflow.keras.backend.clear_session()

            ### Construct the model layers ###

            # The first hidden layer (which declares the input size) is always
            # added, so the name holds at least one "<multiplier>x" token.
            multiplier_file_name = f"{multiplier}x" * max(num_of_layers, 1)

            model = Sequential()
            model.add(Dense(units=num_units, activation=mid_activation, input_dim=num_inputs))
            for _ in range(num_of_layers - 1):
                model.add(Dense(units=num_units, activation=mid_activation))
            model.add(Dense(units=1, activation='linear'))  # Regression

            ### Compile the Model ###

            if this_is_not_a_drill:
                model.compile(optimizer='adam', loss='mean_squared_error')
            else:
                model.summary()

            ### Train incrementally and checkpoint after every increment ###

            # epoch_cnt is the CUMULATIVE number of epochs after this step: the
            # first step trains epoch_start epochs, each later one adds epoch_inc.
            for epoch_cnt in range(epoch_start, epoch_stop + 1, epoch_inc):

                epochs_to_run = epoch_start if epoch_cnt == epoch_start else epoch_inc

                if this_is_not_a_drill:
                    model.fit(
                        X_train_scaled,
                        y_train,
                        epochs=epochs_to_run,
                        shuffle=True,
                        verbose=verbosity
                        )

                    ### Evaluate the model's performance ###
                    y_train_pred = model.predict(X_train_scaled)
                    y_test_pred = model.predict(X_test_scaled)

                    train_value = r2_score(y_train, y_train_pred)
                    test_value = r2_score(y_test, y_test_pred)

                    train_value_pct = "{:.2f}".format(train_value * 100)
                    test_value_pct = "{:.2f}".format(test_value * 100)
                else:
                    # Placeholder scores so the dry run still prints realistic names.
                    train_value_pct = "{:.2f}".format(0.31415926 * 100)
                    test_value_pct = "{:.2f}".format(0.31415926 * 100)

                # <epochs>__<train R^2 %>__<test R^2 %>.h5
                epoch_file_name = f"{epoch_cnt}__{train_value_pct}__{test_value_pct}.h5"
                full_file_name = f"{base_file_name}{activation_file_name}{multiplier_file_name}{epoch_file_name}"

                print(f"Saving Model to File : {full_file_name}")

                if this_is_not_a_drill:
                    model.save(full_file_name)

print("Training Permutations Complete!")


Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x20__72.48__72.25.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x30__73.03__72.84.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x40__73.46__73.29.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x50__75.08__74.76.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x60__76.89__76.42.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x70__78.92__78.21.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x80__79.94__78.97.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x90__80.35__79.22.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x100__80.86__79.87.h5
Saving Model to Fi

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x4x4x4x180__89.45__83.75.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x4x4x4x190__89.78__84.51.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_4x4x4x4x4x4x4x200__89.39__83.37.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x20__73.01__72.85.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x30__73.57__73.39.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x40__73.72__73.36.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x50__77.46__76.89.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x60__80.33__79.52.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x70__81.49__80.23.h

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x150__89.79__82.93.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x160__90.84__83.86.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x170__91.63__84.09.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x180__91.80__83.74.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x190__92.15__83.35.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_5x5x5x5x5x5x5x200__90.89__82.03.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x20__72.64__72.36.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x30__73.03__72.97.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x120__89.91__84.16.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x130__91.75__84.33.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x140__92.41__84.56.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x150__93.14__83.79.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x160__93.50__83.81.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x170__93.48__84.03.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x180__93.23__83.55.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_relu_6x6x6x6x6x6x6x190__94.03__82.78.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coo

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x90__88.88__83.73.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x100__89.56__83.34.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x110__89.55__81.51.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x120__88.63__81.64.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x130__89.39__78.49.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x140__89.53__81.15.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x150__91.13__81.67.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_4x4x4x4x4x4x4x160__91.70__79.61.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coor

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x60__87.48__84.29.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x70__88.28__84.94.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x80__88.12__84.59.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x90__89.25__84.83.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x100__89.57__84.80.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x110__90.38__84.60.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x120__90.77__84.39.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_5x5x5x5x5x5x5x130__90.78__84.19.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_

Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x30__83.61__81.71.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x40__86.63__84.01.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x50__87.22__83.78.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x60__87.22__83.39.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x70__89.25__83.96.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x80__88.15__81.81.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x90__90.20__83.71.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_selu_6x6x6x6x6x6x6x100__91.09__84.52.h5
Saving Model to File : Parametric_Study_Models/retest_trained_linear_coords_sel

In [None]:
### Use and modify this command in your bash window to find the best performers
# ls -salt *__85.[0-9][0-9].h5