In [112]:
!pip install keras-tuner



In [113]:
# Dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import keras_tuner as kt

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [114]:
# Read in our Data Source into a DataFrame
csv_file_path = "/content/drive/My Drive/realtor-data.csv"
realtor_data_df = pd.read_csv(csv_file_path)
realtor_data_df.head()

Unnamed: 0,brokered_by,status,price,bed,bath,acre_lot,street,city,state,zip_code,house_size,prev_sold_date
0,103378.0,for_sale,105000.0,3.0,2.0,0.12,1962661.0,Adjuntas,Puerto Rico,601.0,920.0,
1,52707.0,for_sale,80000.0,4.0,2.0,0.08,1902874.0,Adjuntas,Puerto Rico,601.0,1527.0,
2,103379.0,for_sale,67000.0,2.0,1.0,0.15,1404990.0,Juana Diaz,Puerto Rico,795.0,748.0,
3,31239.0,for_sale,145000.0,4.0,2.0,0.1,1947675.0,Ponce,Puerto Rico,731.0,1800.0,
4,34632.0,for_sale,65000.0,6.0,2.0,0.05,331151.0,Mayaguez,Puerto Rico,680.0,,


# Data Cleaning and Processing

In [115]:
# Drop NA Values and Duplicate Values
clean_df = realtor_data_df.dropna()
clean_df = clean_df.drop_duplicates()

In [116]:
# Filter Data to include only records where a house has been sold (status = sold) for the state of Michigan (state = Michigan)
filtered_df = clean_df[clean_df['status']=='sold']

michigan_df = filtered_df[clean_df['state']=='Michigan']

  michigan_df = filtered_df[clean_df['state']=='Michigan']


In [117]:
# Drop Columns We Will Not Need For Our Neural Network Model
michigan_df = michigan_df.drop(columns=(['status','street','city','state','prev_sold_date','brokered_by', 'zip_code', 'acre_lot']))

In [118]:
# Filter the price of a house to be Between $50,000 and $1,000,000
michigan_df = michigan_df[(michigan_df['price']>50000) & (michigan_df['price']<1000000)]

michigan_df.head()

Unnamed: 0,price,bed,bath,house_size
1765911,850000.0,5.0,4.0,3034.0
1765912,159900.0,3.0,2.0,1368.0
1765913,249900.0,4.0,2.0,1275.0
1765914,220000.0,2.0,1.0,1472.0
1765916,79900.0,2.0,1.0,696.0


In [119]:
# Shape of DataFrame
michigan_df.shape

(4317, 4)

In [120]:
# Set Up X and y (X will drop target, y will be only target from the cleaned df)
X = michigan_df.drop(columns=['price'])
y = michigan_df['price']

In [121]:
# Split Dataset into Training and Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [122]:
# Create scaler instance
X_scaler = StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [123]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=80,
        step=2), activation=activation, input_dim=len(X_test_scaled[0])))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 3, 6)):
        nn_model.add(Dense(units=hp.Int('units_' + str(i),
            min_value=3,
            max_value=10,
            step=2),
            activation=activation))

    nn_model.add(Dense(units=1, activation="linear")) # Research finale activation function

    # Compile the model
    nn_model.compile(loss="mean_squared_error", optimizer='adam', metrics=["mse"]) # mse

    return nn_model

In [124]:
# Create Tuner
tuner = kt.Hyperband(
    create_model,
    objective="mse",
    max_epochs=20,
    hyperband_iterations=2)
# research objective options (mse, ...)

Reloading Tuner from ./untitled_project/tuner0.json


In [125]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

In [126]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 75,
 'num_layers': 6,
 'units_0': 3,
 'units_1': 9,
 'units_2': 9,
 'units_3': 9,
 'units_4': 3,
 'units_5': 7,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0020'}

In [127]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_mse = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Mean_Square_Error: {model_mse}")



27/27 - 1s - loss: 13481563136.0000 - mse: 13481563136.0000 - 674ms/epoch - 25ms/step
Loss: 13481563136.0, Mean_Square_Error: 13481563136.0


In [128]:
# Notes

# Mean Square Error
# Root Mean Square Error (Use Metric in Model!!!)
# Mean Absolute Error
# Accruracy
# r square
#