In [3]:
# Importing Libraries
import pandas as pd
from pandas_profiling import ProfileReport
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import numpy as np
import pandarallel
from sklearn.preprocessing import StandardScaler, Normalizer, Binarizer
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from kerastuner.tuners import RandomSearch, Sklearn
from sklearn.model_selection import train_test_split
from kerastuner.engine.hyperparameters import HyperParameters
from keras.activations import relu
from keras.optimizers import RMSprop
import math

In [4]:
# Relative path to the CSV exported from the EDA notebook
filename = "./EDA Notebook.ipynb.csv"

In [5]:
# Read the CSV at `filename` into a DataFrame
df = pd.read_csv(filename)

In [6]:
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,id,property_type,room_type,accommodates,bathrooms,bed_type,cancellation_policy,cleaning_fee,city,host_identity_verified,instant_bookable,latitude,longitude,review_scores_rating,bedrooms,beds,price
0,21230903,Apartment,Entire home/apt,4,2.0,Real Bed,strict,1,LA,t,f,34.100927,-118.346146,97.0,2.0,2.0,350.0
1,21228356,House,Private room,2,1.0,Real Bed,strict,0,NYC,t,t,40.698569,-73.742247,91.0,1.0,2.0,47.0
2,21227461,House,Private room,2,0.0,Real Bed,strict,1,LA,t,f,34.08583,-118.130391,88.0,1.0,1.0,34.0
3,21218973,Apartment,Private room,2,1.0,Real Bed,moderate,1,NYC,t,t,40.825218,-73.950479,100.0,1.0,1.0,55.0
4,21215451,House,Entire home/apt,3,1.0,Real Bed,flexible,1,Chicago,t,f,41.959518,-87.683155,100.0,1.0,2.0,130.0


In [None]:
# (n_rows, n_columns) of the loaded frame
df.shape

In [None]:
# Avoid data leakage: `log_price` (presumably a transform of the target `price`) must not be a feature.
# TODO: do this drop in the EDA notebook export instead and delete this cell.
# errors='ignore' tolerates an export that already lacks the column (the df.head() preview
# shows no `log_price`), and the non-inplace form keeps the cell safe to re-run.
df = df.drop(columns='log_price', errors='ignore')

In [None]:
# The index does not carry over through the CSV exported by the EDA notebook,
# so restore `id` as the index here.
# Guarded and non-inplace: re-running the cell is a no-op instead of a KeyError
# (after the first run `id` is the index and no longer a column).
if 'id' in df.columns:
    df = df.set_index('id')

In [None]:
# Separate the label column (y) from the remaining columns (X)
target = 'price'
y = df[target]
X = df.drop(target, axis='columns')

In [None]:
# Sanity check: the feature matrix and the target must have the same number of rows
assert X.shape[0] == y.shape[0]

In [None]:
# Hold out 20% of the rows; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Building Our Model
---
- Without preprocessing this time around

In [None]:
def build_regression_model(hp):
    """
    Build and compile a one-hidden-layer Keras regressor for keras-tuner.

    Parameters
    ----------
    hp : keras-tuner hyperparameter container
        Must already hold values for 'units', 'activation' and
        'learning_rate'; they are only read here (via `hp.get`), not declared.

    Returns
    -------
    A compiled `Sequential` model: one Dense hidden layer followed by a
    single-unit ReLU output, trained with RMSprop on MSE loss and
    reporting MAE as a metric.
    """
    # hidden layer: width and activation come from the search space
    hidden_layer = Dense(units=hp.get('units'), activation=hp.get("activation"))

    # output layer: one unit; ReLU keeps predictions non-negative
    output_layer = Dense(1, activation='relu')

    model = Sequential([hidden_layer, output_layer])

    optimizer = tf.keras.optimizers.RMSprop(hp.get('learning_rate'))
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model

In [None]:
# Declare the search space whose values build_regression_model reads via hp.get
hp = HyperParameters()
hp.Int(name='units', min_value=32, max_value=512, step=32)  # hidden-layer width
hp.Choice(name='learning_rate', values=[1e-1, 1e-2, 1e-3])   # RMSprop learning rate
hp.Choice(name='activation', values=["relu"])                # hidden-layer activation

In [None]:
# Display the feature matrix
X

In [None]:
# Size of the search space declared on `hp`:
# every `units` value x every learning rate x every activation.
n_unit_choices = len(range(32, 512 + 32, 32))  # 32, 64, ..., 512
n_learning_rates = 3                            # 1e-1, 1e-2, 1e-3
n_activations = 1                               # only "relu" is declared
n_unique_hparam_combos = n_unit_choices * n_learning_rates * n_activations

# Sample a quarter of the grid. The trial budget must be a whole number, so
# truncate the product (a bare `* .25` yields a float) and keep at least one trial.
n_param_combos_to_sample = max(1, int(n_unique_hparam_combos * .25))

In [None]:
# Random search over the space declared on `hp`.
random_tuner = RandomSearch(
    build_regression_model,
    # The model is a regressor compiled with loss='mse' and metrics=['mae'];
    # no accuracy metric is ever logged, so rank trials by validation loss.
    objective='val_loss',
    # number of times to sample the parameter set and build a model (must be an integer)
    max_trials=int(n_param_combos_to_sample),
    seed=1234,
    hyperparameters=hp,  # pass in our hyperparameter dictionary
    directory='./keras-tuner-trial',
    project_name='random_search')

In [None]:
# Run the hyperparameter search; take note of the "Total elapsed time" in the printed output.
# NOTE(review): the test split is used as validation data here — confirm that is intended.
random_tuner.search(X_train,
                    y_train,
                    epochs=3,
                    validation_data=(X_test, y_test))
# TODO: go back to the EDA notebook and finish preprocessing the data before this cell can run.
# Reference for the tensor-conversion ValueError: https://stackoverflow.com/questions/58636087/tensorflow-valueerror-failed-to-convert-a-numpy-array-to-a-tensor-unsupporte