In [None]:
## Some basic imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
## Loading the datasets

# Demographic data: one JSON file per city.
dallas = pd.read_json('../Demographic/Dallas.json/Dallas.json')
losAngeles = pd.read_json('../Demographic/Los Angeles.json/Los Angeles.json')
philadelphia = pd.read_json('../Demographic/Philadelphia.json/Philadelphia.json')

jsonDatasets = [dallas, losAngeles, philadelphia]

# Parcel data: one pre-cleaned CSV per region.
parcelDallas = pd.read_csv('../parcelData/cleanDallas.csv')
parcelPhiladelphia = pd.read_csv('../parcelData/cleanPhiladelphia.csv')
parcelSocal = pd.read_csv('../parcelData/cleanSocal.csv')

# Tag every row with its region so the frames can be told apart after they
# are concatenated (the label is one-hot encoded in a later cell).
parcelDallas['City'] = 'Dallas'
parcelPhiladelphia['City'] = 'Philadelphia'  # was misspelled 'Philadelpia'
parcelSocal['City'] = 'Socal'

# Constant placeholder columns for the Dallas and Socal frames.
# NOTE(review): presumably these columns are absent from those CSVs and are
# added so all three frames share a schema; the saleprice constants look like
# a per-region typical price -- confirm where 179120 / 634506 come from.
# All of these columns are dropped again in the next cell.
parcelDallas['recrdareano'] = 0
parcelDallas['saleprice'] = 179120
parcelDallas['numstories'] = 0
parcelDallas['taxamt'] = 0

parcelSocal['saleprice'] = 634506
parcelSocal['numstories'] = 0
parcelSocal['taxamt'] = 0

# Stack the three regions into one frame; the original per-frame row labels
# are kept (no ignore_index), so index values may repeat.
parcelCombined = pd.concat([parcelDallas, parcelPhiladelphia, parcelSocal])

In [None]:
# Remove the columns that are not carried forward into the feature matrix:
# the placeholder columns filled in while loading, plus ownership, address,
# identifier, sale and zoning fields.
parcelCombined = parcelCombined.drop(
    columns=[
        'saleprice', 'numstories', 'taxamt',
        'owner', 'owner2', 'owner3',
        'mailadd', 'parcelnumb', 'qoz', 'saledate',
        'zoning_description', 'structno', 'usedesc', 'zoning',
        'recrdareano',
    ]
)
print(parcelCombined)

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the region label into city_<name> indicator columns.
# The dtype is pinned to float: the pandas default for dummy columns is
# version dependent (bool from pandas 2.0), and boolean columns cannot go
# through the subtraction/division done by the min-max scaling further down.
parcelCombined = pd.get_dummies(
    parcelCombined,
    columns=['City'],
    prefix='city',
    dtype=float,
)
print(parcelCombined)

In [None]:
# Target: the land value column.
y = parcelCombined['landval']
# Features: every other column of the combined frame.
X = parcelCombined.drop(columns='landval')

In [None]:
from sklearn.model_selection import train_test_split


# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
def normalize(df):
    """Min-max scale every column of ``df`` to the range [0, 1].

    Each column is rescaled independently as ``(x - min) / (max - min)``
    using that column's own minimum and maximum.

    Args:
        df: DataFrame whose columns are numeric or boolean.

    Returns:
        A new DataFrame with the same index and columns; ``df`` itself is
        not modified. A column whose values are all identical is mapped to
        0.0 (instead of the NaN that a 0/0 division would produce). Missing
        values stay missing.
    """
    result = df.copy()
    for feature_name in df.columns:
        column = df[feature_name]
        if pd.api.types.is_bool_dtype(column):
            # Boolean data does not support subtraction; treat it as 0.0/1.0.
            column = column.astype(float)
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # Constant column: skip the division so it becomes 0.0, not NaN.
            result[feature_name] = (column - min_value).astype(float)
        else:
            result[feature_name] = (column - min_value) / value_range
    return result

# Scale both splits with the statistics of the TRAINING split only.
# Scaling the test split with its own min/max would put the two splits on
# different scales (the same raw value would map to different inputs) and
# would use information from the test data during preprocessing.
# Test values outside the training range simply fall outside [0, 1].
train_min = X_train.min()
# A zero range (constant training column) is replaced by 1 to avoid 0/0.
train_range = (X_train.max() - train_min).replace(0, 1)
X_test = (X_test - train_min) / train_range
X_train = normalize(X_train)
print(X_train)

In [None]:
# Start putting together the neural network 
import keras_tuner
from keras.models import Sequential
from keras.layers.core import Dense

In [None]:

def build_model(hp):
    """Build and compile a dense network from a set of hyper-parameters.

    The search space defined here is:
      * 'layers'  -- number of hidden layers, an integer from 2 to 10;
      * 'units_i' -- width of hidden layer ``i``, from 16 up to 256 in
        steps of 32.

    Every hidden layer uses ReLU; the output is a single unit with no
    activation. The model is compiled with the Adam optimizer and
    mean-squared-error as both loss and metric.

    Args:
        hp: hyper-parameter container providing ``Int`` (this function is
            handed to the KerasTuner search as its model builder).

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()

    hidden_layer_count = hp.Int('layers', 2, 10)
    for layer_index in range(hidden_layer_count):
        layer_width = hp.Int(
            'units_' + str(layer_index),
            min_value=16,
            max_value=256,
            step=32,
        )
        network.add(Dense(units=layer_width, activation='relu'))

    # Single linear output unit.
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return network



In [None]:

# Carve a validation subset out of the training data for model selection.
# The test split must not drive the hyper-parameter search: a configuration
# picked because it scores best on the test data no longer has an unbiased
# test score.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=.2, random_state=1)

# Random search over the space defined in build_model: 15 configurations,
# each trained 4 times, ranked by validation MSE.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_mse',
    max_trials=15,
    executions_per_trial=4,
    directory='model_dir',
    project_name='House_Price_Prediction')

tuner.search(X_tune, y_tune, batch_size=10, epochs=20, validation_data=(X_val, y_val))

In [None]:
# Take the single top-ranked model found by the search.
best_model = tuner.get_best_models(num_models=1)[0]

In [None]:
# Continue training the selected model on the training split
# (all fit settings are left at their defaults).
best_model.fit(x=X_train, y=y_train)

In [None]:
# An empty Sequential has no layers and therefore nothing to train or to
# predict with. Give it a small dense stack ending in a single output unit.
# NOTE(review): the two 16-unit ReLU layers mirror the smallest network in
# build_model's search space; adjust if a different baseline is intended.
classifier = Sequential([
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1),
])

In [None]:
# Compile the model
# The target (landval) is a continuous value, so use a regression loss and
# metric; binary cross-entropy and accuracy only make sense for 0/1 labels.
# This matches the loss/metric used in build_model.
classifier.compile(optimizer='adam', loss='mse', metrics=['mse'])

In [None]:
# Train the model: 14 passes over the training split in mini-batches of 10.
classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=14,
)

In [None]:
# Run the trained model on the held-out test features.
result = classifier.predict(x=X_test)


In [None]:
# Show the predictions produced for the test split.
print(result)

In [None]:
# Split into training sets

# Split every demographic dataset separately, then stack the pieces.
# NOTE(review): getTrainSplit is not defined in the visible cells; it is
# expected to return (X_train, X_test, Y_train, Y_test) for one dataset.
# NOTE(review): this rebinds X_train / X_test, replacing the parcel-data
# splits created earlier in the notebook.
#
# The original `X_train, X_test, Y_train, Y_test = pd.DataFrame()` raised
# ValueError: unpacking an empty DataFrame iterates over its (zero) columns.
# The pieces are collected in lists and concatenated once, instead of
# re-concatenating the growing frames on every iteration.
X_train_parts, X_test_parts, Y_train_parts, Y_test_parts = [], [], [], []
for dataset in jsonDatasets:
    temp_X_train, temp_X_test, temp_Y_train, temp_Y_test = getTrainSplit(dataset)

    X_train_parts.append(temp_X_train)
    X_test_parts.append(temp_X_test)
    Y_train_parts.append(temp_Y_train)
    Y_test_parts.append(temp_Y_test)

X_train = pd.concat(X_train_parts, ignore_index=True)
X_test = pd.concat(X_test_parts, ignore_index=True)
Y_train = pd.concat(Y_train_parts, ignore_index=True)
Y_test = pd.concat(Y_test_parts, ignore_index=True)

