In [1]:
## Some basic imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
## Loading the datasets

dallas = pd.read_json('../Demographic/Dallas.json/Dallas.json')
losAngeles = pd.read_json('../Demographic/Los Angeles.json/Los Angeles.json')
philadelphia = pd.read_json('../Demographic/Philadelphia.json/Philadelphia.json')

jsonDatasets = [dallas, losAngeles, philadelphia]

parcelDallas = pd.read_csv('../parcelData/cleanDallas.csv')
parcelPhiladelphia = pd.read_csv('../parcelData/cleanPhiladelphia.csv')
parcelSocal = pd.read_csv('../parcelData/cleanSocal.csv')

parcelDallas['City'] = 'Dallas'
parcelPhiladelphia['City'] = 'Philadelpia'
parcelSocal['City'] = 'Socal'

parcelDallas['recrdareano'] = 0
parcelDallas['saleprice'] = 179120
parcelDallas['numstories'] = 0
parcelDallas['taxamt'] = 0

parcelSocal['saleprice'] = 634506
parcelSocal['numstories'] = 0
parcelSocal['taxamt'] = 0

parcelCombined = pd.concat([parcelDallas, parcelPhiladelphia, parcelSocal])

In [3]:
parcelCombined = parcelCombined.drop(['numstories', 'taxamt', 'owner', 'owner2', 'owner3', 'mailadd', 'parcelnumb', 'qoz', 'saledate', 'zoning_description', 'structno', 'usedesc', 'zoning', 'recrdareano'], axis=1)
print(parcelCombined)

     ll_gissqft  ll_gisacre  landval    parval    City  saleprice
0         15916     0.36537    40000    193080  Dallas   179120.0
1         12762     0.29297    63790    276580  Dallas   179120.0
2          8085     0.18559    35000    147410  Dallas   179120.0
3          7220     0.16575    25000    103660  Dallas   179120.0
4          9004     0.20670    22500    130710  Dallas   179120.0
..          ...         ...      ...       ...     ...        ...
149       94676     2.17341  7358249  13717878   Socal   634506.0
150       58397     1.34057   360519   1162088   Socal   634506.0
151       18682     0.42886  1664640   2809080   Socal   634506.0
152       23281     0.53445  1414008   2676515   Socal   634506.0
153       18083     0.41512  1388758   2083135   Socal   634506.0

[4121 rows x 6 columns]


In [4]:
from sklearn.preprocessing import OneHotEncoder

parcelCombined = pd.get_dummies(parcelCombined, columns=['City'], prefix='zone')
print(parcelCombined)

     ll_gissqft  ll_gisacre  landval    parval  saleprice  zone_Dallas  \
0         15916     0.36537    40000    193080   179120.0            1   
1         12762     0.29297    63790    276580   179120.0            1   
2          8085     0.18559    35000    147410   179120.0            1   
3          7220     0.16575    25000    103660   179120.0            1   
4          9004     0.20670    22500    130710   179120.0            1   
..          ...         ...      ...       ...        ...          ...   
149       94676     2.17341  7358249  13717878   634506.0            0   
150       58397     1.34057   360519   1162088   634506.0            0   
151       18682     0.42886  1664640   2809080   634506.0            0   
152       23281     0.53445  1414008   2676515   634506.0            0   
153       18083     0.41512  1388758   2083135   634506.0            0   

     zone_Philadelpia  zone_Socal  
0                   0           0  
1                   0           0  
2  

In [5]:
X = parcelCombined.loc[:, parcelCombined.columns != 'landval']
y = parcelCombined['landval']

In [6]:
from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=1)

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
print(X_train.shape)

(3296, 7)


In [10]:
# Start putting together the neural network 
import keras_tuner
from keras.models import Sequential
from keras.layers.core import Dense

In [14]:

def build_model(hp):
    model = Sequential()
    for i in range(hp.Int('layers', 2, 10)):
        model.add(Dense(units=hp.Int('units_' + str(i),
                                            min_value=16,
                                            max_value=256,
                                            step=32),
                               activation='relu'))
    model.add(Dense(1))
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=['mse'])
    return model



In [16]:

tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_mse',
    max_trials=10,
    executions_per_trial=3,
    directory='model_dir',
    project_name='House_Price_Prediction')
 
tuner.search(X_train,y_train,batch_size=10,epochs=20,validation_data=(X_test, y_test))

Trial 10 Complete [00h 00m 49s]
val_mse: 332956172288.0

Best val_mse So Far: 311282357589.3333
Total elapsed time: 00h 08m 43s
INFO:tensorflow:Oracle triggered exit


In [18]:
best_model = tuner.get_best_models()[0]
best_paramaters = tuner.get_best_hyperparameters()[0]
print(best_paramaters.)

<keras_tuner.engine.hyperparameters.hyperparameters.HyperParameters object at 0x0000021AB7C96560>


In [None]:
best_model.fit(X_train, y_train, batch_size = 10, epochs = 14)

In [None]:
classifier = Sequential()

In [29]:
# Compile the model
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [30]:
# Train the model
classifier.fit(X_train, y_train, batch_size = 10, epochs = 14)

Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<keras.callbacks.History at 0x13d9a53d0f0>

In [31]:
result = classifier.predict(X_test)




In [32]:
print(result)

[[1.]
 [1.]
 [1.]
 ...
 [1.]
 [1.]
 [1.]]


In [None]:
# Split into training sets 

X_train, X_test, Y_train, Y_test = pd.DataFrame()
for dataset in jsonDatasets:
    temp_X_train, temp_X_test, temp_Y_train, temp_Y_test = getTrainSplit(dataset)

    X_train = pd.concat([X_train, temp_X_train], ignore_index=True)
    X_test = pd.concat([X_test, temp_X_test], ignore_index=True)
    Y_train = pd.concat([Y_train, temp_Y_train], ignore_index=True)
    Y_test = pd.concat([Y_test, temp_Y_test], ignore_index=True)

