-
Notifications
You must be signed in to change notification settings - Fork 3
/
realtyAllPredict.py
144 lines (117 loc) · 3.91 KB
/
realtyAllPredict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
###
# realtyAllPredict.py
#
# Predicts housing prices for the properties in data2018.csv for the year 2021.
#
# Outputs predict.csv
###
import csv
import numpy as np
import pandas as pd
from neupy import plots
from neupy import environment
import matplotlib.pyplot as pl
from sklearn import preprocessing
from neupy.estimators import rmsle
from neupy import algorithms, layers
import sklearn.learning_curve as curves
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
# Load the training set (historical sales) and the prediction set
# (future property rows), then shuffle the training rows so the
# train/validation split further down is not biased by file order.
raw = pd.read_csv('./new_data/dataAll.csv')
shuffle_order = np.random.permutation(len(raw))
data = raw.take(shuffle_order)
test = pd.read_csv('./new_data/future_values.csv')
# Select the model's input features and targets and convert them to float.
#
# BUG FIX: the original extracted each column into its own variable and then
# called `col.astype('float')` without assigning the result. pandas'
# `astype` returns a NEW object — it never converts in place — so all of
# those calls were no-ops. Selecting the columns as a frame and converting
# in one step performs the conversion for real and removes ~40 lines of
# copy-paste.
FEATURE_COLS = ["latitude", "longitude", "year", "bedrooms", "full_bth",
                "half_bth", "square_foot", "res", "condo", "yr_built"]
TARGET_COLS = ["bldg_price", "land_price"]

# Training features / targets and test features, as float DataFrames.
features = data[FEATURE_COLS].astype('float')
target = data[TARGET_COLS].astype('float')
test = test[FEATURE_COLS].astype('float')

# Normalize everything to [0, 1]. Separate scalers for inputs and targets
# so predictions can be un-scaled independently later.
data_scaler = preprocessing.MinMaxScaler()
target_scaler = preprocessing.MinMaxScaler()
data = data_scaler.fit_transform(features.values)
target = target_scaler.fit_transform(target.values)
# BUG FIX: the original called `fit_transform` here, re-fitting the scaler
# on the test set. That scales test rows with a different min/max than the
# training data the network was trained on. `transform` applies the
# training-set scaling, as intended.
test = data_scaler.transform(test.values)
# Setting seed for reproducibility
# NOTE(review): this seeds AFTER the shuffle above, so the row shuffle
# itself is not reproducible between runs — presumably unintended; confirm
# whether the shuffle should also be seeded.
environment.reproducible()
# # split data into training and validation
# 85% of the (already scaled) rows train the network; the remaining 15%
# are held out as a validation set passed to cgnet.train below.
x_train, x_test, y_train, y_test = train_test_split(
data, target, train_size=0.85
)
# Creating the neural network
# connection:
# Values being trained on. Currently lat, lon, year, bdrms, bathrooms, square feet,
# residential, condo, year built (10)
# Size of hidden layer. Currently arbitrarily set to 50
# Size of output values. Currently bldg_price and land_price (2)
# The input width (10) must match the number of feature columns selected
# above, and the output width (2) must match the two target columns.
cgnet = algorithms.ConjugateGradient(
connection=[
layers.Input(10),
layers.Sigmoid(50),
layers.Sigmoid(2),
],
search_method='golden',   # golden-section line search along each direction
show_epoch=25,            # log progress every 25 epochs
verbose=True,
addons=[algorithms.LinearSearch],
)
# Train the network on the 85% split, validating against the held-out 15%.
cgnet.train(x_train, y_train, x_test, y_test, epochs=100)
# Predict (scaled) bldg_price and land_price for every test property.
y_predict = cgnet.predict(test)

# Undo the min-max scaling ONCE, outside the loop. The original called
# inverse_transform on the full feature and prediction arrays inside the
# per-row loop, redoing the whole-array transform for every row (O(n^2)).
features_raw = data_scaler.inverse_transform(test)
prices_raw = target_scaler.inverse_transform(y_predict)

# Write one row per property: the un-scaled input features followed by the
# two predicted prices.
# BUG FIX: csv.writer requires the file opened with newline='' — without it
# the writer emits \r\r\n on Windows, i.e. a blank line after every row.
with open('predict.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(["latitude", "longitude", "year", "bedrooms", "full_bth",
                 "half_bth", "square_foot", "res", "condo", "yr_built",
                 "bldg_price", "land_price"])
    wr.writerows(feat.tolist() + price.tolist()
                 for feat, price in zip(features_raw, prices_raw))