-
Notifications
You must be signed in to change notification settings - Fork 3
/
realty0.py
151 lines (126 loc) · 4.15 KB
/
realty0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
###
# realty0.py
#
# Predicts housing prices for the year 2018.
#
# Outputs predict2018.csv
###
import csv
import numpy as np
import pandas as pd
from neupy import plots
from neupy import environment
import matplotlib.pyplot as pl
from sklearn import preprocessing
from neupy.estimators import rmsle
from neupy import algorithms, layers
import sklearn.learning_curve as curves
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
# Read training data files and testing data files
data = pd.read_csv('./new_data/data08-15.csv')
# Shuffle data (row order only; columns are untouched)
data = data.iloc[np.random.permutation(len(data))]
test = pd.read_csv('./new_data/data2018.csv')
# Get specific columns that we will be using and transfer them into the appropriate type
# For training data
#
# BUG FIX: pandas Series.astype() returns a NEW Series and never converts
# in place, so the original bare `lat.astype('float')` statements were
# no-ops whose results were discarded. The result must be assigned back
# for the float conversion to actually take effect.
lat = data["latitude"].astype('float')
lon = data["longitude"].astype('float')
year = data["year"].astype('float')
bdrms = data["bedrooms"].astype('float')
fbath = data["full_bth"].astype('float')
hbath = data["half_bth"].astype('float')
sf = data["square_foot"].astype('float')
res = data["res"].astype('float')
condo = data["condo"].astype('float')
built = data["yr_built"].astype('float')
# Targets: building price and land price
bldg = data["bldg_price"].astype('float')
land = data["land_price"].astype('float')
# Get specific columns that we will be using and transfer them into the appropriate type
# For testing data
#
# BUG FIX: as with the training columns, Series.astype() returns a new
# Series rather than converting in place, so the original bare
# `lat_test.astype('float')` statements did nothing. Assign the result
# back so the conversion sticks.
lat_test = test["latitude"].astype('float')
lon_test = test["longitude"].astype('float')
year_test = test["year"].astype('float')
bdrms_test = test["bedrooms"].astype('float')
fbath_test = test["full_bth"].astype('float')
hbath_test = test["half_bth"].astype('float')
sf_test = test["square_foot"].astype('float')
res_test = test["res"].astype('float')
condo_test = test["condo"].astype('float')
built_test = test["yr_built"].astype('float')
# Targets for the test year (used only to score predictions)
bldg_test = test["bldg_price"].astype('float')
land_test = test["land_price"].astype('float')
# Concatenate all values above into feature matrices (one column per feature)
data = pd.concat([lat,lon,year,bdrms,fbath,hbath,sf,res,condo,built], axis = 1)
test = pd.concat([lat_test,lon_test,year_test,bdrms_test,fbath_test,hbath_test,sf_test,res_test,condo_test,built_test], axis = 1)
# Target values. Currently bldg_price, land_price
target = pd.concat([bldg,land],axis=1)
target_test = pd.concat([bldg_test,land_test],axis=1)
# Normalize data to [0, 1] per column
data_scaler = preprocessing.MinMaxScaler()
target_scaler = preprocessing.MinMaxScaler()
data = data_scaler.fit_transform(data.values)
target = target_scaler.fit_transform(target.values)
# BUG FIX: use transform(), not fit_transform(), on the test features.
# Re-fitting the scaler on the 2018 data would rescale it with its own
# min/max, so identical raw feature values would map to different
# normalized values at train vs. predict time. The test set must be
# scaled with the statistics learned from the training data.
test = data_scaler.transform(test.values)
# Setting seed for reproducibility (neupy seeds its RNGs).
# NOTE(review): the row shuffle near the top of the file uses
# np.random.permutation and runs BEFORE this call, so that shuffle is
# not covered by the fixed seed — confirm whether full run-to-run
# reproducibility is required.
environment.reproducible()
# split data into training and validation
# 85% of rows train the network; the remaining 15% are the validation
# set passed to cgnet.train() below.
x_train, x_test, y_train, y_test = train_test_split(
    data, target, train_size=0.85
)
# Build the neural network architecture first, then hand it to the
# Conjugate Gradient trainer.
#
# Layer sizes:
#   - Input(10):   one unit per feature — lat, lon, year, bdrms, full/half
#                  baths, square feet, residential, condo, year built
#   - Sigmoid(50): hidden layer; width chosen arbitrarily
#   - Sigmoid(2):  outputs — bldg_price and land_price
network_layers = [
    layers.Input(10),
    layers.Sigmoid(50),
    layers.Sigmoid(2),
]
cgnet = algorithms.ConjugateGradient(
    connection=network_layers,
    search_method='golden',
    show_epoch=25,
    verbose=True,
    addons=[algorithms.LinearSearch],
)
# Train neural net on the training split, validating against the held-out split
cgnet.train(x_train, y_train, x_test, y_test, epochs=100)
# Make predictions
print("Starting predictions")
y_predict = cgnet.predict(test)
# Score predictions in original (un-normalized) price units
error = rmsle(target_test,
              target_scaler.inverse_transform(y_predict))
print(error)
# write values to csv
# lat,lon,year,bdrms,fbath,hbath,sf,res,condo,built
#
# FIX: hoist the inverse transforms out of the loop. The original
# re-ran both inverse_transform() calls for every output row, redoing
# the full-matrix transform N times (quadratic work). Compute each once.
features_orig = data_scaler.inverse_transform(test)
prices_orig = target_scaler.inverse_transform(y_predict)
# FIX: newline='' is required when passing a file object to csv.writer;
# without it, extra blank rows appear in the output on Windows.
with open('predict2018.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(["latitude", "longitude", "year", "bedrooms", "full_bth", "half_bth", "square_foot", "res", "condo", "yr_built", "bldg_price", "land_price"])
    for feat_row, price_row in zip(features_orig, prices_orig):
        wr.writerow(feat_row.tolist() + price_row.tolist())