In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
data = pd.read_csv('house_prices.csv')
data.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [3]:
data.columns

Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15'],
      dtype='object')

In [4]:
used_columns = ['price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']
used_columns

['price',
 'bedrooms',
 'bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above',
 'sqft_basement',
 'yr_built',
 'yr_renovated',
 'zipcode',
 'lat',
 'long']

In [5]:
data = pd.read_csv('house_prices.csv', usecols = used_columns)
data.head()

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long
0,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257
1,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319
2,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233
3,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393
4,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045


In [6]:
scaler_x = MinMaxScaler()
data[['bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']] = scaler_x.fit_transform(data[['bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']])

In [7]:
data.head()

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long
0,221900.0,0.090909,0.125,0.06717,0.003108,0.0,0.0,0.0,0.5,0.5,0.097588,0.0,0.478261,0.0,0.893939,0.571498,0.217608
1,538000.0,0.090909,0.28125,0.172075,0.004072,0.4,0.0,0.0,0.5,0.5,0.20614,0.082988,0.443478,0.988089,0.626263,0.908959,0.166113
2,180000.0,0.060606,0.125,0.036226,0.005743,0.0,0.0,0.0,0.5,0.416667,0.052632,0.0,0.286957,0.0,0.136364,0.936143,0.237542
3,604000.0,0.121212,0.375,0.126038,0.002714,0.0,0.0,0.0,1.0,0.5,0.083333,0.188797,0.565217,0.0,0.681818,0.586939,0.104651
4,510000.0,0.090909,0.25,0.104906,0.004579,0.0,0.0,0.0,0.5,0.583333,0.152412,0.0,0.756522,0.0,0.368687,0.741354,0.393688


In [8]:
scaler_y = MinMaxScaler()
data[['price']] = scaler_y.fit_transform(data[['price']])
data.head()

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long
0,0.019266,0.090909,0.125,0.06717,0.003108,0.0,0.0,0.0,0.5,0.5,0.097588,0.0,0.478261,0.0,0.893939,0.571498,0.217608
1,0.060721,0.090909,0.28125,0.172075,0.004072,0.4,0.0,0.0,0.5,0.5,0.20614,0.082988,0.443478,0.988089,0.626263,0.908959,0.166113
2,0.01377,0.060606,0.125,0.036226,0.005743,0.0,0.0,0.0,0.5,0.416667,0.052632,0.0,0.286957,0.0,0.136364,0.936143,0.237542
3,0.069377,0.121212,0.375,0.126038,0.002714,0.0,0.0,0.0,1.0,0.5,0.083333,0.188797,0.565217,0.0,0.681818,0.586939,0.104651
4,0.057049,0.090909,0.25,0.104906,0.004579,0.0,0.0,0.0,0.5,0.583333,0.152412,0.0,0.756522,0.0,0.368687,0.741354,0.393688


In [9]:
X = data.drop('price', axis = 1)
y = data.price

In [10]:
X.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long
0,0.090909,0.125,0.06717,0.003108,0.0,0.0,0.0,0.5,0.5,0.097588,0.0,0.478261,0.0,0.893939,0.571498,0.217608
1,0.090909,0.28125,0.172075,0.004072,0.4,0.0,0.0,0.5,0.5,0.20614,0.082988,0.443478,0.988089,0.626263,0.908959,0.166113
2,0.060606,0.125,0.036226,0.005743,0.0,0.0,0.0,0.5,0.416667,0.052632,0.0,0.286957,0.0,0.136364,0.936143,0.237542
3,0.121212,0.375,0.126038,0.002714,0.0,0.0,0.0,1.0,0.5,0.083333,0.188797,0.565217,0.0,0.681818,0.586939,0.104651
4,0.090909,0.25,0.104906,0.004579,0.0,0.0,0.0,0.5,0.583333,0.152412,0.0,0.756522,0.0,0.368687,0.741354,0.393688


In [11]:
y.head()

0    0.019266
1    0.060721
2    0.013770
3    0.069377
4    0.057049
Name: price, dtype: float64

In [12]:
predictors_columns = used_columns[1:17]
predictors_columns

['bedrooms',
 'bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above',
 'sqft_basement',
 'yr_built',
 'yr_renovated',
 'zipcode',
 'lat',
 'long']

In [13]:
columns = [tf.feature_column.numeric_column(key = c) for c in predictors_columns]

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [15]:
X_train.shape

(15129, 16)

In [16]:
X_test.shape

(6484, 16)

In [17]:
train_function = tf.estimator.inputs.pandas_input_fn(x = X_train, y = y_train, batch_size = 8,
                                                        num_epochs = None, shuffle = True)
regressor = tf.estimator.DNNRegressor(hidden_units = [8, 8, 8], feature_columns=columns)
regressor.train(input_fn = train_function, steps = 20000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\JOELPI~1\\AppData\\Local\\Temp\\tmpgn91typt', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001BE59A67F60>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\JOELPI

INFO:tensorflow:loss = 0.0014703085, step = 7000 (0.900 sec)
INFO:tensorflow:global_step/sec: 109.47
INFO:tensorflow:loss = 0.0053517306, step = 7100 (0.914 sec)
INFO:tensorflow:global_step/sec: 115.004
INFO:tensorflow:loss = 0.0015257546, step = 7200 (0.869 sec)
INFO:tensorflow:global_step/sec: 121.619
INFO:tensorflow:loss = 0.0017951493, step = 7300 (0.819 sec)
INFO:tensorflow:global_step/sec: 102.614
INFO:tensorflow:loss = 0.0022991435, step = 7400 (0.980 sec)
INFO:tensorflow:global_step/sec: 105.477
INFO:tensorflow:loss = 0.0016978022, step = 7500 (0.946 sec)
INFO:tensorflow:global_step/sec: 130.623
INFO:tensorflow:loss = 0.0023437086, step = 7600 (0.763 sec)
INFO:tensorflow:global_step/sec: 136.877
INFO:tensorflow:loss = 0.0019846535, step = 7700 (0.730 sec)
INFO:tensorflow:global_step/sec: 127.787
INFO:tensorflow:loss = 0.0016265246, step = 7800 (0.783 sec)
INFO:tensorflow:global_step/sec: 138.968
INFO:tensorflow:loss = 0.009344678, step = 7900 (0.722 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 170.166
INFO:tensorflow:loss = 0.0020910087, step = 15100 (0.586 sec)
INFO:tensorflow:global_step/sec: 174.012
INFO:tensorflow:loss = 0.018278698, step = 15200 (0.575 sec)
INFO:tensorflow:global_step/sec: 117.992
INFO:tensorflow:loss = 0.0041158022, step = 15300 (0.851 sec)
INFO:tensorflow:global_step/sec: 152.759
INFO:tensorflow:loss = 0.001387665, step = 15400 (0.652 sec)
INFO:tensorflow:global_step/sec: 147.795
INFO:tensorflow:loss = 0.0048043397, step = 15500 (0.679 sec)
INFO:tensorflow:global_step/sec: 173.109
INFO:tensorflow:loss = 0.001742339, step = 15600 (0.577 sec)
INFO:tensorflow:global_step/sec: 146.902
INFO:tensorflow:loss = 0.0015582258, step = 15700 (0.681 sec)
INFO:tensorflow:global_step/sec: 167.511
INFO:tensorflow:loss = 0.0023359857, step = 15800 (0.597 sec)
INFO:tensorflow:global_step/sec: 155.852
INFO:tensorflow:loss = 0.0006469828, step = 15900 (0.641 sec)
INFO:tensorflow:global_step/sec: 162.96
INFO:tensorflow:loss = 0.0011833716,

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x1be59a67ba8>

In [18]:
prediction_function = tf.estimator.inputs.pandas_input_fn(x = X_test, shuffle = False)
predictors = regressor.predict(input_fn=prediction_function)
list(predictors)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\JOELPI~1\AppData\Local\Temp\tmpgn91typt\model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[{'predictions': array([0.06237131], dtype=float32)},
 {'predictions': array([0.0335523], dtype=float32)},
 {'predictions': array([0.06928113], dtype=float32)},
 {'predictions': array([0.0202541], dtype=float32)},
 {'predictions': array([0.06640124], dtype=float32)},
 {'predictions': array([0.02376987], dtype=float32)},
 {'predictions': array([0.10148427], dtype=float32)},
 {'predictions': array([0.1299441], dtype=float32)},
 {'predictions': array([0.02245043], dtype=float32)},
 {'predictions': array([0.10588762], dtype=float32)},
 {'predictions': array([0.02826022], dtype=float32)},
 {'predictions': array([0.05015577], dtype=float32)},
 {'predictions': array([0.00576385], dtype=float32)},
 {'predictions': array([0.04353298], dtype=float32)},
 {'predictions': array([0.01654647], dtype=float32)},
 {'predictions': array([0.05332549], dtype=float32)},
 {'predictions': array([0.09169354], dtype=float32)},
 {'predictions': array([0.01299135], dtype=float32)},
 {'predictions': array([0.09857

In [19]:
predict_values = []
for p in regressor.predict(input_fn=prediction_function):
    predict_values.append(p['predictions'][0])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\JOELPI~1\AppData\Local\Temp\tmpgn91typt\model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [20]:
predict_values

[0.06237131,
 0.0335523,
 0.06928113,
 0.020254102,
 0.06640124,
 0.023769869,
 0.10148427,
 0.1299441,
 0.022450432,
 0.10588762,
 0.028260224,
 0.050155766,
 0.005763851,
 0.04353298,
 0.016546475,
 0.053325485,
 0.091693535,
 0.012991346,
 0.098570734,
 0.03552663,
 0.2739621,
 0.061418664,
 0.61284786,
 0.021288626,
 0.06384404,
 0.06481631,
 0.08589287,
 0.042005748,
 0.043433692,
 0.13352136,
 0.09371415,
 0.036549136,
 0.022269366,
 0.07957618,
 0.061892625,
 0.0386462,
 0.0373522,
 0.019704763,
 0.0141635835,
 0.013536986,
 0.044893373,
 0.06997791,
 0.03711227,
 0.14497882,
 0.057251114,
 0.028629128,
 0.045710966,
 0.029164575,
 0.026650261,
 0.04730596,
 0.022609392,
 0.142933,
 0.022910293,
 0.06441954,
 0.085073784,
 0.08900915,
 0.23116499,
 0.045413766,
 0.0153659955,
 0.04546479,
 0.17309552,
 0.06468463,
 0.03158253,
 0.052932203,
 0.092807636,
 0.06869143,
 0.07885492,
 0.018515665,
 0.12504552,
 0.021070424,
 0.09796788,
 0.05512424,
 0.034056004,
 0.059147317,
 0.05

In [21]:
predict_values = np.asarray(predict_values).reshape(-1,1)
predict_values = scaler_y.inverse_transform(predict_values)
predict_values

array([[550581.25],
       [330836.28],
       [603268.6 ],
       ...,
       [725395.06],
       [452950.7 ],
       [534870.56]], dtype=float32)

In [22]:
y_test2 = y_test.values.reshape(-1,1)
y_test2 = scaler_y.inverse_transform(y_test2)
y_test2

array([[540000.],
       [245990.],
       [585188.],
       ...,
       [590000.],
       [370000.],
       [520000.]])

In [23]:
mae = mean_absolute_error(y_test2, predict_values)
mae

101395.81338679827