In [1]:
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_error

In [2]:
data = pd.read_csv('melb.csv')
data.head(3)

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000,S,Biggin,3/12/2016,2.5,3067,...,1,1.0,202,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019
1,Abbotsford,25 Bloomburg St,2,h,1035000,S,Biggin,4/2/2016,2.5,3067,...,1,0.0,156,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019
2,Abbotsford,5 Charles St,3,h,1465000,SP,Biggin,4/3/2017,2.5,3067,...,2,0.0,134,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019


In [3]:
cols = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and data[col].isnull().sum() < 100]
data = data[cols]
data.head(3)

Unnamed: 0,Rooms,Price,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,Lattitude,Longtitude,Propertycount
0,2,1480000,2.5,3067,2,1,1.0,202,-37.7996,144.9984,4019
1,2,1035000,2.5,3067,2,1,0.0,156,-37.8079,144.9934,4019
2,3,1465000,2.5,3067,3,2,0.0,134,-37.8093,144.9944,4019


In [4]:
data.isnull().sum()

Rooms             0
Price             0
Distance          0
Postcode          0
Bedroom2          0
Bathroom          0
Car              62
Landsize          0
Lattitude         0
Longtitude        0
Propertycount     0
dtype: int64

In [5]:
y = data.Price
X = data.drop('Price', axis=1)

In [6]:
imputer = SimpleImputer(strategy='median')

In [7]:
imp_x = pd.DataFrame(imputer.fit_transform(X))
imp_x.columns = X.columns

imp_x.head(3)

Unnamed: 0,Rooms,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,Lattitude,Longtitude,Propertycount
0,2.0,2.5,3067.0,2.0,1.0,1.0,202.0,-37.7996,144.9984,4019.0
1,2.0,2.5,3067.0,2.0,1.0,0.0,156.0,-37.8079,144.9934,4019.0
2,3.0,2.5,3067.0,3.0,2.0,0.0,134.0,-37.8093,144.9944,4019.0


In [8]:
imp_x.shape

(13580, 10)

In [9]:
X_train, X_valid, y_train, y_valid = train_test_split(imp_x, y, test_size=0.24, random_state=11)

In [10]:
model = keras.Sequential([
    layers.Dense(units=8, input_shape=[10]),
    layers.Dense(units=8),
    layers.Dense(units=6),
    layers.Dense(units=4),
    layers.Dense(units=1)
])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [11]:
model.compile(optimizer='adam', loss='mae')

In [12]:
model.fit(X_train, y_train)



<tensorflow.python.keras.callbacks.History at 0x16e24f200c8>

In [13]:
predictions = model.predict(X_valid)
predictions

array([[ 860085.75],
       [ 653921.4 ],
       [1208847.8 ],
       ...,
       [ 636840.  ],
       [ 815095.3 ],
       [ 829751.94]], dtype=float32)

In [14]:
mae = mean_absolute_error(predictions, y_valid)
print('MAE with Deep Learning: ', mae)

MAE with Deep Learning:  564669.0808378068
