In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
data = pd.read_csv('Melbourne_housing_FULL.csv')

In [3]:
data = data[data.Price.notna()]

In [4]:
data.dropna(subset=['Rooms', 'Distance', 'Bathroom', 'Landsize', 'BuildingArea', 'YearBuilt'], inplace=True)

In [5]:
X = data[['Rooms', 'Distance', 'Bathroom', 'Landsize', 'BuildingArea', 'YearBuilt']]
y = data['Price']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
with open('scaler.pkl','wb') as files:
    pickle.dump(scaler, files)

In [9]:
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

In [10]:
with open('knn_model.pkl','wb') as files:
    pickle.dump(knn_model, files)

In [11]:
nn_train = knn_model.predict(X_train_scaled).reshape(-1, 1)
nn_test = knn_model.predict(X_test_scaled).reshape(-1, 1)

In [12]:
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=1, activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(16, activation='relu'))
nn_model.add(Dense(8, activation='relu'))
nn_model.add(Dense(1, activation='linear'))

In [13]:
nn_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

In [14]:
nn_model.fit(nn_train, y_train, validation_data=(nn_test,y_test), epochs=200, batch_size=32, verbose=2)

Epoch 1/200
226/226 - 3s - loss: 414237425664.0000 - mse: 414237425664.0000 - val_loss: 192987299840.0000 - val_mse: 192987299840.0000 - 3s/epoch - 12ms/step
Epoch 2/200
226/226 - 0s - loss: 110548860928.0000 - mse: 110548860928.0000 - val_loss: 191845793792.0000 - val_mse: 191845793792.0000 - 287ms/epoch - 1ms/step
Epoch 3/200
226/226 - 0s - loss: 111229206528.0000 - mse: 111229206528.0000 - val_loss: 191792332800.0000 - val_mse: 191792332800.0000 - 297ms/epoch - 1ms/step
Epoch 4/200
226/226 - 0s - loss: 110810128384.0000 - mse: 110810128384.0000 - val_loss: 192292290560.0000 - val_mse: 192292290560.0000 - 339ms/epoch - 1ms/step
Epoch 5/200
226/226 - 0s - loss: 111929401344.0000 - mse: 111929401344.0000 - val_loss: 195144679424.0000 - val_mse: 195144679424.0000 - 322ms/epoch - 1ms/step
Epoch 6/200
226/226 - 0s - loss: 111390670848.0000 - mse: 111390670848.0000 - val_loss: 199380959232.0000 - val_mse: 199380959232.0000 - 326ms/epoch - 1ms/step
Epoch 7/200
226/226 - 0s - loss: 112166780

<keras.callbacks.History at 0x27636582cd0>

In [15]:
new_data = pd.DataFrame({"Rooms": [2, 3, 4], "Distance": [5, 10, 15], "Bathroom": [1, 2, 3], "Landsize": [300, 400, 500], "BuildingArea": [100, 150, 200], "YearBuilt": [2000, 2010, 2020]})
scaled = scaler.transform(new_data)
knn_prediction = knn_model.predict(scaled).reshape(-1, 1)
nn_prediction = nn_model.predict(knn_prediction)
print(nn_prediction)

[[ 886957.9 ]
 [ 984320.94]
 [1060340.  ]]


In [16]:
with open('nn_model.pkl','wb') as files:
    pickle.dump(nn_model, files)