In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
data = pd.read_csv('Melbourne_housing_FULL.csv')

In [3]:
data = data[data.Price.notna()]

In [4]:
data.dropna(subset=['Rooms', 'Distance', 'Bathroom', 'Landsize', 'BuildingArea', 'YearBuilt'], inplace=True)

In [5]:
X = data[['Rooms', 'Distance', 'Bathroom', 'Landsize', 'BuildingArea', 'YearBuilt']]
y = data['Price']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
with open('scaler.pkl','wb') as files:
    pickle.dump(scaler, files)

In [9]:
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

In [10]:
with open('knn_model.pkl','wb') as files:
    pickle.dump(knn_model, files)

In [11]:
nn_train = knn_model.predict(X_train_scaled).reshape(-1, 1)
nn_test = knn_model.predict(X_test_scaled).reshape(-1, 1)

In [12]:
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=1, activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(16, activation='relu'))
nn_model.add(Dense(8, activation='relu'))
nn_model.add(Dense(1, activation='linear'))

In [13]:
nn_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

In [14]:
nn_model.fit(nn_train, y_train, validation_data=(nn_test,y_test), epochs=100, batch_size=32, verbose=2)

Epoch 1/100
226/226 - 1s - loss: 437375696896.0000 - mse: 437375696896.0000 - val_loss: 191721537536.0000 - val_mse: 191721537536.0000 - 1s/epoch - 5ms/step
Epoch 2/100
226/226 - 0s - loss: 110906613760.0000 - mse: 110906613760.0000 - val_loss: 192581320704.0000 - val_mse: 192581320704.0000 - 266ms/epoch - 1ms/step
Epoch 3/100
226/226 - 0s - loss: 111506341888.0000 - mse: 111506341888.0000 - val_loss: 193039253504.0000 - val_mse: 193039253504.0000 - 262ms/epoch - 1ms/step
Epoch 4/100
226/226 - 0s - loss: 111085297664.0000 - mse: 111085297664.0000 - val_loss: 192364560384.0000 - val_mse: 192364560384.0000 - 261ms/epoch - 1ms/step
Epoch 5/100
226/226 - 0s - loss: 112288833536.0000 - mse: 112288833536.0000 - val_loss: 192245301248.0000 - val_mse: 192245301248.0000 - 263ms/epoch - 1ms/step
Epoch 6/100
226/226 - 0s - loss: 111456182272.0000 - mse: 111456182272.0000 - val_loss: 191845777408.0000 - val_mse: 191845777408.0000 - 264ms/epoch - 1ms/step
Epoch 7/100
226/226 - 0s - loss: 1114441728

<keras.callbacks.History at 0x22c244f1110>

In [17]:
new_data = pd.DataFrame({"Rooms": [2, 3, 4], "Distance": [5, 10, 15], "Bathroom": [1, 2, 3], "Landsize": [300, 400, 500], "BuildingArea": [100, 150, 200], "YearBuilt": [2000, 2010, 2020]})
scaled = scaler.transform(new_data)
knn_prediction = knn_model.predict(scaled).reshape(-1, 1)
nn_prediction = nn_model.predict(knn_prediction)
print(nn_prediction)

[[ 889954. ]
 [ 987644.1]
 [1063918.5]]


In [16]:
with open('nn_model.pkl','wb') as files:
    pickle.dump(nn_model, files)