#  Real estate price prediction

## Učitavanje biblioteka

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
import math
import time

In [None]:
def acc_loss_plot(histories, model, ind=0):
    """Plot training and validation curves (MSE, then loss) for one run.

    Two figures are shown one after the other: the first with the
    ``mean_squared_error`` / ``val_mean_squared_error`` series, the second
    with the ``loss`` / ``val_loss`` series.

    Args:
        histories: Sequence of objects exposing a ``history`` mapping that
            contains the four keys named above.
        model: Model whose ``optimizer`` class name is used in the titles.
        ind: Index into ``histories`` selecting the run to plot.
    """
    run_log = histories[ind].history
    optimizer_name = type(model.optimizer).__name__

    for series_key, y_label, title_suffix in (
        ("mean_squared_error", "MSE", "MSE"),
        ("loss", "Loss", "loss"),
    ):
        # The figure has to exist before anything is drawn on it. Creating it
        # after plotting only opens a second, empty figure and leaves the
        # curves at the default size.
        plt.figure(figsize=(6, 6), dpi=500)
        plt.plot(pd.DataFrame(run_log[series_key]))
        plt.plot(pd.DataFrame(run_log[f"val_{series_key}"]))
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(f'{optimizer_name} {title_suffix}')
        plt.show()
# Function for computing regression metrics
def regression_metrics(testData,y_test, sY, model,tr_time):
    """Print regression metrics for ``model`` in the original target scale.

    Predictions and true targets are mapped back through
    ``sY.inverse_transform`` before the metrics are computed.

    Args:
        testData: Feature matrix passed to ``model.predict``.
        y_test: Scaled true targets; a 1-D array is treated as one column.
        sY: Fitted scaler exposing ``inverse_transform`` for the target.
        model: Model exposing ``predict`` and an ``optimizer`` attribute.
        tr_time: Training time in seconds; printed as given.
    """
    def _to_original_scale(values):
        # Scalers expect a 2-D (samples, features) array, so a flattened
        # target vector is turned back into a single column first.
        column = np.asarray(values)
        if column.ndim == 1:
            column = column.reshape(-1, 1)
        return sY.inverse_transform(column)

    y_pred = model.predict(testData)
    # Undo the scaling once and reuse the result for every metric.
    y_true_orig = _to_original_scale(y_test)
    y_pred_orig = _to_original_scale(y_pred)
    mse = metrics.mean_squared_error(y_true_orig, y_pred_orig)

    print("----------------------------------------------")
    print(F"Algoritam: {type(model.optimizer).__name__}")
    print(f"R2 score:{metrics.r2_score(y_true_orig, y_pred_orig)}")
    print(f"Mean Absolute Error:{metrics.mean_absolute_error(y_true_orig, y_pred_orig)}")
    print(f"Mean Squared Error:{mse}")
    print(f"Root Mean Squared Error:{math.sqrt(mse)}")
    print(f"Training time (sec): {tr_time}" )
    print("***********************************************")
def metrics_table(modeli, testData, y_test,sY, tr_time, histories):
    """Build a comparison table of regression metrics, one row per model.

    For every model the prediction on ``testData`` is timed, metrics are
    computed in the original target scale (via ``sY.inverse_transform``),
    and the model's training curves are plotted with ``acc_loss_plot``.

    Args:
        modeli: Sequence of models exposing ``predict`` and ``optimizer``.
        testData: Feature matrix passed to each ``model.predict``.
        y_test: Scaled true targets; a 1-D array is treated as one column.
        sY: Fitted scaler exposing ``inverse_transform`` for the target.
        tr_time: Training times in seconds, parallel to ``modeli``.
        histories: Training histories, parallel to ``modeli``.

    Returns:
        A ``pandas.DataFrame`` with one row per model.
    """
    def _to_original_scale(values):
        # Scalers expect a 2-D (samples, features) array, so a flattened
        # target vector is turned back into a single column first.
        column = np.asarray(values)
        if column.ndim == 1:
            column = column.reshape(-1, 1)
        return sY.inverse_transform(column)

    # The true targets do not depend on the model: unscale them once.
    y_true_orig = _to_original_scale(y_test)

    data = []
    for ind, model in enumerate(modeli):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start = time.perf_counter()
        y_pred = model.predict(testData)
        test_time = time.perf_counter() - start

        y_pred_orig = _to_original_scale(y_pred)
        mse = metrics.mean_squared_error(y_true_orig, y_pred_orig)
        data.append([
            type(model.optimizer).__name__,
            metrics.r2_score(y_true_orig, y_pred_orig),
            metrics.mean_absolute_error(y_true_orig, y_pred_orig),
            mse,
            math.sqrt(mse),
            tr_time[ind],
            test_time,
            # Number of recorded loss values, i.e. epochs actually run.
            len(histories[ind].history['loss'])
            ])
        acc_loss_plot(histories, model, ind)
    df = pd.DataFrame(data, columns = ['Algoritam', "R2 score",'Mean Absolute Error',"Mean Squared Error","Root Mean Squared Error","Training time (sec)","Test time (sec)",'Epochs'])
    return df

## Učitavanje skupa podataka

In [None]:
# Read the real-estate CSV into a DataFrame and preview its first five rows.
dataset = pd.read_csv(filepath_or_buffer='../datasets/real_estate_dataset.csv')
dataset.head(n=5)

## Analiza skupa podataka

In [None]:
# Display the dtype of every column in the dataset.
dataset.dtypes

In [None]:
# Display summary statistics for the dataset's columns.
dataset.describe()

In [None]:
# Summary statistics of the MEDV target laid out as a single-row table.
pd.DataFrame(
    data=dataset['MEDV'].describe().to_numpy().reshape(1, 8),
    columns=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'],
)

In [None]:
# Pairwise column correlations, drawn as a heatmap on a 10x10 inch figure.
korelacijska_matrica = dataset.corr()
sns.heatmap(
    korelacijska_matrica,
    vmax=0.9,
    square=True,
    ax=plt.subplots(figsize=(10, 10))[1],
)

In [None]:
# Correlation of every other column with MEDV, sorted ascending.
print(
    korelacijska_matrica['MEDV']
    .drop(labels=['MEDV'])
    .sort_values()
)

## Čišćenje

In [None]:
# Count missing values per column (author's note: 5 null values for RM).
dataset.isna().sum()

In [None]:
# Display the column dtypes again before the imputation step.
dataset.dtypes

In [None]:
from sklearn.impute import SimpleImputer

# Replace NaN entries in the RM column with the column mean.
# NOTE(review): the mean is computed over the whole dataset, before the
# train/test split performed later in this file.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
dataset[['RM']] = imputer.fit_transform(dataset[['RM']])

In [None]:
# Re-count missing values per column after the imputation step.
dataset.isna().sum()

In [None]:
# Preview the first rows of the dataset after imputation.
dataset.head()

In [None]:
# Features: every column except the MEDV target.
X = dataset.drop(columns=['MEDV'])
# Select the target by name as well, so X and Y stay consistent even if
# MEDV is not the last column of the file.
Y = dataset['MEDV'].values

In [None]:
# Preview the first rows of the feature table.
X.head()

In [None]:
# Turn the target vector into a single column (one row per sample).
Y = Y.reshape(-1, 1)

## Podjela na trening i test skupove

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=100,
)

## Standardizacija

In [None]:
from sklearn.preprocessing import StandardScaler

# One scaler for the features and one for the target.
scX, scY = StandardScaler(), StandardScaler()
# Each scaler is fitted on the training split only; the test split is
# transformed with the statistics learned from training data.
X_train, X_test = scX.fit_transform(X_train), scX.transform(X_test)
y_train, y_test = scY.fit_transform(y_train), scY.transform(y_test)

In [None]:
# Collapse the scaled target columns back into 1-D vectors.
y_train, y_test = y_train.flatten(), y_test.flatten()

## ANN - ADAM

In [None]:
ann_adam = tf.keras.models.Sequential()
# Two hidden layers with 64 neurons each
ann_adam.add(tf.keras.layers.Dense(units=64, activation='relu'))
ann_adam.add(tf.keras.layers.Dense(units=64, activation='relu'))
# Add the output layer (a single linear unit for regression)
ann_adam.add(tf.keras.layers.Dense(units=1, activation='linear'))
# Compile the network
ann_adam.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mean_squared_error'])
# Evaluation happens after training via regression_metrics; nothing is
# scored here because the model has not been fitted yet.

In [None]:
# Train the Adam model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
adam_history = ann_adam.fit(X_train, y_train, batch_size=32, epochs=45, validation_data=(X_test, y_test))
adam_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the Adam model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_adam,
    tr_time=adam_time,
)

In [None]:
# Plot the MSE and loss curves of the Adam training run.
acc_loss_plot(histories=[adam_history], model=ann_adam)

## ANN - ADAMAX

In [None]:

# Two hidden ReLU layers of 64 units and one linear output unit, compiled
# with the Adamax optimizer and MSE as both loss and metric.
ann_adamax = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='linear'),
])
ann_adamax.compile(
    optimizer='adamax',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [None]:
# Train the Adamax model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
adamax_history = ann_adamax.fit(X_train, y_train, batch_size=32, epochs=120, validation_data=(X_test, y_test))
adamax_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the Adamax model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_adamax,
    tr_time=adamax_time,
)

In [None]:
# Plot the MSE and loss curves of the Adamax training run.
acc_loss_plot(histories=[adamax_history], model=ann_adamax)

## ANN - ADAGRAD

In [None]:

# Two hidden ReLU layers of 64 units and one linear output unit, compiled
# with the Adagrad optimizer and MSE as both loss and metric.
ann_adagrad = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='linear'),
])
ann_adagrad.compile(
    optimizer='adagrad',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [None]:
# Train the Adagrad model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
adagrad_history = ann_adagrad.fit(X_train, y_train, batch_size=32, epochs=3000, validation_data=(X_test, y_test))
adagrad_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the Adagrad model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_adagrad,
    tr_time=adagrad_time,
)

In [None]:
# Plot the MSE and loss curves of the Adagrad training run.
acc_loss_plot(histories=[adagrad_history], model=ann_adagrad)

## ANN - NADAM

In [None]:

# Two hidden ReLU layers of 64 units and one linear output unit, compiled
# with the Nadam optimizer and MSE as both loss and metric.
ann_nadam = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='linear'),
])
ann_nadam.compile(
    optimizer='nadam',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [None]:
# Train the Nadam model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
nadam_history = ann_nadam.fit(X_train, y_train, batch_size=32, epochs=45, validation_data=(X_test, y_test))
nadam_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the Nadam model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_nadam,
    tr_time=nadam_time,
)

In [None]:
# Plot the MSE and loss curves of the Nadam training run.
acc_loss_plot(histories=[nadam_history], model=ann_nadam)

## ANN - SGD

In [None]:
# Two hidden ReLU layers of 64 units and one linear output unit, compiled
# with the SGD optimizer and MSE as both loss and metric.
ann_sgd = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='linear'),
])
ann_sgd.compile(
    optimizer='SGD',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [None]:
# Train the SGD model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
sgd_history = ann_sgd.fit(X_train, y_train, batch_size=32, epochs=200, validation_data=(X_test, y_test))
sgd_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the SGD model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_sgd,
    tr_time=sgd_time,
)

In [None]:
# Plot the MSE and loss curves of the SGD training run.
acc_loss_plot(histories=[sgd_history], model=ann_sgd)

## ANN - RMSprop

In [None]:
# Two hidden ReLU layers of 64 units and one linear output unit, compiled
# with the RMSprop optimizer and MSE as both loss and metric.
ann_rms = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='linear'),
])
ann_rms.compile(
    optimizer='rmsprop',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [None]:
# Train the RMSprop model and record how long training takes. perf_counter is
# monotonic, so the duration is unaffected by system clock adjustments.
# Note: the test split doubles as the validation set for the per-epoch curves.
start = time.perf_counter()
rms_history = ann_rms.fit(X_train, y_train, batch_size=32, epochs=60, validation_data=(X_test, y_test))
rms_time = time.perf_counter() - start

In [None]:
# Print the test-set metrics of the RMSprop model in the original target scale.
regression_metrics(
    testData=X_test,
    y_test=y_test,
    sY=scY,
    model=ann_rms,
    tr_time=rms_time,
)

In [None]:
# Plot the MSE and loss curves of the RMSprop training run.
acc_loss_plot(histories=[rms_history], model=ann_rms)

In [None]:
# Parallel lists: position i in each list belongs to the same optimizer run.
models = [
    ann_adam,
    ann_adamax,
    ann_adagrad,
    ann_nadam,
    ann_sgd,
    ann_rms,
]
times = [
    adam_time,
    adamax_time,
    adagrad_time,
    nadam_time,
    sgd_time,
    rms_time,
]
histories = [
    adam_history,
    adamax_history,
    adagrad_history,
    nadam_history,
    sgd_history,
    rms_history,
]
# Build (and display) the side-by-side comparison table for all optimizers.
metrics_table(
    modeli=models,
    testData=X_test,
    y_test=y_test,
    sY=scY,
    tr_time=times,
    histories=histories,
)

In [None]:
# Persist every trained model under a path named after its optimizer class.
# NOTE(review): the target path has no file extension; whether Keras accepts
# that depends on the installed version — confirm before upgrading.
for model in models:
    model.save(f'../saved_models/real_estate_price/{type(model.optimizer).__name__}')