In [14]:
import sys
import os

# Put the "model" directory that sits two levels above the current working
# directory at the front of the import search path, so modules stored there
# can be imported by the cells below.
model_path = os.path.normpath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, "model")
)
sys.path.insert(0, model_path)

In [15]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
from utils import ROOT_DIR, get_data
from joblib import load


%matplotlib inline

In [16]:
# Fetch the six arrays returned by get_data(): for each of the train,
# validation and test partitions, the model inputs (X_*) and the
# ground-truth targets (Y_*) that the metrics are computed against.
(
    X_train, Y_train,
    X_validation, Y_validation,
    X_test, Y_test,
) = get_data()

In [17]:
# Map each partition name to its (inputs, targets) pair so the evaluation
# code can look partitions up by key.
data = dict(
    train=(X_train, Y_train),
    validation=(X_validation, Y_validation),
    test=(X_test, Y_test),
)

In [18]:
# Placeholder mapping with one empty (None) slot per data partition.
sets = dict.fromkeys(("train", "validation", "test"))

In [19]:
# Template for a single model entry: a slot for the loaded estimator and,
# for each result kind, a separate copy of the per-partition placeholders.
description = {
    "model": None,
    **{field: dict(sets) for field in ("predictions", "rmse", "R2")},
}

In [20]:
# One independent result record per model.
#
# A plain `{**description}` is only a shallow copy: every model would then
# reference the *same* nested "predictions" / "rmse" / "R2" dictionaries, so
# writing results for one model would silently overwrite the results of all
# the others. Copy the nested dictionaries as well so each model owns its
# own per-partition storage.
models = {
    model_name: {
        field: dict(value) if isinstance(value, dict) else value
        for field, value in description.items()
    }
    for model_name in ("MLP_0", "MLP_1", "MLP_2", "XGBOOST")
}

In [21]:
# Saved-model locations (relative to ROOT_DIR) of the three Keras networks.
keras_model_dirs = {
    "MLP_0": "/best-models/hidden-layers-0",
    "MLP_1": "/best-models/hidden-layers-1",
    "MLP_2": "/best-models/hidden-layers-2",
}
for model_key, relative_dir in keras_model_dirs.items():
    models[model_key]["model"] = tf.keras.models.load_model(ROOT_DIR + relative_dir)

# The XGBoost model is stored as a joblib dump rather than a Keras model.
models["XGBOOST"]["model"] = load(ROOT_DIR + "/best-models/xgboost/model.joblib.dat")

In [22]:
# Empty table indexed by model name; the RMSE columns are filled in during evaluation.
rmseDf = pd.DataFrame(index=list(models))

In [23]:
# Empty table indexed by model name; the R2 columns are filled in during evaluation.
R2Df = pd.DataFrame(index=list(models.keys()))

In [24]:
# Evaluate every model on every partition: store the flattened predictions,
# the RMSE and the R2 score in the model's record, mirror the two metrics
# into the summary tables, and print them.
for model_key, model_description in models.items():
    print("--MODEL: ", model_key)
    estimator = model_description["model"]
    for set_key in model_description["predictions"]:
        inputs, targets = data[set_key]

        # reshape(-1) flattens the predict() output to 1-D before scoring.
        predicted = estimator.predict(inputs).reshape(-1)
        model_description["predictions"][set_key] = predicted

        rmse_value = np.sqrt(mean_squared_error(targets, predicted))
        model_description["rmse"][set_key] = rmse_value

        r2_value = r2_score(targets, predicted)
        model_description["R2"][set_key] = r2_value

        rmseDf.loc[model_key, set_key] = rmse_value
        R2Df.loc[model_key, set_key] = r2_value
        print("    -SET: ", set_key, "\n")
        print("        RMSE: ", rmse_value)
        print("        R2: ", r2_value)
        print("\n")

--MODEL:  MLP_0
    -SET:  train 

        RMSE:  0.6951549354584294
        R2:  0.22740852976046155


    -SET:  validation 

        RMSE:  0.683754234451679
        R2:  0.11134097421652978


    -SET:  test 

        RMSE:  0.6620162202354599
        R2:  0.24175540961843844


--MODEL:  MLP_1
    -SET:  train 

        RMSE:  0.632382521034472
        R2:  0.3606386147253703


    -SET:  validation 

        RMSE:  0.6157468238915619
        R2:  0.2793249961027262


    -SET:  test 

        RMSE:  0.626530263543688
        R2:  0.32086491483075985


--MODEL:  MLP_2
    -SET:  train 

        RMSE:  0.675205396782917
        R2:  0.2711158644757754


    -SET:  validation 

        RMSE:  0.6327858514114709
        R2:  0.23888791347214078


    -SET:  test 

        RMSE:  0.6562601344869741
        R2:  0.2548836275446039


--MODEL:  XGBOOST
    -SET:  train 

        RMSE:  0.4630747059187001
        R2:  0.6571621150324569


    -SET:  validation 

        RMSE:  0.5830982354

In [25]:
# Render the RMSE summary (rounded to 4 decimals) as a LaTeX table.
rmse_latex = rmseDf.round(4).to_latex(
    caption="RMSE metric of models for train/validation/test sets",
    label="tab:rmse-metrics",
)
print(rmse_latex)

\begin{table}
\centering
\caption{RMSE metric of models for train/validation/test sets}
\label{tab:rmse-metrics}
\begin{tabular}{lrrr}
\toprule
{} &   train &  validation &    test \\
\midrule
MLP\_0   &  0.6952 &      0.6838 &  0.6620 \\
MLP\_1   &  0.6324 &      0.6157 &  0.6265 \\
MLP\_2   &  0.6752 &      0.6328 &  0.6563 \\
XGBOOST &  0.4631 &      0.5831 &  0.6414 \\
\bottomrule
\end{tabular}
\end{table}



In [26]:
# Render the R2 summary (rounded to 4 decimals) as a LaTeX table.
r2_latex = R2Df.round(4).to_latex(
    caption="R2 metric of models for train/validation/test sets",
    label="tab:r2-metrics",
)
print(r2_latex)

\begin{table}
\centering
\caption{R2 metric of models for train/validation/test sets}
\label{tab:r2-metrics}
\begin{tabular}{lrrr}
\toprule
{} &   train &  validation &    test \\
\midrule
MLP\_0   &  0.2274 &      0.1113 &  0.2418 \\
MLP\_1   &  0.3606 &      0.2793 &  0.3209 \\
MLP\_2   &  0.2711 &      0.2389 &  0.2549 \\
XGBOOST &  0.6572 &      0.3537 &  0.2882 \\
\bottomrule
\end{tabular}
\end{table}

