In [1]:
import sys
import os

# Make the project's own modules importable: resolve the directory two levels
# above the current working directory and put it first on the import path.
model_path = os.path.normpath(os.path.join(os.getcwd(), os.pardir, os.pardir))
sys.path.insert(0, model_path)

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from utils import ROOT_DIR, get_data
from joblib import load

# Enable the IPython autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to local modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Unpack the six values returned by utils.get_data(): an X/Y pair for each of the
# training, validation and test splits. Later cells pass the X values to
# model.predict() and compare the Y values against the predictions.
# NOTE(review): how the splits are built is defined in utils.get_data — not visible here.
X_train, Y_train, X_validation, Y_validation, X_test, Y_test = get_data()

In [4]:
# (inputs, targets) pair for every split, keyed by split name.
data = dict(
    training=(X_train, Y_train),
    validation=(X_validation, Y_validation),
    test=(X_test, Y_test),
)

In [5]:
# Template with one empty (None) slot per data split; copied wherever a
# per-split result container is needed.
sets = dict.fromkeys(("training", "validation", "test"))

In [6]:
# Template record for one model: the model object itself plus, for each result
# field, a fresh copy of the per-split template.
description = {
    "model": None,
    **{field: dict(sets) for field in ("predictions", "rmse", "mae", "R2")},
}

In [7]:
# One independent record per model, built from the `description` template.
#
# A plain `{**description}` is only a shallow copy: the nested per-split dicts
# ("predictions", "rmse", "mae", "R2") would be the *same objects* in every
# model's record, so writing one model's results would overwrite the results
# stored for all the others. Copy each nested dict so every model owns its own
# containers; non-dict fields (e.g. the "model" placeholder) are kept as-is.
models = {
    model_name: {
        field: (dict(value) if isinstance(value, dict) else value)
        for field, value in description.items()
    }
    for model_name in ("MLP0", "MLP1", "MLP2", "XGBOOST")
}

In [8]:
# Restore every saved best model from disk and attach it to its record.
# Each entry: (key in `models`, loader function, path relative to ROOT_DIR).
saved_models = (
    ("MLP0", tf.keras.models.load_model, "/best-models/mlp/hidden-layers-0"),
    ("MLP1", tf.keras.models.load_model, "/best-models/mlp/hidden-layers-1"),
    ("MLP2", tf.keras.models.load_model, "/best-models/mlp/hidden-layers-2"),
    ("XGBOOST", load, "/best-models/xgboost/model.joblib.dat"),
)
for model_key, loader, relative_path in saved_models:
    models[model_key]["model"] = loader(ROOT_DIR + relative_path)

In [9]:
# Empty RMSE table with one row per model; columns are added as results arrive.
rmseDf = pd.DataFrame(index=list(models))

In [10]:
# Empty MAE table with one row per model; columns are added as results arrive.
maeDf = pd.DataFrame(index=list(models))

In [11]:
# Empty R2 table with one row per model; columns are added as results arrive.
R2Df = pd.DataFrame(index=list(models))

In [12]:
# Evaluate every model on every split: store predictions and RMSE / MAE / R2 in
# the model's record, mirror the metrics into the summary tables, and print them.
for model_key, model_description in models.items():
    print("--MODEL: ", model_key)
    estimator = model_description["model"]
    for set_key in model_description["predictions"]:
        inputs, targets = data[set_key]

        # Flatten the model output to 1-D before comparing it with the targets.
        predicted = estimator.predict(inputs).reshape(-1)
        model_description["predictions"][set_key] = predicted

        rmse = np.sqrt(mean_squared_error(targets, predicted))
        model_description["rmse"][set_key] = rmse
        mae = mean_absolute_error(targets, predicted)
        model_description["mae"][set_key] = mae
        r2 = r2_score(targets, predicted)
        model_description["R2"][set_key] = r2

        # Writing to a not-yet-existing column creates it in the summary table.
        rmseDf.loc[model_key, set_key] = rmse
        maeDf.loc[model_key, set_key] = mae
        R2Df.loc[model_key, set_key] = r2

        print("    -SET: ", set_key, "\n")
        print("        RMSE: ", rmse)
        print("        MAE: ", mae)
        print("        R2: ", r2)
        print("\n")

--MODEL:  MLP0
    -SET:  training 

        RMSE:  0.6951549354584294
        MAE:  0.446393040896501
        R2:  0.22740852976046155


    -SET:  validation 

        RMSE:  0.6837542344516792
        MAE:  0.44973005604844785
        R2:  0.11134097421652955


    -SET:  test 

        RMSE:  0.6620162202354599
        MAE:  0.44600620204901004
        R2:  0.24175540961843844


--MODEL:  MLP1
    -SET:  training 

        RMSE:  0.632382519805944
        MAE:  0.38633595702381845
        R2:  0.36063861720954205


    -SET:  validation 

        RMSE:  0.6157468271293002
        MAE:  0.3956968376286993
        R2:  0.2793249885237765


    -SET:  test 

        RMSE:  0.6265302642209761
        MAE:  0.40169343370189553
        R2:  0.3208649133624505


--MODEL:  MLP2
    -SET:  training 

        RMSE:  0.6752053990989827
        MAE:  0.4131325904719386
        R2:  0.2711158594753902


    -SET:  validation 

        RMSE:  0.6327858524723503
        MAE:  0.41615921474701395


In [13]:
# Emit the RMSE summary as a LaTeX table, two decimals in each of the three
# split columns.
two_decimals = "{:0.2f}".format
rmse_latex = rmseDf.to_latex(
    caption="RMSE metric of models for training/validation/test sets",
    label="tab:rmse-metrics",
    formatters=[two_decimals] * 3,
)
print(rmse_latex)

\begin{table}
\centering
\caption{RMSE metric of models for training/validation/test sets}
\label{tab:rmse-metrics}
\begin{tabular}{lrrr}
\toprule
{} & training & validation & test \\
\midrule
MLP0    &     0.70 &       0.68 & 0.66 \\
MLP1    &     0.63 &       0.62 & 0.63 \\
MLP2    &     0.68 &       0.63 & 0.66 \\
XGBOOST &     0.46 &       0.58 & 0.64 \\
\bottomrule
\end{tabular}
\end{table}



In [14]:
# Emit the MAE summary as a LaTeX table, two decimals in each of the three
# split columns.
two_decimals = "{:0.2f}".format
mae_latex = maeDf.to_latex(
    caption="MAE metric of models for training/validation/test sets",
    label="tab:mae-metrics",
    formatters=[two_decimals] * 3,
)
print(mae_latex)

\begin{table}
\centering
\caption{MAE metric of models for training/validation/test sets}
\label{tab:mae-metrics}
\begin{tabular}{lrrr}
\toprule
{} & training & validation & test \\
\midrule
MLP0    &     0.45 &       0.45 & 0.45 \\
MLP1    &     0.39 &       0.40 & 0.40 \\
MLP2    &     0.41 &       0.42 & 0.43 \\
XGBOOST &     0.28 &       0.37 & 0.39 \\
\bottomrule
\end{tabular}
\end{table}



In [15]:
# Emit the R2 summary as a LaTeX table, three decimals in each of the three
# split columns.
three_decimals = "{:0.3f}".format
r2_latex = R2Df.to_latex(
    caption="R2 metric of models for training/validation/test sets",
    label="tab:r2-metrics",
    formatters=[three_decimals] * 3,
)
print(r2_latex)

\begin{table}
\centering
\caption{R2 metric of models for training/validation/test sets}
\label{tab:r2-metrics}
\begin{tabular}{lrrr}
\toprule
{} & training & validation &  test \\
\midrule
MLP0    &    0.227 &      0.111 & 0.242 \\
MLP1    &    0.361 &      0.279 & 0.321 \\
MLP2    &    0.271 &      0.239 & 0.255 \\
XGBOOST &    0.657 &      0.354 & 0.288 \\
\bottomrule
\end{tabular}
\end{table}

