In [1]:
import sys
import os

# Make the project's own modules importable: the "model" directory is looked
# up two levels above the current working directory and put first on sys.path.
model_path = os.path.normpath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, "model")
)
sys.path.insert(0, model_path)

In [2]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
from utils import ROOT_DIR, get_data
from joblib import load


%matplotlib inline

In [3]:
# Load the six arrays returned by utils.get_data(): inputs (X_*) and targets
# (Y_*) for the train, validation and test splits, in that order.
(
    X_train, Y_train,
    X_validation, Y_validation,
    X_test, Y_test,
) = get_data()

In [4]:
# Group each split's (inputs, targets) pair under its split name so the
# evaluation code can look splits up by key.
data = dict(
    train=(X_train, Y_train),
    validation=(X_validation, Y_validation),
    test=(X_test, Y_test),
)

In [5]:
# Template with one empty (None) slot per data split; copied wherever a
# per-split result container is needed.
sets = dict.fromkeys(("train", "validation", "test"))

In [6]:
# Template for one model's record: the model object itself plus a separate
# per-split container (a copy of `sets`) for predictions, RMSE and R2.
description = {
    "model": None,
    **{metric: dict(sets) for metric in ("predictions", "rmse", "R2")},
}

In [7]:
# One independent result record per model.
#
# `{**description}` would only copy the top level of the template: the nested
# "predictions" / "rmse" / "R2" dicts would then be the very same objects for
# every model, so results written for one model would overwrite the others.
# Each nested dict is therefore copied per model; non-dict fields (the "model"
# placeholder) are carried over as-is.
models = {
    model_name: {
        field: (dict(value) if isinstance(value, dict) else value)
        for field, value in description.items()
    }
    for model_name in ("MLP_0", "MLP_1", "MLP_2", "XGBOOST")
}

In [8]:
# Saved Keras models, keyed by the entry of `models` they belong to; each
# path is relative to ROOT_DIR.
keras_model_dirs = {
    "MLP_0": "/best-models/no-hidden-layer",
    "MLP_1": "/best-models/one-hidden-layer-2",
    "MLP_2": "/best-models/two-hidden-layers-2",
}
for keras_key, keras_dir in keras_model_dirs.items():
    models[keras_key]["model"] = tf.keras.models.load_model(ROOT_DIR + keras_dir)

# The XGBoost model is read back with joblib's `load` rather than Keras.
# NOTE(review): joblib files are pickle-based; only load trusted artifacts.
models["XGBOOST"]["model"] = load(ROOT_DIR + "/best-models/xgboost/model.joblib.dat")

In [9]:
# Empty RMSE table with one row per model; the per-split columns are added
# when the metrics are computed.
rmseDf = pd.DataFrame(index=list(models))

In [10]:
# Empty R2 table with one row per model; the per-split columns are added
# when the metrics are computed.
R2Df = pd.DataFrame(index=list(models))

In [11]:
# Evaluate every loaded model on every data split.
# For each (model, split) pair this stores the predictions, RMSE and R2 in the
# model's record, mirrors both metrics into rmseDf / R2Df (row = model,
# column = split) and prints them.
for model_key, model_description in models.items():
    print("--MODEL: ", model_key)
    fitted_model = model_description["model"]
    for set_key, (set_inputs, set_targets) in data.items():
        # reshape(-1) flattens the model output to 1-D before scoring
        # (assumes one predicted value per sample -- TODO confirm).
        set_predictions = fitted_model.predict(set_inputs).reshape(-1)
        # RMSE is the square root of scikit-learn's mean squared error.
        set_rmse = np.sqrt(mean_squared_error(set_targets, set_predictions))
        set_r2 = r2_score(set_targets, set_predictions)

        model_description["predictions"][set_key] = set_predictions
        model_description["rmse"][set_key] = set_rmse
        model_description["R2"][set_key] = set_r2

        rmseDf.loc[model_key, set_key] = set_rmse
        R2Df.loc[model_key, set_key] = set_r2
        print("    -SET: ", set_key, "\n")
        print("        RMSE: ", set_rmse)
        print("        R2: ", set_r2)
        print("\n")

--MODEL:  MLP_0
    -SET:  train 

        RMSE:  0.697255647915664
        R2:  0.1532847844994446


    -SET:  validation 

        RMSE:  0.7814862814432579
        R2:  0.1012656040565919


    -SET:  test 

        RMSE:  0.7397863554712505
        R2:  0.1899891160032653


--MODEL:  MLP_1
    -SET:  train 

        RMSE:  0.5809772611122567
        R2:  0.4121431919498729


    -SET:  validation 

        RMSE:  0.7128265424665317
        R2:  0.2522500874761564


    -SET:  test 

        RMSE:  0.6990948936112176
        R2:  0.2766466778850597


--MODEL:  MLP_2
    -SET:  train 

        RMSE:  0.6205156826198612
        R2:  0.32940732271325235


    -SET:  validation 

        RMSE:  0.7126797363581999
        R2:  0.2525580528622915


    -SET:  test 

        RMSE:  0.7044357734961113
        R2:  0.26555204496031615


--MODEL:  XGBOOST
    -SET:  train 

        RMSE:  0.40094372119758903
        R2:  0.7200244438261473


    -SET:  validation 

        RMSE:  0.755622215

In [12]:
# Render the RMSE table (rounded to 4 decimals) as a LaTeX table and print
# the source.
rmse_latex = rmseDf.round(4).to_latex(
    caption="RMSE metric of models for train/validation/test sets",
    label="tab:rmse-metrics",
)
print(rmse_latex)

\begin{table}
\centering
\caption{RMSE metric of models for train/validation/test sets}
\label{tab:rmse-metrics}
\begin{tabular}{lrrr}
\toprule
{} &   train &  validation &    test \\
\midrule
MLP\_0   &  0.6973 &      0.7815 &  0.7398 \\
MLP\_1   &  0.5810 &      0.7128 &  0.6991 \\
MLP\_2   &  0.6205 &      0.7127 &  0.7044 \\
XGBOOST &  0.4009 &      0.7556 &  0.7133 \\
\bottomrule
\end{tabular}
\end{table}



In [13]:
# Render the R2 table (rounded to 4 decimals) as a LaTeX table and print
# the source.
r2_latex = R2Df.round(4).to_latex(
    caption="R2 metric of models for train/validation/test sets",
    label="tab:r2-metrics",
)
print(r2_latex)

\begin{table}
\centering
\caption{R2 metric of models for train/validation/test sets}
\label{tab:r2-metrics}
\begin{tabular}{lrrr}
\toprule
{} &   train &  validation &    test \\
\midrule
MLP\_0   &  0.1533 &      0.1013 &  0.1900 \\
MLP\_1   &  0.4121 &      0.2523 &  0.2766 \\
MLP\_2   &  0.3294 &      0.2526 &  0.2656 \\
XGBOOST &  0.7200 &      0.1598 &  0.2470 \\
\bottomrule
\end{tabular}
\end{table}

