In [1]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Switch the working directory to the project root so that the relative
# "./data/..." and "./Python/..." paths used by later cells resolve.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# raises on any machine where the directory does not exist — consider deriving
# the root from a configurable variable instead.
os.chdir("c:\\PythonProject\\EDA-Project")
# Echo the resulting working directory as the cell output.
os.getcwd()

'c:\\PythonProject\\EDA-Project'

In [3]:
# Load the cross-validation summary table: one row per hidden-layer node
# setting ("NN_Nodes") with its RMSE, MAE and MAPE values.
df_dnn_10cv_nodes = pd.read_csv("./data/dnn_10_cv_nod.csv")

In [4]:
# Display the loaded cross-validation table as the cell output.
df_dnn_10cv_nodes

Unnamed: 0,NN_Nodes,RMSE,MAE,MAPE
0,node_4,8203.11709,6471.704004,59.112028
1,node_5,7072.791797,5274.609082,51.164729
2,node_6,7335.614111,5597.333398,54.500249
3,node_7,6146.938232,4489.313354,45.346706
4,node_8,5940.875342,4303.817236,43.871529
5,node_10,5573.834863,3987.099194,41.473326
6,node_12,5389.542188,3844.239478,40.318003
7,node_14,5254.546826,3741.569995,39.603269
8,node_16,5077.989063,3585.9177,38.497878
9,node_18,5026.362451,3543.804688,38.180596


In [5]:
# Inspect column dtypes: "NN_Nodes" is read as text (object), so it is
# treated as a categorical axis when plotted; the metrics are float64.
df_dnn_10cv_nodes.dtypes

NN_Nodes     object
RMSE        float64
MAE         float64
MAPE        float64
dtype: object

In [6]:
# Shared plotly template: white background, black "Times New Roman" text,
# boxed axes (mirrored axis lines) with inward ticks and no grid lines.
eda_tempelete_01_white = {
    "layout": go.Layout(
        # Figure-level properties
        title=dict(font=dict(size=14), x=0.1),
        font=dict(size=11, color="#000000", family="Times New Roman"),
        margin=dict(b=65, l=60, r=30, t=50),
        plot_bgcolor="#ffffff",
        # The x and y axes share exactly the same styling, so the axis
        # settings are written once and applied to both.
        **{
            axis_name: dict(
                color="#000000",
                linecolor="#000000",
                ticks="inside",
                tickfont=dict(color="#000000", family="Times New Roman"),
                mirror=True,
                showline=True,
                showgrid=False,
            )
            for axis_name in ("xaxis", "yaxis")
        },
    )
}

In [7]:
# Line chart of the cross-validated MAPE for each hidden-layer node setting.
fig_dnn_10cv_nodes = go.Figure()

fig_dnn_10cv_nodes.add_trace(
    go.Scatter(
        x=df_dnn_10cv_nodes["NN_Nodes"],
        y=df_dnn_10cv_nodes["MAPE"],
        mode="lines+markers",
        line=dict(color="#000000"),
    )
)

# Titles, axis labels and size, styled with the shared white template.
fig_dnn_10cv_nodes.update_layout(
    template=eda_tempelete_01_white,
    title_text="MAPE, 10-fold CV mean values for different node numbers",
    xaxis=dict(
        title="Number of nodes in the hidden layers",
        tickangle=45,
    ),
    yaxis=dict(title="Mean Absolute Percentage Error [%]"),
    width=500,
    height=350,
)

# Dotted marker line at "node_30" and a shaded band from "node_30" to "node_100".
# NOTE(review): both positions are x-axis category labels — confirm that these
# labels are present in df_dnn_10cv_nodes["NN_Nodes"].
highlight_color = "#1377b9"
fig_dnn_10cv_nodes.add_vline(
    x="node_30",
    line_color=highlight_color,
    line_dash="dot",
)
fig_dnn_10cv_nodes.add_vrect(
    x0="node_30",
    x1="node_100",
    line_width=0,
    fillcolor=highlight_color,
    opacity=0.1,
)

fig_dnn_10cv_nodes.show()

In [8]:
import tensorflow as tf 
from tensorflow import keras

In [9]:
# Path to the test data set.
str_path_test_data = "./data/sKor_test_data.csv"

# Read the test data set.
df_raw_test = pd.read_csv(str_path_test_data)

# Work on a copy so the raw test frame stays untouched.
df_test = df_raw_test.copy()

# Column roles are derived from the name prefix: "num_*" columns are numerical
# features and "cat_*" columns are categorical features. Columns with any
# other prefix are left out of the filtered frame built below.
lst_cols = list(df_test.columns)

# Target column. It carries the "num" prefix itself, so it has to be taken
# out of the numerical feature list explicitly.
str_col_label = "num_tot_energy_heat"

# Numerical feature columns (target excluded).
lst_cols_num = [col for col in lst_cols if col.split("_")[0] == "num"]
lst_cols_num.remove(str_col_label)  # ValueError if the target column is missing

# Categorical feature columns.
lst_cols_cat = [col for col in lst_cols if col.split("_")[0] == "cat"]

# Select numerical features, categorical features and the target (in that
# order) and convert the categorical columns to the "category" dtype.
# `astype` returns a new frame, so nothing is assigned into a slice of
# `df_test` (the per-column assignment used before triggered pandas'
# SettingWithCopyWarning).
lst_cols_fil = lst_cols_num + lst_cols_cat + [str_col_label]
df_test_fil = df_test[lst_cols_fil].astype(
    {col: "category" for col in lst_cols_cat}
)

# One-hot encode all categorical columns in a single call. Dummy columns are
# named "<column>_<label>" and appended after the remaining columns in the
# order of `lst_cols_cat`, which is the same layout the former per-column
# loop produced.
df_test_fil = pd.get_dummies(df_test_fil, columns=lst_cols_cat, prefix_sep="_")

# Split into features and target and convert to arrays.
test_featrues = df_test_fil.copy()
test_label = test_featrues.pop(str_col_label)
# Force a float array: with pandas >= 2.0 the dummy columns are boolean, and
# mixing them with float columns would otherwise yield an object-dtype array
# that TensorFlow cannot convert to a tensor.
test_featrues_array = test_featrues.to_numpy(dtype="float64")
test_label_array = test_label.to_numpy()

In [10]:
# Restore the final DNN from its saved-model location.
dnn_model_fin = tf.keras.models.load_model(filepath="./Python/dnn_model_fin")

# Score the model on the test arrays; the returned list (loss followed by the
# model's metrics) is shown as the cell output.
dnn_model_fin.evaluate(
    x=test_featrues_array,
    y=test_label_array,
    verbose=0,
)

[33.20449447631836, 5499.42822265625, 3716.8154296875, 33.20449447631836]

In [11]:
# Predict on the test features and flatten the model output to one dimension.
test_predictions = (
    dnn_model_fin
    .predict(test_featrues_array)
    .flatten()
)

# Residual is defined here as prediction minus true value
# (positive = over-prediction).
residual = np.subtract(test_predictions, test_label_array)
residual

array([-1902.90894531, -6137.51907562,  4026.98294137, ...,
       -1932.05748047,  -268.44384766,  -346.80934812])

In [14]:
# Residuals against predicted values with an OLS trend line (red), drawn with
# open black circles and styled with the shared white template.
fig_residual = (
    px.scatter(
        x=test_predictions,
        y=residual,
        trendline="ols",
        trendline_color_override="#fc4040",
    )
    .update_traces(
        marker=dict(symbol="circle-open", color="#000000"),
    )
    .update_layout(
        template=eda_tempelete_01_white,
        title_text="Residuals for DNN predictions",
        xaxis=dict(title="Predicted Energy Consumption [MCal]"),
        # The y-axis zero line is kept (yaxis zeroline is not switched off).
        yaxis=dict(title="Residual [MCal]"),
        width=350,
        height=350,
    )
)

fig_residual.show()

In [13]:
# Upper bound shared by both axes and by the identity line.
# (Fixed value; max(test_label_array) would be the data-driven alternative.)
lim_pred = 30000

# Predicted vs. true values, with the y = x reference line for comparison.
fig_prediction = go.Figure()

fig_prediction.add_traces(
    [
        # Test samples as open black circles.
        go.Scatter(
            x=test_label_array,
            y=test_predictions,
            mode="markers",
            marker=dict(symbol="circle-open", color="#000000"),
        ),
        # Identity line from the origin to (lim_pred, lim_pred).
        # NOTE(review): the color is supplied through marker.color although
        # this trace draws only a line — confirm it renders as intended.
        go.Scatter(
            x=[0, lim_pred],
            y=[0, lim_pred],
            mode="lines",
            marker=dict(color="#fc4040"),
        ),
    ]
)

# Square plot with identical, locked ranges on both axes so that the identity
# line is the diagonal.
fig_prediction.update_layout(
    template=eda_tempelete_01_white,
    title_text="Prediction of Energy Consumption",
    xaxis=dict(
        title="True Values [MCal]",
        fixedrange=True,
        range=[0, lim_pred],
    ),
    yaxis=dict(
        title="Predictions [MCal]",
        fixedrange=True,
        range=[0, lim_pred],
    ),
    width=350,
    height=350,
    showlegend=False,
)