In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Relative directory prefix that is prepended to every result CSV file name read in this notebook.
results_path = "qualification_results/"

## Load Data

In [3]:
def _read_ssa_result(csv_name):
    """Read one result CSV whose file name is appended to ``results_path``."""
    return pd.read_csv(results_path + csv_name)


# One result frame per forecasting model, all from the "wind_ssa" rolling-CV files.
persistence_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_persistence.csv")
sarima_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_sarima.csv")
var_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_var.csv")
hofts_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_hofts.csv")
cvfts_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_cvfts.csv")
cmvfts_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_cmvfts.csv")
lstm_multi_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_lstm_multi.csv")
lstm_uni_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_lstm_uni.csv")
mlp_multi_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_mlp_multi.csv")
mlp_uni_ssa_results = _read_ssa_result("rolling_cv_wind_ssa_mlp_uni.csv")

In [4]:
# Parse every stored RMSE entry with complex() and keep only its real part
# (presumably the CSV holds complex-formatted values -- TODO confirm).
RMSE_real = [complex(raw_value).real for raw_value in cvfts_ssa_results.RMSE]
cvfts_ssa_results['RMSE'] = RMSE_real

In [5]:
# Same treatment for the U column: parse with complex() and keep the real part.
U_real = [complex(raw_value).real for raw_value in cvfts_ssa_results.U]
cvfts_ssa_results['U'] = U_real

In [6]:
def createBoxplot(filename, data, xticklabels, ylabel):
    """Draw a styled boxplot, save it to ``filename`` and display it.

    Parameters
    ----------
    filename : str
        Path passed to ``Figure.savefig``.
    data : sequence
        One entry per box; forwarded unchanged to ``Axes.boxplot``.
    xticklabels : sequence of str
        Labels for the x-axis ticks, in the same order as ``data``.
    ylabel : str
        Label of the y axis.
    """
    # Always start from a brand-new figure. Reusing a fixed figure number would
    # draw on top of a previous plot whenever that figure is still open, and
    # the requested figsize would be ignored for the existing figure.
    fig, ax = plt.subplots(figsize=(9, 6))

    # patch_artist=True makes the boxes fillable patches.
    bp = ax.boxplot(data, patch_artist=True)

    # Boxes: outline colour and width first, then the fill colour.
    for box in bp['boxes']:
        box.set(color='#7570b3', linewidth=2)
        box.set(facecolor='#1b9e77')

    # Whiskers and caps share the outline colour.
    for whisker in bp['whiskers']:
        whisker.set(color='#7570b3', linewidth=2)

    for cap in bp['caps']:
        cap.set(color='#7570b3', linewidth=2)

    for median in bp['medians']:
        median.set(color='#b2df8a', linewidth=2)

    # Outlier markers.
    for flier in bp['fliers']:
        flier.set(marker='o', color='#e7298a', alpha=0.5)

    ax.set_xticklabels(xticklabels)
    ax.set_ylabel(ylabel)

    # Write the file before showing so the saved image never depends on what
    # the active backend does to the figure during show().
    fig.savefig(filename, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

## Boxplot SSA Multivariate

In [49]:
# RMSE distribution per multivariate model on the SSA results.
metric = 'RMSE'
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)
multi_data = [frame[metric] for frame in ssa_multi_frames]
# Tick labels follow the order of ssa_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'RMSE'
createBoxplot("boxplot_rmse_wind_ssa_multi", multi_data, xticks, ylab)

In [11]:
# SMAPE distribution per multivariate model on the SSA results.
metric = 'SMAPE'
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)
multi_data = [frame[metric] for frame in ssa_multi_frames]
# Tick labels follow the order of ssa_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'SMAPE'
createBoxplot("boxplot_smape_wind_ssa_multi", multi_data, xticks, ylab)

In [12]:
# U statistic distribution per multivariate model on the SSA results.
metric = 'U'
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)
multi_data = [frame[metric] for frame in ssa_multi_frames]
# Tick labels follow the order of ssa_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'U Statistic'
createBoxplot("boxplot_u_wind_ssa_multi", multi_data, xticks, ylab)

## Improvement table Multivariate

In [57]:
def improvement(metric_model, metric_persistence):
    """Return ``1 - mean(metric_model) / mean(metric_persistence)``.

    A positive result means the model's mean metric is lower than the
    persistence baseline's; zero means the two means are equal.
    """
    model_mean = np.mean(metric_model)
    baseline_mean = np.mean(metric_persistence)
    ratio = model_mean / baseline_mean
    return 1 - ratio

In [58]:
# Row labels (one per model) and column names (one per metric) of the improvement table.
# Columns are later filled from positional lists, so this order must match those lists.
index = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']
columns = ['imp(RMSE)', 'imp(SMAPE)', 'imp(U)']

In [59]:
# Empty improvement table; each column is assigned by a later cell.
imp_df = pd.DataFrame(columns=columns, index=index)

In [60]:
# Improvement of every multivariate model over persistence, measured on RMSE.
metric = 'RMSE'
baseline = persistence_ssa_results[metric]
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)

# The frame order matches the row order of imp_df.
imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in ssa_multi_frames]

In [61]:
# Improvement of every multivariate model over persistence, measured on SMAPE.
metric = 'SMAPE'
baseline = persistence_ssa_results[metric]
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)

# The frame order matches the row order of imp_df.
imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in ssa_multi_frames]

In [62]:
# Improvement of every multivariate model over persistence, measured on U.
metric = 'U'
baseline = persistence_ssa_results[metric]
ssa_multi_frames = (
    persistence_ssa_results,
    var_ssa_results,
    cmvfts_ssa_results,
    lstm_multi_ssa_results,
    mlp_multi_ssa_results,
)

# The frame order matches the row order of imp_df.
imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in ssa_multi_frames]

In [63]:
# Print the improvement table as LaTeX source.
print(imp_df.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  imp(RMSE) &  imp(SMAPE) &    imp(U) \\
\midrule
Persistence &   0.000000 &    0.000000 &  0.000000 \\
VAR         &   0.362824 &    0.350982 &  0.366469 \\
CMVFTS      &  -0.145933 &   -0.197114 & -0.153846 \\
LSTM\_MULTI  &   0.301120 &    0.286532 &  0.302361 \\
MLP\_MULTI   &   0.278740 &    0.273966 &  0.281550 \\
\bottomrule
\end{tabular}



## Boxplot OAHU SSA Univariate

### SARIMA is EXCLUDED until further notice!!

In [7]:
# RMSE distribution per univariate model on the SSA results.
# SARIMA is loaded above but deliberately left out of this comparison.
metric = 'RMSE'
ssa_uni_frames = (
    persistence_ssa_results,
    hofts_ssa_results,
    cvfts_ssa_results,
    lstm_uni_ssa_results,
    mlp_uni_ssa_results,
)
uni_data = [frame[metric] for frame in ssa_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence', 'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'RMSE'
createBoxplot("boxplot_rmse_wind_ssa_uni", uni_data, xticks, ylab)

In [8]:
# SMAPE distribution per univariate model on the SSA results.
metric = 'SMAPE'
ssa_uni_frames = (
    persistence_ssa_results,
    hofts_ssa_results,
    cvfts_ssa_results,
    lstm_uni_ssa_results,
    mlp_uni_ssa_results,
)
uni_data = [frame[metric] for frame in ssa_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence', 'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'SMAPE'
createBoxplot("boxplot_smape_wind_ssa_uni", uni_data, xticks, ylab)

In [9]:
# U statistic distribution per univariate model on the SSA results.
metric = 'U'
ssa_uni_frames = (
    persistence_ssa_results,
    hofts_ssa_results,
    cvfts_ssa_results,
    lstm_uni_ssa_results,
    mlp_uni_ssa_results,
)
uni_data = [frame[metric] for frame in ssa_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence', 'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'U Statistic'
createBoxplot("boxplot_u_wind_ssa_uni", uni_data, xticks, ylab)

## Improvement Table Univariate

In [54]:
# Row labels (models), column names and metric keys for the univariate SSA improvement table.
# Columns are filled from positional lists, so the row order here must match those lists.
index = ['Persistence', 'HOFTS','CVFTS','LSTM_UNI','MLP_UNI']
columns = ['imp(RMSE)', 'imp(SMAPE)', 'imp(U)']
metrics = ['RMSE', 'SMAPE', 'U']
# Start from an empty table; this rebinds imp_df to a new frame.
imp_df = pd.DataFrame(columns=columns, index=index)

In [55]:
# Fill one improvement column per metric; the frame order matches the rows of imp_df.
ssa_uni_frames = (
    persistence_ssa_results,
    hofts_ssa_results,
    cvfts_ssa_results,
    lstm_uni_ssa_results,
    mlp_uni_ssa_results,
)
for metric in metrics:
    baseline = persistence_ssa_results[metric]
    imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in ssa_uni_frames]

In [56]:
# Print the univariate SSA improvement table as LaTeX source.
print(imp_df.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  imp(RMSE) &  imp(SMAPE) &    imp(U) \\
\midrule
Persistence &   0.000000 &    0.000000 &  0.000000 \\
HOFTS       &   0.192187 &    0.198011 &  0.194922 \\
CVFTS       &   0.179111 &    0.150970 &  0.178945 \\
LSTM\_UNI    &   0.308891 &    0.294866 &  0.310273 \\
MLP\_UNI     &   0.306356 &    0.296070 &  0.308089 \\
\bottomrule
\end{tabular}



## Hybrid Comparison

In [69]:
# Rolling-CV results of the hybrid model on the SSA data.
hybrid_ssa_results = pd.read_csv(results_path + "rolling_cv_wind_ssa_hybrid.csv")

In [70]:
# RMSE of the hybrid model next to persistence, VAR and the multivariate LSTM.
metric = 'RMSE'
hybrid_frames = (
    persistence_ssa_results,
    var_ssa_results,
    lstm_multi_ssa_results,
    hybrid_ssa_results,
)
multi_data = [frame[metric] for frame in hybrid_frames]
# Tick labels follow the order of hybrid_frames.
xticks = ['Persistence','VAR','LSTM_MULTI','Hybrid']

ylab = 'RMSE'
createBoxplot("boxplot_rmse_wind_ssa_hybrid", multi_data, xticks, ylab)

In [None]:
# SMAPE of the hybrid model next to persistence, VAR and the multivariate LSTM.
metric = 'SMAPE'
multi_data = [persistence_ssa_results[metric], var_ssa_results[metric],lstm_multi_ssa_results[metric], hybrid_ssa_results[metric]]
# Tick labels follow the order of multi_data.
xticks = ['Persistence','VAR','LSTM_MULTI','Hybrid']

# The y axis shows SMAPE, so label it as such (it was mislabelled 'RMSE').
ylab = 'SMAPE'
createBoxplot("boxplot_smape_wind_ssa_hybrid", multi_data, xticks, ylab)

## Boxplot Oahu Raw Multivariate

In [10]:
def _read_raw_result(csv_name):
    """Read one result CSV whose file name is appended to ``results_path``."""
    return pd.read_csv(results_path + csv_name)


# One result frame per forecasting model, all from the "wind_raw" rolling-CV files.
persistence_raw_results = _read_raw_result("rolling_cv_wind_raw_persistence.csv")
var_raw_results = _read_raw_result("rolling_cv_wind_raw_var.csv")
hofts_raw_results = _read_raw_result("rolling_cv_wind_raw_hofts.csv")
cvfts_raw_results = _read_raw_result("rolling_cv_wind_raw_cvfts.csv")
cmvfts_raw_results = _read_raw_result("rolling_cv_wind_raw_cmvfts.csv")
lstm_multi_raw_results = _read_raw_result("rolling_cv_wind_raw_lstm_multi.csv")
lstm_uni_raw_results = _read_raw_result("rolling_cv_wind_raw_lstm_uni.csv")
mlp_multi_raw_results = _read_raw_result("rolling_cv_wind_raw_mlp_multi.csv")
mlp_uni_raw_results = _read_raw_result("rolling_cv_wind_raw_mlp_uni.csv")

In [11]:
# Parse the stored RMSE and U entries with complex() and keep only the real part
# (presumably the CSV holds complex-formatted values -- TODO confirm).
RMSE_real = [complex(raw_value).real for raw_value in cvfts_raw_results.RMSE]
cvfts_raw_results['RMSE'] = RMSE_real

U_real = [complex(raw_value).real for raw_value in cvfts_raw_results.U]
cvfts_raw_results['U'] = U_real

In [34]:
# RMSE distribution per multivariate model on the raw results.
metric = 'RMSE'
raw_multi_frames = (
    persistence_raw_results,
    var_raw_results,
    cmvfts_raw_results,
    lstm_multi_raw_results,
    mlp_multi_raw_results,
)
multi_data = [frame[metric] for frame in raw_multi_frames]
# Tick labels follow the order of raw_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'RMSE'
createBoxplot("boxplot_rmse_wind_raw_multi", multi_data, xticks, ylab)

In [35]:
# SMAPE distribution per multivariate model on the raw results.
metric = 'SMAPE'
raw_multi_frames = (
    persistence_raw_results,
    var_raw_results,
    cmvfts_raw_results,
    lstm_multi_raw_results,
    mlp_multi_raw_results,
)
multi_data = [frame[metric] for frame in raw_multi_frames]
# Tick labels follow the order of raw_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'SMAPE'
createBoxplot("boxplot_smape_wind_raw_multi", multi_data, xticks, ylab)

In [36]:
# U statistic distribution per multivariate model on the raw results.
metric = 'U'
raw_multi_frames = (
    persistence_raw_results,
    var_raw_results,
    cmvfts_raw_results,
    lstm_multi_raw_results,
    mlp_multi_raw_results,
)
multi_data = [frame[metric] for frame in raw_multi_frames]
# Tick labels follow the order of raw_multi_frames.
xticks = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']

ylab = 'U Statistic'
createBoxplot("boxplot_u_wind_raw_multi", multi_data, xticks, ylab)

## Improvement Table Raw Multivariate

In [67]:
# Row labels (models), column names and metric keys for the raw multivariate improvement table.
# Columns are filled from positional lists, so the row order here must match those lists.
index = ['Persistence','VAR','CMVFTS','LSTM_MULTI','MLP_MULTI']
columns = ['imp(RMSE)', 'imp(SMAPE)', 'imp(U)']
metrics = ['RMSE', 'SMAPE', 'U']
# Start from an empty table; this rebinds imp_df to a new frame.
imp_df = pd.DataFrame(columns=columns, index=index)

In [68]:
# Fill one improvement column per metric; the frame order matches the rows of imp_df.
raw_multi_frames = (
    persistence_raw_results,
    var_raw_results,
    cmvfts_raw_results,
    lstm_multi_raw_results,
    mlp_multi_raw_results,
)
for metric in metrics:
    baseline = persistence_raw_results[metric]
    imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in raw_multi_frames]

# Print the finished table as LaTeX source.
print(imp_df.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  imp(RMSE) &  imp(SMAPE) &    imp(U) \\
\midrule
Persistence &   0.000000 &    0.000000 &  0.000000 \\
VAR         &   0.071468 &    0.054230 &  0.073352 \\
CMVFTS      &  -0.627405 &   -0.726083 & -0.637205 \\
LSTM\_MULTI  &   0.069422 &    0.053792 &  0.070807 \\
MLP\_MULTI   &   0.046857 &    0.024046 &  0.047785 \\
\bottomrule
\end{tabular}



## Boxplot Oahu Raw Univariate

In [12]:
# RMSE distribution per univariate model on the raw results.
metric = 'RMSE'
raw_uni_frames = (
    persistence_raw_results,
    hofts_raw_results,
    cvfts_raw_results,
    lstm_uni_raw_results,
    mlp_uni_raw_results,
)
uni_data = [frame[metric] for frame in raw_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence',  'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'RMSE'
createBoxplot("boxplot_rmse_wind_raw_uni", uni_data, xticks, ylab)

In [13]:
# SMAPE distribution per univariate model on the raw results.
metric = 'SMAPE'
raw_uni_frames = (
    persistence_raw_results,
    hofts_raw_results,
    cvfts_raw_results,
    lstm_uni_raw_results,
    mlp_uni_raw_results,
)
uni_data = [frame[metric] for frame in raw_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence', 'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'SMAPE'
createBoxplot("boxplot_smape_wind_raw_uni", uni_data, xticks, ylab)

In [14]:
# U statistic distribution per univariate model on the raw results.
metric = 'U'
raw_uni_frames = (
    persistence_raw_results,
    hofts_raw_results,
    cvfts_raw_results,
    lstm_uni_raw_results,
    mlp_uni_raw_results,
)
uni_data = [frame[metric] for frame in raw_uni_frames]
# NOTE(review): the cvfts results are labelled 'NSFTS' here but 'CVFTS' in the
# improvement table -- confirm which name is intended.
xticks = ['Persistence', 'HOFTS','NSFTS','LSTM_UNI','MLP_UNI']

ylab = 'U Statistic'
createBoxplot("boxplot_u_wind_raw_uni", uni_data, xticks, ylab)

## Improvement Table Raw Univariate

In [64]:
# Row labels (models), column names and metric keys for the raw univariate improvement table.
# Columns are filled from positional lists, so the row order here must match those lists.
index = ['Persistence', 'HOFTS','CVFTS','LSTM_UNI','MLP_UNI']
columns = ['imp(RMSE)', 'imp(SMAPE)', 'imp(U)']
metrics = ['RMSE', 'SMAPE', 'U']
# Start from an empty table; this rebinds imp_df to a new frame.
imp_df = pd.DataFrame(columns=columns, index=index)

In [65]:
# Fill one improvement column per metric; the frame order matches the rows of imp_df.
raw_uni_frames = (
    persistence_raw_results,
    hofts_raw_results,
    cvfts_raw_results,
    lstm_uni_raw_results,
    mlp_uni_raw_results,
)
for metric in metrics:
    baseline = persistence_raw_results[metric]
    imp_df['imp(' + metric + ')'] = [improvement(frame[metric], baseline) for frame in raw_uni_frames]

In [66]:
# Print the raw univariate improvement table as LaTeX source.
print(imp_df.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  imp(RMSE) &  imp(SMAPE) &    imp(U) \\
\midrule
Persistence &   0.000000 &    0.000000 &  0.000000 \\
HOFTS       &  -0.025069 &   -0.047100 & -0.024221 \\
CVFTS       &  -0.065655 &   -0.105628 & -0.067218 \\
LSTM\_UNI    &   0.039902 &    0.020825 &  0.040666 \\
MLP\_UNI     &   0.038545 &    0.017995 &  0.039217 \\
\bottomrule
\end{tabular}

