In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
import sklearn.metrics as metrics
from dm_test.dm_test import dm_test as dm

In [2]:
# Load the pickled results of the proposed method.
# NOTE: unpickling can execute arbitrary code, so only open result files
# that were produced by this project.
with open('results_pkl/proposed.pkl', 'rb') as results_file:
    dict_results = pkl.load(results_file)

# Split the payload into the three sections used by the cells below.
metric_res, params_res, value_res = (
    dict_results[section] for section in ('metric', 'params', 'value')
)

In [3]:
# Significance threshold used by get_dm_test_result: a p-value above this
# level is reported as a tie ('~').
dm_p_level = 0.05

In [4]:
# Model names are read from the first series' metric entries (the first
# element of each entry is the name). 'Proposed' is removed so the list
# holds every other model.
first_series_entries = metric_res[0][1]
models_names = [entry[0] for entry in first_series_entries]
models_names.remove('Proposed')

In [5]:
def get_percentage_difference(base, proposed):
    """Return the gap between `base` and `proposed` as a percentage of `base`.

    The result is positive when `proposed` is smaller than `base` and
    negative when it is larger.
    """
    relative_gap = (base - proposed) / base
    return relative_gap * 100

model_columns = models_names + ["DSNAW", "DSLA"]

# Predictions of the DSNAW and DSLA baselines, read from one CSV per series.
dsnaw_values = {
    series_name: pd.read_csv('results/dsnaw_pred/' + series_name + '.csv')['DSNAW'].values
    for series_name in value_res
}
dsla_values = {
    series_name: pd.read_csv('results/dsla_pred/' + series_name + '.csv')['DSLA'].values
    for series_name in value_res
}

# Collect one record per time series and build the frame once at the end;
# concatenating a one-row frame per iteration copies the whole table each time.
percentage_rows = []
for file_name, file_res in value_res.items():
    real = file_res['Real']
    proposed = file_res['Proposed']
    mse_proposed = metrics.mean_squared_error(real, proposed)

    # Every set of predictions the proposed method is compared against.
    candidates = {"DSNAW": dsnaw_values[file_name], "DSLA": dsla_values[file_name]}
    candidates.update(
        (model_name, model_value)
        for model_name, model_value in file_res.items()
        if model_name not in ('Real', 'Proposed')
    )

    new_row = {'Time Series': file_name}
    for model_name, model_value in candidates.items():
        new_row[model_name] = get_percentage_difference(
            metrics.mean_squared_error(real, model_value), mse_proposed
        )
    percentage_rows.append(new_row)

# Keep the declared column order first; any unexpected model column follows.
expected_columns = ["Time Series"] + model_columns
df_percentage_mse = pd.DataFrame(percentage_rows)
extra_columns = [c for c in df_percentage_mse.columns if c not in expected_columns]
df_percentage_mse = df_percentage_mse.reindex(columns=expected_columns + extra_columns)

df_percentage_mse[model_columns] = df_percentage_mse[model_columns].apply(lambda x: pd.to_numeric(x, errors='coerce'))
df_percentage_mse.to_csv('results/percentage_difference_mse.csv', index=False, float_format="%.2f")

In [6]:
def get_dm_test_result(real, base, proposed, value):
    """Classify the comparison between `base` and `proposed` as '+', '-' or '~'.

    Runs `dm` (imported from dm_test.dm_test, presumably a Diebold-Mariano
    test) on the two forecasts with crit="MSE" and a horizon derived from
    the cube root of the series length.

    Returns '~' when the p-value is above `dm_p_level`; otherwise '+' when
    `value` is greater than zero and '-' in every other case.
    """
    horizon = int(real.size ** (1 / 3) + 1)
    p_value = dm(real, base, proposed, horizon, crit="MSE").p_value
    if p_value > dm_p_level:
        return '~'
    return '+' if value > 0 else '-'

# Collect one record per time series and build the frame once at the end
# instead of concatenating a one-row frame on every iteration.
dm_rows = []
for file_name, file_res in value_res.items():
    real = file_res['Real']
    proposed = file_res['Proposed']
    # Percentage differences of this series, looked up once rather than
    # re-filtering df_percentage_mse for every model.
    pct_row = df_percentage_mse.loc[df_percentage_mse['Time Series'] == file_name].iloc[0]

    # Every set of predictions the proposed method is compared against.
    candidates = {"DSNAW": dsnaw_values[file_name], "DSLA": dsla_values[file_name]}
    candidates.update(
        (model_name, model_value)
        for model_name, model_value in file_res.items()
        if model_name not in ('Real', 'Proposed')
    )

    new_row = {'Time Series': file_name}
    for model_name, model_value in candidates.items():
        new_row[model_name] = get_dm_test_result(real, model_value, proposed, pct_row[model_name])
    dm_rows.append(new_row)

# Keep the declared column order first; any unexpected model column follows.
dm_columns = ["Time Series"] + model_columns
df_dm_test = pd.DataFrame(dm_rows)
df_dm_test = df_dm_test.reindex(
    columns=dm_columns + [c for c in df_dm_test.columns if c not in dm_columns]
)

# Summary rows: how many series ended with each symbol, per compared model.
outcome_labels = [('Wins', '+'), ('Ties', '~'), ('Loss', '-')]
summary_rows = [
    {'Time Series': label,
     **{m: int((df_dm_test[m] == symbol).sum()) for m in model_columns}}
    for label, symbol in outcome_labels
]
wins, ties, loss = summary_rows

df_dm_test = pd.concat([df_dm_test, pd.DataFrame(summary_rows)], ignore_index=True)

df_dm_test.to_csv('results/dm_test.csv', index=False)

In [7]:
# Each entry of params_res is (series name, {parameter: value}). Build all
# records first and create the frame once, rather than concatenating a
# one-row frame per series.
param_columns = ['Time Series', 'k', 'n', 'comb']
param_rows = [{'Time Series': entry[0], **entry[1]} for entry in params_res]

params_df = pd.DataFrame(param_rows)
# Keep the declared column order first; any additional parameter follows.
params_df = params_df.reindex(
    columns=param_columns + [c for c in params_df.columns if c not in param_columns]
)

params_df

Unnamed: 0,Time Series,k,n,comb
0,amz,10,6,median
1,APPLE,6,1,median
2,electricity,13,4,average
3,goldman,5,1,median
4,msft,11,4,median
5,pollutions,2,6,median
6,star,15,1,median
7,sunspot,1,3,median
8,vehicle,19,2,median
9,wine,14,6,median


In [8]:
# Final column order: every model, then the two external baselines, with
# 'Proposed' last.
columns_models = models_names + ['DSNAW', 'DSLA', 'Proposed']

# Metric names are taken from the first model of the first series.
metrics_list = list(metric_res[0][1][0][1].keys())

# One record per (series, metric) pair, filled model by model. The frame is
# built once from all records instead of being grown with concat in the loop.
metric_rows = []
for f in metric_res:
    f_name = f[0]
    rows = {me: {'Time Series': f_name, 'Metrics': me} for me in metrics_list}
    for mo in f[1]:
        mo_name = mo[0]
        for me, value in mo[1].items():
            rows[me][mo_name] = value
    metric_rows.extend(rows.values())

metrics_df = pd.DataFrame(metric_rows, columns=['Time Series', 'Metrics'] + columns_models)

df_dsnaw = pd.read_csv('results/metrics_results_dsnaw_bagging.csv')
df_dsla = pd.read_csv('results/metrics_results_dsla_bagging.csv')

# These assignments align on the row index, so the CSV rows must be in the
# same (series, metric) order as metrics_df.
metrics_df['DSNAW'] = df_dsnaw['DSNAW']
metrics_df['DSLA'] = df_dsla['DSLA']

metrics_df[columns_models] = metrics_df[columns_models].apply(lambda x: pd.to_numeric(x, errors='coerce'))
# The model with the smallest value in each row is recorded as the best one.
metrics_df['Best'] = metrics_df[columns_models].idxmin(axis=1)
pd.set_option('display.float_format', '{:.2E}'.format)
print(metrics_df)

   Time Series Metrics      SVR       GB       RF    ARIMA      DBN      MLP  \
0          amz     MSE 1.17E-04 1.24E-04 1.18E-04 1.17E-04 1.17E-04 1.19E-04   
1          amz    MAPE 1.47E+01 1.58E+01 1.47E+01 1.47E+01 1.49E+01 1.48E+01   
2          amz     ARV 3.30E-02 3.51E-02 3.32E-02 3.29E-02 3.29E-02 3.35E-02   
3          amz     MAE 8.40E-03 8.62E-03 8.41E-03 8.42E-03 8.44E-03 8.49E-03   
4          amz    RMSE 1.08E-02 1.11E-02 1.09E-02 1.08E-02 1.08E-02 1.09E-02   
..         ...     ...      ...      ...      ...      ...      ...      ...   
65        wine     ARV 4.45E-01 8.32E-01 1.10E+00 2.65E+00 1.69E+00 4.86E-01   
66        wine     MAE 3.45E-02 5.24E-02 4.38E-02 4.78E-02 4.57E-02 2.96E-02   
67        wine    RMSE 4.11E-02 6.70E-02 5.91E-02 6.68E-02 7.02E-02 3.91E-02   
68        wine   NRMSE 1.26E-01 2.05E-01 1.81E-01 2.05E-01 2.15E-01 1.20E-01   
69        wine   SMAPE 4.16E+01 6.59E+01 4.57E+01 4.70E+01 4.89E+01 3.68E+01   

        ELM    DSNAW     DSLA  Proposed

In [9]:
# Persist the parameter table and the metrics table; metric values are
# written in scientific notation with two decimals.
params_df.to_csv('results/params_results.csv', index=False)
metrics_df.to_csv('results/metrics_results.csv', index=False, float_format="%.2E")

In [10]:
# Number of (series, metric) rows in which each model has the lowest value.
metrics_df['Best'].value_counts()

MLP         20
Proposed    20
SVR          9
DSNAW        8
ARIMA        5
DSLA         3
DBN          2
RF           1
GB           1
ELM          1
Name: Best, dtype: int64

In [11]:
# Hard-coded reference values, seven per line for ten lines (70 in total).
# They are placed row by row next to metrics_df in the DSNAW_ORIGINAL column,
# so their order must match the (series, metric) order of metrics_df.
# NOTE(review): presumably the DSNAW metrics as originally published —
# confirm the source of these numbers.
dsnaw_original = [1.30E-4, 22.40, 3.93E-2, 8.94E-3, 1.14E-2, 4.61E-2, 13.80,
        1.90E-2, 245.72, 2.25, 8.02E-2, 1.38E-1, 3.21E-1, 42.15, 
         3.74E-3,6.933, 2.67E-1, 5.18E-2, 6.11E-2, 1.35E-1, 7.25,
         4.06E-4, 9.52, 2.64E-2, 1.53E-2, 2.01E-2, 4.71E-2, 9.44,
         1.99E-3, 5.03, 1.28E-1, 3.41E-2, 4.46E-2, 9.51E-2, 5.08,
         4.47E-2, 32.26, 1.78, 1.74E-1, 2.11E-1, 4.18E-1, 37.43,
         6.77E-5, 2.99, 1.07E-3, 6.08E-3, 8.23E-3, 8.31E-3, 3.77,
         3.31E-2, 62.44, 9.55E-1, 1.14E-1, 1.82E-1, 1.84E-1, 46.31,
         1.81E-2, 15.39, 1.45, 1.09E-1, 1.34E-1, 2.30E-1, 15.14,
         2.94E-3, 135.02, 1.39, 3.54E-2, 5.43E-2, 2.40E-1, 43.84
        ]
# Side-by-side table of the hard-coded DSNAW values and the DSNAW values
# held in metrics_df, one row per (series, metric) pair.
df_dsnaw_2 = pd.DataFrame({
    'Time Series': metrics_df['Time Series'],
    'Metrics': metrics_df['Metrics'],
    'DSNAW_ORIGINAL': dsnaw_original,
    'DSNAW': metrics_df['DSNAW'],
})

col = ['DSNAW', 'DSNAW_ORIGINAL']

df_dsnaw_2[col] = df_dsnaw_2[col].apply(pd.to_numeric, errors='coerce')
# The column with the lower value in each row is recorded as the best one.
df_dsnaw_2['Best'] = df_dsnaw_2[col].idxmin(axis=1)
df_dsnaw_2['Best'].value_counts()

DSNAW             58
DSNAW_ORIGINAL    12
Name: Best, dtype: int64

In [12]:
# Head-to-head table of DSNAW and the proposed method, taken as an
# independent copy of the relevant metrics_df columns.
df_direct = metrics_df[['Time Series', 'Metrics', 'DSNAW', 'Proposed']].copy()
col = ['DSNAW', 'Proposed']
df_direct[col] = df_direct[col].apply(pd.to_numeric, errors='coerce')
# The column with the lower value in each row is recorded as the best one.
df_direct['Best'] = df_direct[col].idxmin(axis=1)
df_direct['Best'].value_counts()

Proposed    47
DSNAW       23
Name: Best, dtype: int64

In [13]:
# Save the DSNAW-vs-Proposed table; floats are written in scientific
# notation with two decimals.
df_direct.to_csv('results/direct_comparison.csv', index=False, float_format="%.2E")