In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import os 

pd.set_option('display.max_rows', None) 
pd.set_option('display.max_columns', None)  


In [2]:
def process(results_sp,results_dp):
    '''Combines results from the paper and ours in order to compare them side by side'''
    pd.set_option('display.max_colwidth', 1) 
    best_results_dp = results_dp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()
    display(best_results_dp)
    best_results_dp.drop(['Parameters','Number of features'],axis=1,inplace=True)
    best_results_dp.columns = ['Model', 'MAE_DP', 'ME_DP', 'SE_DP', 'MASE_DP']

    best_results_sp = results_sp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()
    display(best_results_sp)
    best_results_sp.drop(['Parameters','Number of features'],axis=1,inplace=True)
    best_results_sp.columns = ['Model', 'MAE_SP', 'ME_SP', 'SE_SP', 'MASE_SP']

    best_results = best_results_sp.merge(best_results_dp,on='Model').round(2)
    
    return best_results

## Preprocessing of input data

In [3]:
results_dp_bcg_1 = pd.read_csv('results_dp_bcg_11_15.csv',index_col =0)
results_dp_bcg_2 = pd.read_csv('results_dp_bcg_13_24.csv',index_col=0)
results_dp_bcg = pd.concat([results_dp_bcg_1,results_dp_bcg_2])

results_dp_ppgbp_1 = pd.read_csv('results_dp_ppgbp_12_10.csv',index_col =0)
results_dp_ppgbp_2 = pd.read_csv('results_dp_ppgbp_13_53.csv',index_col=0)
results_dp_ppgbp = pd.concat([results_dp_ppgbp_1,results_dp_ppgbp_2])

results_dp_sensor_1 = pd.read_csv('results_dp_sensor_04_38.csv',index_col =0)
results_dp_sensor_2 = pd.read_csv('results_dp_sensor_13_14.csv',index_col=0)
results_dp_sensor = pd.concat([results_dp_sensor_1,results_dp_sensor_2])

results_sp_bcg_1 = pd.read_csv('results_sp_bcg_11_15.csv',index_col =0)
results_sp_bcg_2 = pd.read_csv('results_sp_bcg_13_24.csv',index_col=0)
results_sp_bcg = pd.concat([results_sp_bcg_1,results_sp_bcg_2])

results_sp_ppgbp_1 = pd.read_csv('results_sp_ppgbp_12_10.csv',index_col =0)
results_sp_ppgbp_2 = pd.read_csv('results_sp_ppgbp_13_53.csv',index_col=0)
results_sp_ppgbp = pd.concat([results_sp_ppgbp_1,results_sp_ppgbp_2])

results_sp_sensor_1 = pd.read_csv('results_sp_sensor_04_38.csv',index_col =0)
results_sp_sensor_2 = pd.read_csv('results_sp_sensor_13_14.csv',index_col=0)
results_sp_sensor = pd.concat([results_sp_sensor_1,results_sp_sensor_2])


results_dp_bcg['Overall score'] = results_dp_bcg['Overall score'] * (-1) 
results_sp_bcg['Overall score'] = results_sp_bcg['Overall score'] * (-1) 
results_dp_ppgbp['Overall score'] = results_dp_ppgbp['Overall score'] * (-1) 
results_sp_ppgbp['Overall score'] = results_sp_ppgbp['Overall score'] * (-1) 
results_dp_sensor['Overall score'] = results_dp_sensor['Overall score'] * (-1) 
results_sp_sensor['Overall score'] = results_sp_sensor['Overall score'] * (-1) 

results_dp_bcg['MASE'] = results_dp_bcg['MASE'] * (-100) 
results_sp_bcg['MASE'] = results_sp_bcg['MASE'] * (-100) 
results_dp_ppgbp['MASE'] = results_dp_ppgbp['MASE'] * (-100) 
results_sp_ppgbp['MASE'] = results_sp_ppgbp['MASE'] * (-100) 
results_dp_sensor['MASE'] = results_dp_sensor['MASE'] * (-100) 
results_sp_sensor['MASE'] = results_sp_sensor['MASE'] * (-100) 

results_dp_bcg = results_dp_bcg.round(2)
results_sp_bcg = results_sp_bcg.round(2)
results_dp_ppgbp = results_dp_ppgbp.round(2)
results_sp_ppgbp = results_sp_ppgbp.round(2)
results_sp_sensor = results_sp_sensor.round(2)
results_dp_sensor = results_dp_sensor.round(2)


results_dp_bcg.Parameters = results_dp_bcg.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')
results_sp_bcg.Parameters = results_sp_bcg.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')
results_dp_ppgbp.Parameters = results_dp_ppgbp.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')
results_sp_ppgbp.Parameters = results_sp_ppgbp.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')
results_sp_sensor.Parameters = results_sp_sensor.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')
results_dp_sensor.Parameters = results_dp_sensor.Parameters.str.replace("'activation': 'relu',",'').str.replace(", 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'adam'",'')


## Sensor data

In [4]:
data = {
    'Model': ['SVR()_paper', 'RandomForestRegressor()_paper', 'MLPRegressor()_paper'],
    'MAE_SP': [15.60, 15.86, 16.03],
    'ME_SE_SP': ['-0.00±19.68', '-0.12±19.85', '-0.50±20.10'],
    'MASE_SP': [88.62, 90.08, 91.03],
    'MAE_DP': [7.50, 7.66, 7.77],
    'ME_SE_DP': ['-1.45±9.81', '-0.03±9.86', '-0.19±10.04'],
    'MASE_DP': [90.76, 92.63, 94.05]
}
paper_sensor = pd.DataFrame(data)

# Split the 'ME_SE' column into two separate columns
paper_sensor[['ME_SP', 'SE_SP']] = paper_sensor['ME_SE_SP'].str.split('±', expand=True).astype(float)
paper_sensor[['ME_DP', 'SE_DP']] = paper_sensor['ME_SE_DP'].str.split('±', expand=True).astype(float)
paper_sensor.drop(['ME_SE_SP', 'ME_SE_DP'], axis=1, inplace=True)
paper_sensor = paper_sensor[['Model', 'MAE_SP', 'ME_SP', 'SE_SP', 'MASE_SP', 'MAE_DP', 'ME_DP', 'SE_DP', 'MASE_DP']]


In [5]:
print('Results for the Sensor dataset \n')
print('Results of DP')
display(results_dp_sensor.sort_values(by = 'Overall score'))
print('Results of SP')
display(results_sp_sensor.sort_values(by = 'Overall score'))

Results for the Sensor dataset 

Results of DP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'rbf'}",20,7.56,-1.73,9.65,91.61
0,SVR(),"{'C': 10, 'epsilon': 0.2, 'kernel': 'linear'}",100,7.59,-1.65,9.68,91.88
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",50,7.6,-1.68,9.71,92.0
0,SVR(),"{'C': 10, 'epsilon': 0.3, 'kernel': 'linear'}",150,7.61,-1.66,9.65,92.12
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'rbf'}",8,7.61,-1.68,9.73,92.11
0,SVR(),"{'C': 100, 'epsilon': 0.2, 'kernel': 'linear'}",16,7.62,-1.74,9.84,92.24
0,SVR(),"{'C': 100, 'epsilon': 0.2, 'kernel': 'rbf'}",12,7.63,-1.76,9.76,92.38
1,MLPRegressor(),"{ 'alpha': 0.1, 'hidden_layer_sizes': (25, 25)}",20,7.63,0.79,9.77,92.39
0,SVR(),"{'C': 1, 'epsilon': 0.1, 'kernel': 'linear'}",200,7.63,-1.65,9.68,92.37
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_e...",50,7.64,-0.04,7.37,92.51


Results of SP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 5, 'n...",150,15.64,0.04,7.56,89.02
2,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 10, 'n_...",200,15.65,0.05,9.15,89.08
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, '...",100,15.65,0.06,8.99,89.02
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 2, 'n...",50,15.69,0.06,7.04,89.26
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",100,15.75,-0.53,19.39,89.59
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",200,15.76,-0.19,19.23,89.66
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'linear'}",150,15.77,-0.55,19.27,89.74
1,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (25, 25)}",200,15.8,0.76,19.07,89.88
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_e...",20,15.82,0.0,15.49,89.99
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'linear'}",50,15.86,-0.56,19.65,90.24


In [6]:
best_results_sensor = process(results_sp_sensor,results_dp_sensor)
best_results_sensor

  best_results_dp = results_dp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (50, 25)}",50,7.73,0.63,9.45,93.65
1,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}",100,7.65,-0.06,7.29,92.6
2,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'rbf'}",20,7.56,-1.73,9.65,91.61


  best_results_sp = results_sp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (25, 25)}",50,15.98,-0.11,19.64,90.91
1,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 100}",100,15.65,0.06,8.99,89.02
2,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'linear'}",20,16.09,-0.49,20.07,91.58


Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),15.98,-0.11,19.64,90.91,7.73,0.63,9.45,93.65
1,RandomForestRegressor(),15.65,0.06,8.99,89.02,7.65,-0.06,7.29,92.6
2,SVR(),16.09,-0.49,20.07,91.58,7.56,-1.73,9.65,91.61


In [7]:
final_sensor = pd.concat([best_results_sensor,paper_sensor]).sort_values(by='Model')
final_sensor

Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),15.98,-0.11,19.64,90.91,7.73,0.63,9.45,93.65
2,MLPRegressor()_paper,16.03,-0.5,20.1,91.03,7.77,-0.19,10.04,94.05
1,RandomForestRegressor(),15.65,0.06,8.99,89.02,7.65,-0.06,7.29,92.6
1,RandomForestRegressor()_paper,15.86,-0.12,19.85,90.08,7.66,-0.03,9.86,92.63
2,SVR(),16.09,-0.49,20.07,91.58,7.56,-1.73,9.65,91.61
0,SVR()_paper,15.6,-0.0,19.68,88.62,7.5,-1.45,9.81,90.76


## BCG data 

In [8]:
data = {
    'Model': ['SVR()_paper', 'RandomForestRegressor()_paper', 'MLPRegressor()_paper'],
    'MAE_SP': [11.45, 12.88, 12.98],
    'ME_SE_SP': ['-0.79±15.56', '-1.46±17.75', '-0.27±16.35'],
    'MASE_SP': [93.07, 104.72, 105.50],
    'MAE_DP': [7.34, 7.89, 7.14],
    'ME_SE_DP': ['0.01±9.88', '-0.01±10.44', '0.03±9.28'],
    'MASE_DP': [92.75, 99.77, 90.24]
}
bcg = pd.DataFrame(data)

# Split the 'ME_SE' column into two separate columns, drop old ones and reorder df
bcg[['ME_SP', 'SE_SP']] = bcg['ME_SE_SP'].str.split('±', expand=True).astype(float)
bcg[['ME_DP', 'SE_DP']] = bcg['ME_SE_DP'].str.split('±', expand=True).astype(float)
bcg.drop(['ME_SE_SP', 'ME_SE_DP'], axis=1, inplace=True)
paper_bcg = bcg[['Model', 'MAE_SP', 'ME_SP', 'SE_SP', 'MASE_SP', 'MAE_DP', 'ME_DP', 'SE_DP', 'MASE_DP']]


paper_bcg

Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,SVR()_paper,11.45,-0.79,15.56,93.07,7.34,0.01,9.88,92.75
1,RandomForestRegressor()_paper,12.88,-1.46,17.75,104.72,7.89,-0.01,10.44,99.77
2,MLPRegressor()_paper,12.98,-0.27,16.35,105.5,7.14,0.03,9.28,90.24


In [9]:
print('Results for the BCG dataset \n')
print('Results of DP')
display(results_dp_bcg.sort_values(by = 'Overall score'))
print('Results of SP')
display(results_sp_bcg.sort_values(by = 'Overall score'))

Results for the BCG dataset 

Results of DP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,SVR(),"{'C': 1, 'epsilon': 0.3, 'kernel': 'linear'}",16,6.65,-0.29,6.95,89.67
1,MLPRegressor(),"{ 'alpha': 0.1, 'hidden_layer_sizes': (25,)}",16,6.68,0.0,6.29,90.04
0,SVR(),"{'C': 0.1, 'epsilon': 0.1, 'kernel': 'linear'}",12,6.87,-0.59,7.35,92.62
0,SVR(),"{'C': 1, 'epsilon': 0.3, 'kernel': 'linear'}",20,6.89,-0.2,6.92,92.99
1,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (25,)}",12,6.89,-0.06,6.72,92.96
1,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (50, 25)}",20,6.93,-0.1,5.02,93.48
0,SVR(),"{'C': 0.1, 'epsilon': 0.1, 'kernel': 'rbf'}",8,7.3,-0.95,8.1,98.45
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 50}",16,7.31,-0.01,2.95,98.59
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}",20,7.42,-0.01,2.92,100.14
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 100}",12,7.42,-0.02,3.37,100.11


Results of SP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,SVR(),"{'C': 0.1, 'epsilon': 0.3, 'kernel': 'rbf'}",200,11.34,-0.87,15.07,101.96
0,SVR(),"{'C': 0.1, 'epsilon': 0.3, 'kernel': 'rbf'}",50,11.35,-1.07,15.24,102.11
0,SVR(),"{'C': 0.1, 'epsilon': 0.3, 'kernel': 'rbf'}",150,11.35,-0.99,15.15,102.11
0,SVR(),"{'C': 0.1, 'epsilon': 0.3, 'kernel': 'rbf'}",100,11.36,-0.99,15.21,102.17
0,SVR(),"{'C': 0.1, 'epsilon': 0.3, 'kernel': 'linear'}",8,11.41,-0.03,14.37,102.6
0,SVR(),"{'C': 0.1, 'epsilon': 0.2, 'kernel': 'linear'}",4,11.41,-1.0,15.27,102.58
0,SVR(),"{'C': 0.1, 'epsilon': 0.1, 'kernel': 'rbf'}",12,11.49,-0.55,14.21,103.29
0,SVR(),"{'C': 0.1, 'epsilon': 0.1, 'kernel': 'rbf'}",20,11.52,-0.4,13.99,103.56
0,SVR(),"{'C': 0.1, 'epsilon': 0.2, 'kernel': 'linear'}",16,11.52,-0.04,14.16,103.62
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 100}",20,11.98,0.02,4.97,107.76


In [10]:
best_results_bcg = process(results_sp_bcg,results_dp_bcg)
best_results_bcg

  best_results_dp = results_dp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.1, 'hidden_layer_sizes': (50,)}",50,8.01,-0.2,5.35,108.0
1,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}",100,7.75,-0.02,2.58,104.58
2,SVR(),"{'C': 1, 'epsilon': 0.3, 'kernel': 'linear'}",20,6.89,-0.2,6.92,92.99


  best_results_sp = results_sp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.1, 'hidden_layer_sizes': (50, 25)}",50,13.14,-0.97,7.95,118.16
1,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 50}",100,12.23,0.04,4.03,109.97
2,SVR(),"{'C': 0.1, 'epsilon': 0.1, 'kernel': 'rbf'}",20,11.52,-0.4,13.99,103.56


Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),13.14,-0.97,7.95,118.16,8.01,-0.2,5.35,108.0
1,RandomForestRegressor(),12.23,0.04,4.03,109.97,7.75,-0.02,2.58,104.58
2,SVR(),11.52,-0.4,13.99,103.56,6.89,-0.2,6.92,92.99


In [11]:
final_bcg = pd.concat([best_results_bcg,paper_bcg]).sort_values(by='Model')
final_bcg

Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),13.14,-0.97,7.95,118.16,8.01,-0.2,5.35,108.0
2,MLPRegressor()_paper,12.98,-0.27,16.35,105.5,7.14,0.03,9.28,90.24
1,RandomForestRegressor(),12.23,0.04,4.03,109.97,7.75,-0.02,2.58,104.58
1,RandomForestRegressor()_paper,12.88,-1.46,17.75,104.72,7.89,-0.01,10.44,99.77
2,SVR(),11.52,-0.4,13.99,103.56,6.89,-0.2,6.92,92.99
0,SVR()_paper,11.45,-0.79,15.56,93.07,7.34,0.01,9.88,92.75


## PPGBP

In [12]:
data = {
    'Model': ['SVR()_paper', 'RandomForestRegressor()_paper', 'MLPRegressor()_paper'],
    'MAE_SP': [13.15, 13.17, 13.38],
    'ME_SE_SP': ['-0.64±17.05', '0.02±16.81', '-0.13±17.09'],
    'MASE_SP': [80.29, 80.42, 81.69],
    'MAE_DP': [8.04, 8.12, 8.21],
    'ME_SE_DP': ['-0.22±10.14', '0.19±10.17', '-0.16±10.40'],
    'MASE_DP': [90.90, 91.76, 92.77]
}

ppgbp_paper = pd.DataFrame(data)

ppgbp_paper[['ME_SP', 'SE_SP']] = ppgbp_paper['ME_SE_SP'].str.split('±', expand=True).astype(float)
ppgbp_paper[['ME_DP', 'SE_DP']] = ppgbp_paper['ME_SE_DP'].str.split('±', expand=True).astype(float)

ppgbp_paper.drop(['ME_SE_SP', 'ME_SE_DP'], axis=1, inplace=True)

paper_ppgbp = ppgbp_paper[['Model', 'MAE_SP', 'ME_SP', 'SE_SP', 'MASE_SP', 'MAE_DP', 'ME_DP', 'SE_DP', 'MASE_DP']]

paper_ppgbp

Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,SVR()_paper,13.15,-0.64,17.05,80.29,8.04,-0.22,10.14,90.9
1,RandomForestRegressor()_paper,13.17,0.02,16.81,80.42,8.12,0.19,10.17,91.76
2,MLPRegressor()_paper,13.38,-0.13,17.09,81.69,8.21,-0.16,10.4,92.77


In [13]:
print('Results for the PPGBP dataset \n')
print('Results of DP')
display(results_dp_ppgbp.sort_values(by = 'Overall score'))
print('Results of SP')
display(results_sp_ppgbp.sort_values(by = 'Overall score'))

Results for the PPGBP dataset 

Results of DP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
2,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 50}",20,7.82,-0.04,3.84,88.43
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 50}",16,7.84,0.06,4.31,88.73
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}",50,7.87,-0.02,4.27,89.04
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 100}",12,7.99,-0.01,5.11,90.36
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}",100,8.0,0.08,3.98,90.54
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 50}",8,8.06,0.03,4.88,91.2
0,SVR(),"{'C': 100, 'epsilon': 0.2, 'kernel': 'rbf'}",20,8.06,-1.19,10.01,91.24
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'rbf'}",16,8.07,-1.1,10.03,91.3
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",4,8.08,-1.29,10.17,91.38
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",8,8.11,-1.32,10.09,91.73


Results of SP


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
2,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 100}",12,12.89,-0.06,8.68,78.79
2,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 100}",20,12.91,-0.05,8.16,78.91
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}",16,12.94,-0.08,7.89,79.06
2,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}",150,12.98,0.05,6.01,79.32
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}",50,13.0,-0.05,6.75,79.46
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 50}",100,13.02,0.14,7.68,79.58
2,RandomForestRegressor(),"{'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}",200,13.11,0.11,7.11,80.09
2,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 100}",8,13.14,0.05,8.87,80.3
0,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'linear'}",16,13.22,-0.9,16.76,80.81
0,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",20,13.27,-0.7,16.82,81.12


In [14]:
best_results_ppgbp = process(results_sp_ppgbp,results_dp_ppgbp)
best_results_ppgbp

  best_results_dp = results_dp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.1, 'hidden_layer_sizes': (50, 25)}",8,8.98,-0.55,11.0,101.59
1,RandomForestRegressor(),"{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 100}",12,7.99,-0.01,5.11,90.36
2,SVR(),"{'C': 100, 'epsilon': 0.1, 'kernel': 'linear'}",4,8.08,-1.29,10.17,91.38


  best_results_sp = results_sp.groupby('Model').apply(lambda df: df.iloc[df['Overall score'].idxmax(),1:]).reset_index()


Unnamed: 0,Model,Parameters,Number of features,Overall score,ME,SD,MASE
0,MLPRegressor(),"{ 'alpha': 0.01, 'hidden_layer_sizes': (50, 25)}",8,17.17,1.01,19.6,104.95
1,RandomForestRegressor(),"{'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 100}",12,12.89,-0.06,8.68,78.79
2,SVR(),"{'C': 100, 'epsilon': 0.3, 'kernel': 'linear'}",4,14.32,-1.29,18.2,87.5


Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),17.17,1.01,19.6,104.95,8.98,-0.55,11.0,101.59
1,RandomForestRegressor(),12.89,-0.06,8.68,78.79,7.99,-0.01,5.11,90.36
2,SVR(),14.32,-1.29,18.2,87.5,8.08,-1.29,10.17,91.38


In [15]:
final_ppgbp = pd.concat([best_results_ppgbp,paper_ppgbp]).sort_values(by='Model')
final_ppgbp

Unnamed: 0,Model,MAE_SP,ME_SP,SE_SP,MASE_SP,MAE_DP,ME_DP,SE_DP,MASE_DP
0,MLPRegressor(),17.17,1.01,19.6,104.95,8.98,-0.55,11.0,101.59
2,MLPRegressor()_paper,13.38,-0.13,17.09,81.69,8.21,-0.16,10.4,92.77
1,RandomForestRegressor(),12.89,-0.06,8.68,78.79,7.99,-0.01,5.11,90.36
1,RandomForestRegressor()_paper,13.17,0.02,16.81,80.42,8.12,0.19,10.17,91.76
2,SVR(),14.32,-1.29,18.2,87.5,8.08,-1.29,10.17,91.38
0,SVR()_paper,13.15,-0.64,17.05,80.29,8.04,-0.22,10.14,90.9
