In [102]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
from numpy import absolute, mean, std
from sklearn import preprocessing, metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, explained_variance_score
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold, KFold, validation_curve
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

In [112]:
def readCSVFiles():

    df01 = pd.read_csv("bellman_performance_results_final.csv")
    df02 = pd.read_csv("dijkstra_performance_results_final.csv")
    df03 = pd.read_csv("hybrid_performance_results_final.csv")

    return df01, df02, df03

def combineAlgorithmsToFile(df01, df02, df03):
    df01['AlgoBellman'] = 1
    df01['AlgoDijkstra'] = 0
    df01['AlgoHybrid'] = 0
    df02['AlgoBellman'] = 0
    df02['AlgoDijkstra'] = 1
    df02['AlgoHybrid'] = 0
    df03['AlgoBellman'] = 0
    df03['AlgoDijkstra'] = 0
    df03['AlgoHybrid'] = 1
    frames = [df01, df02, df03]
    concat = pd.concat(frames)
    df1 = concat
    
    return df1

def preprocessing(df1):
    conditions = [df1['vertexNum'] == 524288, df1['vertexNum'] == 1048576, df1['vertexNum'] == 2097152, df1['vertexNum'] == 4194304, df1['vertexNum'] == 8388608, df1['vertexNum'] == 16777216, df1['vertexNum'] == 33554432]
    numbers = [1, 2, 4, 8, 16, 32, 64]
    df1['graphSize'] = np.select(conditions, numbers, default=0)
    
    df1 = df1.drop(['vertexNum', 'edgeNum', 'maxEdgeDegree',
                    'sAtomicMinBlock', 'sAtomicMaxBlock',
                    'sAtomicAddBlock'], axis=1)
    df1.dropna(inplace=True)
    
    df1 = df1[df1['minProcessEdge'] >= 0]
              
    for i in numbers:
        mini = df1[df1['graphSize'] == i]['minProcessEdge'].min()
        maxi = df1[df1['graphSize'] == i]['minProcessEdge'].max()
        df1.loc[df1['graphSize'] == i, ['minProcessEdge']] = 1.0 / ((((df1.loc[df1['graphSize'] == i, ['minProcessEdge']] - mini) / (maxi - mini)) * 9) + 1)

    return df1

def _regressorAlgorithm(model, x_train, x_test, y_train, y_test):
    #fit to model
    model.fit(x_train, y_train)
    #predict the outcome
    y_pred = model.predict(x_test)
    #summarize performance
    print("\n### Performance Summarization of _regressorAlgorithm ###\n")
    print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
    print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
    print('R2 score: ', r2_score(y_test, y_pred))
    print('Explained Variance Score: ', explained_variance_score(y_test, y_pred))
    print('Mean Absolute Percentage Error: ', mean_absolute_percentage_error(y_test, y_pred))
    print('MAPE: ', MAPE(y_test, y_pred))
    
    return y_pred, y_test

def _regressorAlgorithm_KFold(model, X, y):
    #model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
    cv = RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)
    # evaluate the model and collect the scores
    n_scores_mae = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    n_scores_mse = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
    n_scores_rmse = cross_val_score(model, X, y, scoring='neg_root_mean_squared_error', cv=cv, n_jobs=-1)
    n_scores_r2 = cross_val_score(model, X, y, scoring='r2', cv=cv, n_jobs=-1)
    n_scores_ev = cross_val_score(model, X, y, scoring='explained_variance', cv=cv, n_jobs=-1)
    n_scores_mape = cross_val_score(model, X, y, scoring='neg_mean_absolute_percentage_error', cv=cv, n_jobs=-1)
    # force the scores to be positive
    n_scores_mae = absolute(n_scores_mae)
    n_scores_mse = absolute(n_scores_mse)
    n_scores_rmse = absolute(n_scores_rmse)
    n_scores_r2 = absolute(n_scores_r2)
    n_scores_ev = absolute(n_scores_ev)
    n_scores_mape = absolute(n_scores_mape)
    # summarize performance
    print("\n### Performance Summarization of _regressorAlgorithm_KFold ###\n")
    print('Mean Absolute Error (avg): %.5f' % mean(n_scores_mae))
    print('Mean Squared Error (avg): %.5f' % mean(n_scores_mse))
    print('Root Mean Squared Error (avg): %.5f' % mean(n_scores_rmse))
    print('R2 score (avg) %.5f' % mean(n_scores_r2))
    print('Explained Variance (avg): %.5f' % mean(n_scores_ev))
    print('Mean Absolute Percentage Error (avg): %.5f' % mean(n_scores_mape))
    
def draw_BFDJ(arr1_b, arr2_b, arr1_d, arr2_d, x_label, y_label):
    
    asterisk = mpath.Path.unit_regular_asterisk(6)
    circle = mpath.Path.unit_circle()

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(arr1_b, arr2_b, color='red', label='predicted_data', marker=asterisk, markersize=7)
    ax.plot(arr1_d, arr2_d, color='blue', label='test_data', marker=circle, markersize=4)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    plt.legend()
    plt.show()
    
def lineGraphOfPredictionError(y_test, y_pred):
    y_pred_exec, y_pred_err, y_test_exec, y_test_err = [], [], [], []
    y_test_n = y_test.to_numpy()
    y_pred_n = y_pred
    y_test_n.sort()
    y_pred_n.sort()
    for i in range(20):
        y_pred_exec.append(y_pred_n[i][1])
        y_pred_err.append(y_pred_n[i][0])
        y_test_exec.append(y_test_n[i][1])
        y_test_err.append(y_test_n[i][0])

    y_pred_exec.sort()
    y_test_exec.sort()
    y_pred_err.sort()
    y_test_err.sort()
    print(y_pred_exec)
    print(y_test_exec)


    draw_BFDJ(y_pred_exec, y_pred_err, y_test_exec, y_test_err, "execution time", "error")
    
def MAPE(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    acc_ex, acc_err = 0, 0
    
    for i in range(len(y_true)):
        if y_true[i][0] != 0:
            acc_ex += np.abs((y_true[i][0] - y_pred[i][0]) / y_true[i][0])
        elif y_true[i][0] == 0 and y_pred[i][0] != 0:
            acc_ex += np.abs((y_true[i][0] - y_pred[i][0]) / y_pred[i][0])
        print(acc_ex)
        
    mean_ex = (acc_ex / len(y_true)) * 100
    
    for i in range(len(y_true)):
        if y_true[i][1] != 0:
            acc_err += np.abs((y_true[i][1] - y_pred[i][1]) / y_true[i][1])
        elif y_true[i][1] == 0 and y_pred[i][1] != 0:
            acc_err += np.abs((y_true[i][1] - y_pred[i][1]) / y_pred[i][1])
        
        print(acc_err)
        
    mean_err = (acc_err / len(y_true)) * 100
    
    return (mean_ex + mean_err)/2
    

In [124]:
y_pred_knr=np.array(y_pred_knr)
y_test_knr=np.array(y_test_knr)
print(y_pred_knr[1][1])
print(y_test_knr[1][1])

1.6644604
0.255608


In [113]:
df01, df02, df03 = readCSVFiles()
df1 = combineAlgorithmsToFile(df01, df02, df03)
df1 = preprocessing(df1)

# # Predict Multiple Output 

In [114]:
X_mo = df1.drop(['executionTime', 'Error'],axis=1)
y_mo = df1[['executionTime', 'Error']]
X_mo = X_mo.values

#split the data randomly
x_train_mo, x_test_mo, y_train_mo, y_test_mo = train_test_split(X_mo, y_mo, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 64) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 64) & (df1['sOriginalDistance'] == 0)]
x_train_mo_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_mo_nd = a[['executionTime', 'Error']]
x_test_mo_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_mo_nd = b[['executionTime', 'Error']]

In [106]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = LinearRegression()
y_pred_lr, y_test_lr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_mo_nd, y_test_lr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 55.26502745120746
Mean Squared Error: 11257.08094734558
Root Mean Squared Error: 106.09939183306179
R2 score:  0.6561162033419621
Explained Variance Score:  0.658132120636081
Mean Absolute Percentage Error:  6358419029849319.0
1.0159557360973457
1.4265873859303437
6.835078396961519
10.681970259897268
10.796981024358521
11.976894269445957
11.978620471142756
14.886553169480203
15.007388050162431
15.276290112858579
15.866222101139977
15.940788882932056
16.03701006781681
18.083425655209123
21.372917774471993
23.637760785979946
23.829212549111126
26.207895391651633
26.49031408172838
26.627360131143384
26.85016247005571
30.845666284336932
30.93291256095374
31.69256454361623
33.15415769105128
36.27741330470367
36.506551350174625
36.5459903107308
37.07628784585762
37.15257552990201
38.15257552990201
38.36808550745429
38.56967484915134
42.83489851342


### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 54.03646
Mean Squared Error (avg): 10823.01098
Root Mean Squared Error (avg): 78.24904
R2 score (avg) 0.66943
Explained Variance (avg): 0.67022
Mean Absolute Percentage Error (avg): 7317727055632261.00000


In [115]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = KNeighborsRegressor()
y_pred_knr, y_test_knr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_mo_nd, y_test_knr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 5.495965413675211
Mean Squared Error: 293.5417429705773
Root Mean Squared Error: 17.13305994183693
R2 score:  0.9607228901575908
Explained Variance Score:  0.9607463026820104
Mean Absolute Percentage Error:  441734254663682.5
0.0005148005148004855
0.06986153418314225
0.08033772465933267
0.11294642031150659
0.1527950962257563
0.16503999418494
0.2169298084065928
0.3476705491473336
0.38351054914733357
0.3900618470459738
0.5595409372880868
0.5859850964665207
0.592809532349845
0.6417658890860879
0.6655903404967463
0.6944364943429001
0.7172112770472681
0.7312687850344884
0.7876670476472516
0.7937917692508151
0.8096732751138062
0.8143399417804728
0.8716928829569432
0.8957991138047472
0.9060141675681881
1.1130180586187717
1.119926642010258
1.3087076560339905
1.3507395285439507
1.356072861877284
1.356072861877284
1.363211246348167
1.4599821847941

242.99435408189308
243.67113302547364
244.3479119690542
244.92981991361535
245.51700989170146
246.14344264590193
246.7699028698035
247.39636309370508
248.02282331760665
248.64928354150823
249.23119148606938
249.8169644029561
250.40515345175152
250.9933539732675
251.58155449478346
252.16975501629943
252.7579555378154
253.33986348237656
253.92901676659858
254.54304001463586
255.1570671135912
255.77109421254656
256.3851213115019
256.9991484104572
257.5810563550184
258.1786967444991
258.85831288715747
259.53793294957995
260.21755301200244
260.8971730744249
261.5767931368474
262.1587010814086
262.7593191227967
263.39574281739505
264.0321665119934
264.6685902065918
265.30501390119014
265.9414375957885
266.54685065960797
267.14618459411724
267.75617399299483
268.36608732367546
268.9765810795181
269.58646444274177
270.20303463541836
270.82869205744544
271.44107402894065
272.0576814339659
272.6652187770579
273.26579598112977
273.8733138314593
274.41395374462536
275.0261380000647
275.75475465307


### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 4.54272
Mean Squared Error (avg): 154.89276
Root Mean Squared Error (avg): 10.40777
R2 score (avg) 0.95929
Explained Variance (avg): 0.95957
Mean Absolute Percentage Error (avg): 305509562160791.43750


In [99]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr, y_test_rfr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_mo_nd, y_test_rfr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.059706112018664
Mean Squared Error: 99.25256215828031
Root Mean Squared Error: 9.962558012793718
R2 score:  0.9916077616361527
Explained Variance Score:  0.991614983171915
Mean Absolute Percentage Error:  138901606415210.19
MAPE:  6860.739800252489


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 76.23818751163628
Mean Squared Error: 16543.737047945262
Root Mean Squared Error: 128.62245934495758
R2 score:  0.842294654618264
Explained Variance Score:  0.9014626652085209
Mean Absolute Percentage Error:  22409443994885.57
MAPE:  237.79601058163684

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 1.72511
Mean Squared Error (avg): 42.23372
Root Mean Squared Error (avg): 5.32011
R2 score (avg) 0.98083
Exp

In [None]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr, y_test_dtr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_mo_nd, y_test_dtr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

In [40]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr, y_test_mlpr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_mo_nd, y_test_mlpr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)


### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 28.43475985782556
Mean Squared Error: 3051.9795404008723
Root Mean Squared Error: 55.244724095617244
R2 score:  0.4360526853839238
Explained Variance Score:  0.4451236735671463
Mean Absolute Percentage Error:  645035551717230.0

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 18.38118
Mean Squared Error (avg): 1522.39076
Root Mean Squared Error (avg): 29.26635
R2 score (avg) 0.64700
Explained Variance (avg): 0.65472
Mean Absolute Percentage Error (avg): 935527078423552.00000


In [41]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor, y_test_mor = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_mo_nd, y_test_mor_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)


### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 55.25032222792507
Mean Squared Error: 11256.065954370963
Root Mean Squared Error: 106.09460850755312
R2 score:  0.6562054256387007
Explained Variance Score:  0.6582457223017915
Mean Absolute Percentage Error:  6360384432944519.0

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 54.02367
Mean Squared Error (avg): 10821.96382
Root Mean Squared Error (avg): 78.24632
R2 score (avg) 0.66936
Explained Variance (avg): 0.67014
Mean Absolute Percentage Error (avg): 7314987379020250.00000


# Predict Single Output - Execution

In [44]:
X_sox = df1.drop(['executionTime', 'Error'],axis=1)
y_sox = df1[['executionTime']]
X_sox = X_sox.values

#split the data randomly
x_train_sox, x_test_sox, y_train_sox, y_test_sox = train_test_split(X_sox, y_sox, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 64) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 64) & (df1['sOriginalDistance'] == 0)]
x_train_sox_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_sox_nd = a[['executionTime']]
x_test_sox_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_sox_nd = b[['executionTime']]

In [34]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = LinearRegression()
y_pred_lr_sox, y_test_lr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_sox_nd, y_test_lr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 102.64693895214553
Mean Squared Error: 22416.94624813061
Root Mean Squared Error: 149.72289820909361
R2 score:  0.8246858471015389
Explained Variance Score:  0.8255693247225171
Mean Absolute Percentage Error:  1.2488452466446236e+16


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 163.32676078715366
Mean Squared Error: 67629.25854333169
Root Mean Squared Error: 260.05626034251065
R2 score:  0.28792767288799553
Explained Variance Score:  0.38863168076312643
Mean Absolute Percentage Error:  3787078261014705.5

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 100.25122
Mean Squared Error (avg): 21547.61410
Root Mean Squared Error (avg): 146.58253
R2 score (avg) 0.81275
Explained Variance (avg): 0.81314
M

In [37]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = KNeighborsRegressor()
y_pred_knr_sox, y_test_knr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_sox_nd, y_test_knr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 9.1760989010989
Mean Squared Error: 573.0314155982904
Root Mean Squared Error: 23.938074600900766
R2 score:  0.9955185458314512
Explained Variance Score:  0.9955250435580816
Mean Absolute Percentage Error:  29694063477168.156


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 290.75851548269577
Mean Squared Error: 110429.69684995446
Root Mean Squared Error: 332.30964001959745
R2 score:  0.37180653188477486
Explained Variance Score:  0.372146236487363
Mean Absolute Percentage Error:  6169480309207047.0

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 7.50802
Mean Squared Error (avg): 293.42643
Root Mean Squared Error (avg): 16.84559
R2 score (avg) 0.99746
Explained Variance (avg): 0.99748
Mean Absol

In [46]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr_sox, y_test_rfr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_sox_nd, y_test_rfr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 3.723261341502413
Mean Squared Error: 194.5230448779271
Root Mean Squared Error: 13.94715185541217
R2 score:  0.9984787114866349
Explained Variance Score:  0.998479483766117
Mean Absolute Percentage Error:  0.017516648559520058


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 148.08233333333334
Mean Squared Error: 32327.52322334343
Root Mean Squared Error: 179.7985629067803
R2 score:  0.8161007454648648
Explained Variance Score:  0.9050346113797874
Mean Absolute Percentage Error:  7198376453584.186

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 3.04522
Mean Squared Error (avg): 77.19412
Root Mean Squared Error (avg): 8.09628
R2 score (avg) 0.99933
Explained Variance (avg): 0.99933
Mean Absolute Percentage Error (avg): 13230981635062.72070


In [45]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr_sox, y_test_dtr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_sox_nd, y_test_dtr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 4.171550671550672
Mean Squared Error: 208.29429945054946
Root Mean Squared Error: 14.432404493034051
R2 score:  0.9983710119006599
Explained Variance Score:  0.9983710419299328
Mean Absolute Percentage Error:  0.019424449728164497


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 208.75
Mean Squared Error: 64142.63871812386
Root Mean Squared Error: 253.26397043030786
R2 score:  0.6351163878938353
Explained Variance Score:  0.6454669146135554
Mean Absolute Percentage Error:  0.31289787975476835

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 3.44290
Mean Squared Error (avg): 104.67365
Root Mean Squared Error (avg): 9.57687
R2 score (avg) 0.99909
Explained Variance (avg): 0.99911
Mean Absolute P

In [14]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr_sox, y_test_mlpr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_sox_nd, y_test_mlpr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 18.034004650718295
Mean Squared Error: 832.5176793233629
Root Mean Squared Error: 28.853382458965932
R2 score:  0.9934892054382412
Explained Variance Score:  0.9935243229716583
Mean Absolute Percentage Error:  433730322031937.5

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 26.00114
Mean Squared Error (avg): 1904.37403
Root Mean Squared Error (avg): 40.58693
R2 score (avg) 0.98324
Explained Variance (avg): 0.98334
Mean Absolute Percentage Error (avg): 989594461335514.75000


In [47]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor_sox, y_test_mor_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_sox_nd, y_test_mor_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 102.61303196466051
Mean Squared Error: 22414.947148343414
Root Mean Squared Error: 149.71622206141663
R2 score:  0.8247014812776605
Explained Variance Score:  0.8255868951871324
Mean Absolute Percentage Error:  1.248023910816639e+16


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 318.86523088873474
Mean Squared Error: 135171.7486048175
Root Mean Squared Error: 367.65710737699266
R2 score:  0.23105820291587031
Explained Variance Score:  0.2557686445014138
Mean Absolute Percentage Error:  7234131486124251.0

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 100.22380
Mean Squared Error (avg): 21545.48244
Root Mean Squared Error (avg): 146.57523
R2 score (avg) 0.81277
Explained Variance (avg): 0.813

# Predict Single Output - Error 

In [38]:
X_soe = df1.drop(['executionTime', 'Error'],axis=1)
y_soe = df1[['Error']]
X_soe = X_soe.values

#split the data randomly
x_train_soe, x_test_soe, y_train_soe, y_test_soe = train_test_split(X_soe, y_soe, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 64) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 64) & (df1['sOriginalDistance'] == 0)]
x_train_soe_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_soe_nd = a[['Error']]
x_test_soe_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_soe_nd = b[['Error']]

In [39]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = LinearRegression()
y_pred_lr_soe, y_test_lr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_soe_nd, y_test_lr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.883115950269352
Mean Squared Error: 97.21564656053386
Root Mean Squared Error: 9.859799519287087
R2 score:  0.4875465595823846
Explained Variance Score:  0.49069491654964537
Mean Absolute Percentage Error:  228385593252414.6


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.818499753915722
Mean Squared Error: 104.78511372569868
Root Mean Squared Error: 10.236460019249755
R2 score:  0.34469465294312607
Explained Variance Score:  0.5214051984028452
Mean Absolute Percentage Error:  340211599699530.75

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 7.82170
Mean Squared Error (avg): 98.40786
Root Mean Squared Error (avg): 9.91555
R2 score (avg) 0.52611
Explained Variance (avg): 0.52730
Mean Absolute Perce

In [40]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = KNeighborsRegressor()
y_pred_knr_soe, y_test_knr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_soe_nd, y_test_knr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 1.8158319262515263
Mean Squared Error: 14.052070342863946
Root Mean Squared Error: 3.748609121109314
R2 score:  0.9259272344837304
Explained Variance Score:  0.9259675618059393
Mean Absolute Percentage Error:  853774445850197.5


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.322884484153006
Mean Squared Error: 98.37570232815428
Root Mean Squared Error: 9.918452617629137
R2 score:  0.3847778423481879
Explained Variance Score:  0.6307131089491349
Mean Absolute Percentage Error:  2.3915224003067315

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 1.57741
Mean Squared Error (avg): 16.35910
Root Mean Squared Error (avg): 3.96996
R2 score (avg) 0.92113
Explained Variance (avg): 0.92167
Mean Absolute Per

In [48]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr_soe, y_test_rfr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_soe_nd, y_test_rfr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.108057283137977
Mean Squared Error: 0.11294939171798878
Root Mean Squared Error: 0.33607944256974237
R2 score:  0.9994046091711901
Explained Variance Score:  0.999405745428327
Mean Absolute Percentage Error:  15861578907.485468


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.5457282074499098
Mean Squared Error: 0.8835736382541625
Root Mean Squared Error: 0.9399859776901794
R2 score:  0.9944743054706974
Explained Variance Score:  0.9954306985900699
Mean Absolute Percentage Error:  8449376.678175138

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 0.10502
Mean Squared Error (avg): 0.13309
Root Mean Squared Error (avg): 0.35573
R2 score (avg) 0.99936
Explained Variance (avg): 0.99936
Mean Absolute Percentage Error (avg): 5583431617.81244


In [49]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr_soe, y_test_dtr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_soe_nd, y_test_dtr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.11423272283272412
Mean Squared Error: 0.14625310606079364
Root Mean Squared Error: 0.3824305244888196
R2 score:  0.9992290550953036
Explained Variance Score:  0.9992323633606892
Mean Absolute Percentage Error:  7060588426.8630705


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.5003322877959914
Mean Squared Error: 0.7775640689761109
Root Mean Squared Error: 0.8817959338623143
R2 score:  0.9951372683202578
Explained Variance Score:  0.995834518684125
Mean Absolute Percentage Error:  0.1145209270587277

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 0.12225
Mean Squared Error (avg): 0.19855
Root Mean Squared Error (avg): 0.44125
R2 score (avg) 0.99901
Explained Variance (avg): 0.99904
Mean Abso

In [None]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr_soe, y_test_mlpr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_soe_nd, y_test_mlpr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

In [None]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor_soe, y_test_mor_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_soe_nd, y_test_mor_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)