In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
from numpy import absolute, mean, std
from sklearn import preprocessing, metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, explained_variance_score
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold, KFold, validation_curve
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder;
from sklearn.ensemble import RandomForestRegressor

In [50]:
def readCSVFiles():

    df01 = pd.read_csv("bellman_performance_results.csv")
    df02 = pd.read_csv("dijkstra_performance_results.csv")
    df03 = pd.read_csv("hybrid_performance_results.csv")

    return df01, df02, df03

def combineAlgorithmsToFile(df01, df02, df03):
    df01['AlgoBellman'] = 1
    df01['AlgoDijkstra'] = 0
    df01['AlgoHybrid'] = 0
    df02['AlgoBellman'] = 0
    df02['AlgoDijkstra'] = 1
    df02['AlgoHybrid'] = 0
    df03['AlgoBellman'] = 0
    df03['AlgoDijkstra'] = 0
    df03['AlgoHybrid'] = 1
    frames = [df01, df02, df03]
    concat = pd.concat(frames)
    df1 = concat
    
    return df1

def preprocessing(df1):
    conditions = [df1['vertexNum'] == 524288, df1['vertexNum'] == 1048576, df1['vertexNum'] == 2097152, df1['vertexNum'] == 4194304, df1['vertexNum'] == 8388608, df1['vertexNum'] == 16777216]
    numbers = [1, 2, 4, 8, 16, 32]
    df1['graphSize'] = np.select(conditions, numbers, default=0)
    
    df1 = df1.drop(['vertexNum', 'edgeNum', 'maxEdgeDegree',
                    'sAtomicBlock'], axis=1)
    df1.dropna(inplace=True)
    
    df1 = df1[df1['minProcessEdge'] >= 0]
              
    for i in numbers:
        mini = df1[df1['graphSize'] == i]['minProcessEdge'].min()
        maxi = df1[df1['graphSize'] == i]['minProcessEdge'].max()
        df1.loc[df1['graphSize'] == i, ['minProcessEdge']] = 1.0 / ((((df1.loc[df1['graphSize'] == i, ['minProcessEdge']] - mini) / (maxi - mini)) * 9) + 1)

    return df1

def _regressorAlgorithm(model, x_train, x_test, y_train, y_test):
    #fit to model
    model.fit(x_train, y_train)
    #predict the outcome
    y_pred = model.predict(x_test)
    #summarize performance
    print("\n### Performance Summarization of _regressorAlgorithm ###\n")
    print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
    print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
    print('R2 score: ', r2_score(y_test, y_pred))
    print('Explained Variance Score: ', explained_variance_score(y_test, y_pred))
    print('Mean Absolute Percentage Error: ', mean_absolute_percentage_error(y_test, y_pred))
    #print('MAPE: ', MAPE(y_test, y_pred))
    
    return y_pred, y_test

def _regressorAlgorithm_KFold(model, X, y):
    #model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
    cv = RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)
    # evaluate the model and collect the scores
    n_scores_mae = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    n_scores_mse = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
    n_scores_rmse = cross_val_score(model, X, y, scoring='neg_root_mean_squared_error', cv=cv, n_jobs=-1)
    n_scores_r2 = cross_val_score(model, X, y, scoring='r2', cv=cv, n_jobs=-1)
    n_scores_ev = cross_val_score(model, X, y, scoring='explained_variance', cv=cv, n_jobs=-1)
    n_scores_mape = cross_val_score(model, X, y, scoring='neg_mean_absolute_percentage_error', cv=cv, n_jobs=-1)
    # force the scores to be positive
    n_scores_mae = absolute(n_scores_mae)
    n_scores_mse = absolute(n_scores_mse)
    n_scores_rmse = absolute(n_scores_rmse)
    n_scores_r2 = absolute(n_scores_r2)
    n_scores_ev = absolute(n_scores_ev)
    n_scores_mape = absolute(n_scores_mape)
    # summarize performance
    print("\n### Performance Summarization of _regressorAlgorithm_KFold ###\n")
    print('Mean Absolute Error (avg): %.5f' % mean(n_scores_mae))
    print('Mean Squared Error (avg): %.5f' % mean(n_scores_mse))
    print('Root Mean Squared Error (avg): %.5f' % mean(n_scores_rmse))
    print('R2 score (avg) %.5f' % mean(n_scores_r2))
    print('Explained Variance (avg): %.5f' % mean(n_scores_ev))
    print('Mean Absolute Percentage Error (avg): %.5f' % mean(n_scores_mape))
    
def draw_BFDJ(arr1_b, arr2_b, arr1_d, arr2_d, x_label, y_label):
    
    asterisk = mpath.Path.unit_regular_asterisk(6)
    circle = mpath.Path.unit_circle()

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(arr1_b, arr2_b, color='red', label='predicted_data', marker=asterisk, markersize=7)
    ax.plot(arr1_d, arr2_d, color='blue', label='test_data', marker=circle, markersize=4)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    plt.legend()
    plt.show()
    
def lineGraphOfPredictionError(y_test, y_pred):
    y_pred_exec, y_pred_err, y_test_exec, y_test_err = [], [], [], []
    y_test_n = y_test.to_numpy()
    y_pred_n = y_pred
    y_test_n.sort()
    y_pred_n.sort()
    for i in range(20):
        y_pred_exec.append(y_pred_n[i][1])
        y_pred_err.append(y_pred_n[i][0])
        y_test_exec.append(y_test_n[i][1])
        y_test_err.append(y_test_n[i][0])

    y_pred_exec.sort()
    y_test_exec.sort()
    y_pred_err.sort()
    y_test_err.sort()
    print(y_pred_exec)
    print(y_test_exec)


    draw_BFDJ(y_pred_exec, y_pred_err, y_test_exec, y_test_err, "execution time", "error")
    
def MAPE(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    print(y_true.shape)
    print(y_pred.shape)
    acc_ex, acc_err = 0, 0
    
    for i in range(len(y_true)):
        if y_true[i][0] != 0:
            acc_ex += np.abs((y_true[i][0] - y_pred[i][0]) / y_true[i][0])
        elif y_true[i][0] == 0 and y_pred[i][0] != 0:
            acc_ex += np.abs((y_true[i][0] - y_pred[i][0]) / y_pred[i][0])
        #print(acc_ex)
        
    mean_ex = (acc_ex / len(y_true)) * 100
    
    for i in range(len(y_true)):
        if y_true[i][1] != 0:
            acc_err += np.abs((y_true[i][1] - y_pred[i][1]) / y_true[i][1])
        elif y_true[i][1] == 0 and y_pred[i][1] != 0:
            acc_err += np.abs((y_true[i][1] - y_pred[i][1]) / y_pred[i][1])
        
        #print(acc_err)
        
    mean_err = (acc_err / len(y_true)) * 100
    
    return (mean_ex + mean_err)/2
    

In [3]:
y_pred_knr=np.array(y_pred_knr)
y_test_knr=np.array(y_test_knr)
print(y_pred_knr[1][1])
print(y_test_knr[1][1])

NameError: name 'y_pred_knr' is not defined

In [27]:
df01, df02, df03 = readCSVFiles()
df1 = combineAlgorithmsToFile(df01, df02, df03)
df1 = preprocessing(df1)

# # Predict Multiple Output 

In [45]:
X_mo = df1.drop(['executionTime', 'Error'],axis=1)
y_mo = df1[['executionTime', 'Error']]
X_mo = X_mo.values

#split the data randomly
x_train_mo, x_test_mo, y_train_mo, y_test_mo = train_test_split(X_mo, y_mo, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
print(a.shape)
print(b.shape)
x_train_mo_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_mo_nd = a[['executionTime', 'Error']]
x_test_mo_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_mo_nd = b[['executionTime', 'Error']]

(33063, 15)
(6656, 15)


In [46]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = LinearRegression()
y_pred_lr, y_test_lr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_mo_nd, y_test_lr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 43.29687684884682
Mean Squared Error: 6301.143994141938
Root Mean Squared Error: 79.3797454905339
R2 score:  0.7064698246797525
Explained Variance Score:  0.7064742075043859
Mean Absolute Percentage Error:  23723170726376.867
(7944, 2)
(7944, 2)
MAPE:  1463.1520949799428


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 115.95674152323174
Mean Squared Error: 32511.966497378548
Root Mean Squared Error: 180.31074981092655
R2 score:  0.423239484451587
Explained Variance Score:  0.4353088954452023
Mean Absolute Percentage Error:  38260920910538.08
(6656, 2)
(6656, 2)
MAPE:  1232.5678667519458

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 43.53596
Mean Squared Error (avg): 6317.33655
Root Mean Squared Error (av

In [30]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = KNeighborsRegressor()
y_pred_knr, y_test_knr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_mo_nd, y_test_knr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
_regressorAlgorithm_KFold(model, X_mo, y_mo)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.83428822255789
Mean Squared Error: 290.63660277430233
Root Mean Squared Error: 17.048067420511405
R2 score:  0.9594806417378138
Explained Variance Score:  0.9598789186813522
Mean Absolute Percentage Error:  12972317126066.326
MAPE:  497.7560290536771


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 190.52729872896595
Mean Squared Error: 100535.07399625547
Root Mean Squared Error: 317.07266359031246
R2 score:  -0.1437576385808389
Explained Variance Score:  0.4300371763213948
Mean Absolute Percentage Error:  6766226.840438727
MAPE:  143.88152482671384

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 6.48116
Mean Squared Error (avg): 259.02290
Root Mean Squared Error (avg): 13.28516
R2 score (avg) 0.96240

In [31]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr, y_test_rfr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_mo_nd, y_test_rfr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.151607625530262
Mean Squared Error: 91.56865701730122
Root Mean Squared Error: 9.569151321684762
R2 score:  0.9893711508204175
Explained Variance Score:  0.9893722674179711
Mean Absolute Percentage Error:  359940732680.7515
MAPE:  15.42766968312555


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 118.7354489195394
Mean Squared Error: 39791.214022327855
Root Mean Squared Error: 199.47735215389204
R2 score:  0.5998241939048825
Explained Variance Score:  0.8582903067671774
Mean Absolute Percentage Error:  17812088370.266453
MAPE:  29.63647699895561


In [32]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr, y_test_dtr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_mo_nd, y_test_dtr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.446862204284149
Mean Squared Error: 105.23938291930561
Root Mean Squared Error: 10.258624806439975
R2 score:  0.9879428466312078
Explained Variance Score:  0.9879439862723103
Mean Absolute Percentage Error:  267103595724.62274
MAPE:  10.610289542316531


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 120.02355227313701
Mean Squared Error: 40202.90326230355
Root Mean Squared Error: 200.5066165050509
R2 score:  0.596144238345764
Explained Variance Score:  0.8425798039018316
Mean Absolute Percentage Error:  0.2870250913238438
MAPE:  28.702509132384378


In [33]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr, y_test_mlpr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_mo_nd, y_test_mlpr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 40.82214929848098
Mean Squared Error: 6466.58790075504
Root Mean Squared Error: 80.41509746779543
R2 score:  0.3984870705583446
Explained Variance Score:  0.3997239120999623
Mean Absolute Percentage Error:  116123636765120.98
MAPE:  3039.8898329224958


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 156.1765688031863
Mean Squared Error: 56411.092434137434
Root Mean Squared Error: 237.51019437939382
R2 score:  -10.601731768164603
Explained Variance Score:  -9.823248105512082
Mean Absolute Percentage Error:  543466257938026.4
MAPE:  27656.675302465566


In [34]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor, y_test_mor = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_mo_nd, y_test_mor_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 43.29540729228551
Mean Squared Error: 6301.150094767646
Root Mean Squared Error: 79.37978391736554
R2 score:  0.706467841184427
Explained Variance Score:  0.7064722252806279
Mean Absolute Percentage Error:  23912078106188.992
MAPE:  1462.880689162574


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 115.95917697169266
Mean Squared Error: 32513.53168682082
Root Mean Squared Error: 180.31509001417717
R2 score:  0.4232335087985196
Explained Variance Score:  0.43530413506156934
Mean Absolute Percentage Error:  38268066563642.97
MAPE:  1232.9424414767243


# Predict Single Output - Execution

In [51]:
X_sox = df1.drop(['executionTime', 'Error'],axis=1)
y_sox = df1[['executionTime']]
X_sox = X_sox.values

#split the data randomly
x_train_sox, x_test_sox, y_train_sox, y_test_sox = train_test_split(X_sox, y_sox, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
x_train_sox_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_sox_nd = a[['executionTime']]
x_test_sox_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_sox_nd = b[['executionTime']]

In [52]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = LinearRegression()
y_pred_lr_sox, y_test_lr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_sox_nd, y_test_lr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 78.60858888499118
Mean Squared Error: 12503.88243804214
Root Mean Squared Error: 111.82076031776094
R2 score:  0.7799347979488677
Explained Variance Score:  0.7799349337934924
Mean Absolute Percentage Error:  5.357850929487972


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 223.21519697997226
Mean Squared Error: 64901.635386231355
Root Mean Squared Error: 254.75799376316212
R2 score:  0.39149614114842946
Explained Variance Score:  0.3956989418088883
Mean Absolute Percentage Error:  2.9607921754212905

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 79.07419
Mean Squared Error (avg): 12536.17455
Root Mean Squared Error (avg): 111.94887
R2 score (avg) 0.78120
Explained Variance (avg): 0.78124
Mean Abs

In [53]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = KNeighborsRegressor()
y_pred_knr_sox, y_test_knr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_sox_nd, y_test_knr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
_regressorAlgorithm_KFold(model, X_sox, y_sox)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 10.738521291540785
Mean Squared Error: 562.1967035727897
Root Mean Squared Error: 23.710687539014756
R2 score:  0.9901054786961354
Explained Variance Score:  0.9901304388877623
Mean Absolute Percentage Error:  0.07660717623072895


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 374.14752323918276
Mean Squared Error: 200979.67916300517
Root Mean Squared Error: 448.3075720562895
R2 score:  -0.8843425068358768
Explained Variance Score:  0.24271959732304982
Mean Absolute Percentage Error:  0.7838873345703958

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 10.16477
Mean Squared Error (avg): 500.22090
Root Mean Squared Error (avg): 22.34888
R2 score (avg) 0.99127
Explained Variance (avg): 0.99130
Mean

In [68]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr_sox, y_test_rfr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_sox_nd, y_test_rfr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 3.5268705109616185
Mean Squared Error: 177.71746484631566
Root Mean Squared Error: 13.331071406541774
R2 score:  0.9968722170891153
Explained Variance Score:  0.9968722573728402
Mean Absolute Percentage Error:  0.01856863445833698


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 235.91860049343282
Mean Squared Error: 79835.4781570572
Root Mean Squared Error: 282.551726515796
R2 score:  0.2514796238534277
Explained Variance Score:  0.7732316264033323
Mean Absolute Percentage Error:  0.42916702544913937


In [55]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr_sox, y_test_dtr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_sox_nd, y_test_dtr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 4.104553128776435
Mean Squared Error: 206.84853356935565
Root Mean Squared Error: 14.382229784333013
R2 score:  0.9963595175690849
Explained Variance Score:  0.9963595706512506
Mean Absolute Percentage Error:  0.021428623028435335


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 236.30936769831732
Mean Squared Error: 80464.5768515542
Root Mean Squared Error: 283.662787216713
R2 score:  0.24558132898116747
Explained Variance Score:  0.7399975832998598
Mean Absolute Percentage Error:  0.42985210380860983


In [56]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr_sox, y_test_mlpr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_sox_nd, y_test_mlpr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 78.30462859646943
Mean Squared Error: 16175.493330580814
Root Mean Squared Error: 127.18291288762344
R2 score:  0.715315364990877
Explained Variance Score:  0.7153710466372847
Mean Absolute Percentage Error:  1.7685165713630142


--- small graphs for training, large graphs for testing ---


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 261.1389610698077
Mean Squared Error: 100231.05736909473
Root Mean Squared Error: 316.59288900588837
R2 score:  0.060255033283701964
Explained Variance Score:  0.0868377311228864
Mean Absolute Percentage Error:  2.7512066211982824


In [57]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor_sox, y_test_mor_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_sox_nd, y_test_mor_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 78.6055665709657
Mean Squared Error: 12503.89362839962
Root Mean Squared Error: 111.82081035477977
R2 score:  0.7799346010013762
Explained Variance Score:  0.7799347363881266
Mean Absolute Percentage Error:  5.357511317938575


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 223.22046088093273
Mean Squared Error: 64904.76967742489
Root Mean Squared Error: 254.76414519595352
R2 score:  0.39146675470424075
Explained Variance Score:  0.39567107582443595
Mean Absolute Percentage Error:  2.960868535462173


# Predict Single Output - Error 

In [60]:
X_soe = df1.drop(['executionTime', 'Error'],axis=1)
y_soe = df1[['Error']]
X_soe = X_soe.values

#split the data randomly
x_train_soe, x_test_soe, y_train_soe, y_test_soe = train_test_split(X_soe, y_soe, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
x_train_soe_nd = a.drop(['executionTime', 'Error'],axis=1)
y_train_soe_nd = a[['Error']]
x_test_soe_nd = b.drop(['executionTime', 'Error'],axis=1)
y_test_soe_nd = b[['Error']]

In [61]:
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = LinearRegression()
y_pred_lr_soe, y_test_lr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_lr_soe_nd, y_test_lr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.985164812702663
Mean Squared Error: 98.40555024173175
Root Mean Squared Error: 9.919957169349662
R2 score:  0.6330048514106378
Explained Variance Score:  0.6330134812152786
Mean Absolute Percentage Error:  47446341452748.82


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.69828606649093
Mean Squared Error: 122.29760852593235
Root Mean Squared Error: 11.058824916144227
R2 score:  0.4549828277547452
Explained Variance Score:  0.4749188490815184
Mean Absolute Percentage Error:  76521841821072.6

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 7.99773
Mean Squared Error (avg): 98.49856
Root Mean Squared Error (avg): 9.92416
R2 score (avg) 0.63273
Explained Variance (avg): 0.63285
Mean Absolute Percentage

In [62]:
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = KNeighborsRegressor()
y_pred_knr_soe, y_test_knr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_knr_soe_nd, y_test_knr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
_regressorAlgorithm_KFold(model, X_soe, y_soe)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.930055153575025
Mean Squared Error: 19.07650197581435
Root Mean Squared Error: 4.367665506402059
R2 score:  0.9288558047794923
Explained Variance Score:  0.9296273984749437
Mean Absolute Percentage Error:  25944634252132.75


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.907074218750001
Mean Squared Error: 90.46882950585758
Root Mean Squared Error: 9.511510369329237
R2 score:  0.5968272296741979
Explained Variance Score:  0.6173547553197432
Mean Absolute Percentage Error:  13532452.896989848

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 2.79755
Mean Squared Error (avg): 17.82490
Root Mean Squared Error (avg): 4.22145
R2 score (avg) 0.93353
Explained Variance (avg): 0.93424
Mean Absolute Perce

In [63]:
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
y_pred_rfr_soe, y_test_rfr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_rfr_soe_nd, y_test_rfr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.4704840696177987
Mean Squared Error: 4.3768135930962355
Root Mean Squared Error: 2.092083553086787
R2 score:  0.9836770451361685
Explained Variance Score:  0.9836785476828556
Mean Absolute Percentage Error:  130958146.2958586


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.934782948044088
Mean Squared Error: 16.56016795471688
Root Mean Squared Error: 4.069418626132839
R2 score:  0.9261998985967704
Explained Variance Score:  0.926288912416843
Mean Absolute Percentage Error:  77811592.2450326


In [64]:
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = DecisionTreeRegressor(max_depth=1000, splitter="best", criterion="mse", min_samples_split=2)
y_pred_dtr_soe, y_test_dtr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_dtr_soe_nd, y_test_dtr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.4685386528617204
Mean Squared Error: 4.608390627973234
Root Mean Squared Error: 2.146716243003074
R2 score:  0.9828133982370272
Explained Variance Score:  0.9828146859659961
Mean Absolute Percentage Error:  170075514.65978175


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 3.1465757151442277
Mean Squared Error: 20.36086378253124
Root Mean Squared Error: 4.512301384275128
R2 score:  0.9092621635289544
Explained Variance Score:  0.9095101832197485
Mean Absolute Percentage Error:  0.13313840398391735


In [65]:
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
y_pred_mlpr_soe, y_test_mlpr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mlpr_soe_nd, y_test_mlpr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 9.196089568783094
Mean Squared Error: 131.89666840468084
Root Mean Squared Error: 11.484627482190305
R2 score:  0.5081025683946641
Explained Variance Score:  0.5089045010344985
Mean Absolute Percentage Error:  110799871774549.73


--- small graphs for training, large graphs for testing ---


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 9.086768251446811
Mean Squared Error: 130.52945223391762
Root Mean Squared Error: 11.42494867532969
R2 score:  0.4182977589772988
Explained Variance Score:  0.4332947550235846
Mean Absolute Percentage Error:  145495445165744.56


In [66]:
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MultiOutputRegressor(Ridge(random_state=123))
y_pred_mor_soe, y_test_mor_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
y_pred_mor_soe_nd, y_test_mor_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.985248013605315
Mean Squared Error: 98.40656113567145
Root Mean Squared Error: 9.920008121754309
R2 score:  0.6330010813674776
Explained Variance Score:  0.6330097141731292
Mean Absolute Percentage Error:  47824156212372.625


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.697893062452586
Mean Squared Error: 122.29369621674866
Root Mean Squared Error: 11.058648028432257
R2 score:  0.4550002628927985
Explained Variance Score:  0.47493719429870274
Mean Absolute Percentage Error:  76536133127282.98
