In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
from numpy import absolute, mean, std
from sklearn import preprocessing, metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, explained_variance_score
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold, KFold, validation_curve
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder;
from sklearn.ensemble import RandomForestRegressor

In [234]:
def readCSVFiles():
    """Load the three per-algorithm benchmark result tables.

    Returns:
        A 3-tuple of DataFrames read, in this order, from
        ``bellman_performance_results.csv``,
        ``dijkstra_performance_results.csv`` and
        ``hybrid_performance_results.csv`` (paths are relative to the
        current working directory).
    """
    csv_paths = (
        "bellman_performance_results.csv",
        "dijkstra_performance_results.csv",
        "hybrid_performance_results.csv",
    )
    bellman_df, dijkstra_df, hybrid_df = (pd.read_csv(path) for path in csv_paths)
    return bellman_df, dijkstra_df, hybrid_df

def combineAlgorithmsToFile(df01, df02, df03):
    """Stack three result tables and tag every row with a one-hot algorithm flag.

    Args:
        df01: Rows that get ``AlgoBellman == 1``.
        df02: Rows that get ``AlgoDijkstra == 1``.
        df03: Rows that get ``AlgoHybrid == 1``.

    Returns:
        A new DataFrame holding the rows of ``df01``, ``df02`` and ``df03``
        (in that order, with a fresh 0..n-1 index) plus the integer columns
        ``AlgoBellman``, ``AlgoDijkstra`` and ``AlgoHybrid``.

    The inputs are left untouched: the flag columns are added to copies
    (``DataFrame.assign``) rather than written into the caller's frames.
    """
    flag_columns = ('AlgoBellman', 'AlgoDijkstra', 'AlgoHybrid')

    tagged_frames = []
    for position, frame in enumerate((df01, df02, df03)):
        # Exactly one flag is 1 for each source frame; the other two are 0.
        flags = {name: int(index == position) for index, name in enumerate(flag_columns)}
        tagged_frames.append(frame.assign(**flags))

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent. sort=False keeps the column order of the inputs.
    return pd.concat(tagged_frames, ignore_index=True, sort=False)

# Small constant added to every 'Error' value by executionTimeToSpeedup.
# NOTE(review): presumably keeps Error away from exact zero for the
# percentage-based metrics used later -- confirm.
beta = 0.0000000001
def executionTimeToSpeedup(df):
    """Add a 'speedup' column to ``df`` in place and offset 'Error' by ``beta``.

    For every (algorithm flag, vertexNum) group, speedup is the
    'executionTime' of the group's row with ``sOriginalDistance == 1``
    divided by each row's own 'executionTime'. Rows outside the listed
    algorithm flags / vertex counts keep the initial speedup of 1.0.

    Args:
        df: Frame with the columns 'AlgoBellman', 'AlgoDijkstra',
            'AlgoHybrid', 'vertexNum', 'sOriginalDistance', 'executionTime'
            and 'Error'.

    Returns:
        The same ``df`` object, modified in place.
    """
    algorithm_flags = ['AlgoBellman', 'AlgoDijkstra', 'AlgoHybrid']
    vertex_counts = [524288, 1048576, 2097152, 4194304, 8388608, 16777216]

    df['speedup'] = 1.0

    for flag in algorithm_flags:
        uses_algorithm = df[flag] == 1
        for vertex_count in vertex_counts:
            in_group = uses_algorithm & (df['vertexNum'] == vertex_count)
            baseline_time = df.loc[in_group & (df['sOriginalDistance'] == 1), 'executionTime']
            group_time = df.loc[in_group, 'executionTime'].astype(float)
            # baseline_time.values is expected to hold one value that
            # broadcasts over the whole group.
            df.loc[in_group, 'speedup'] = baseline_time.values / group_time

    df['Error'] = df['Error'] + beta
    return df
    

def preprocessing(df1):
    """Derive 'graphSize', drop raw columns and rescale 'minProcessEdge'.

    Steps:
      1. Map 'vertexNum' (524288 ... 16777216) to 'graphSize' 1, 2, 4, 8, 16, 32
         (any other vertex count becomes 0).
      2. Drop 'vertexNum', 'edgeNum', 'maxEdgeDegree', 'sAtomicBlock' and
         'executionTime', then drop rows containing NaN.
      3. Keep only rows with ``minProcessEdge >= 0``.
      4. Within every graph size, min-max scale 'minProcessEdge' to [0, 1] and
         map it through ``1 / (9 * scaled + 1)``, giving values in [0.1, 1].

    Args:
        df1: Frame containing the columns listed above. It is not modified.

    Returns:
        A new DataFrame with the transformed columns.

    Notes:
        * A graph-size group whose 'minProcessEdge' values are all equal has
          no range to scale by; it is treated as ``scaled == 0`` (result 1.0)
          instead of producing NaN through 0/0.
        * This function shares its name with ``sklearn.preprocessing`` as
          imported at the top of the notebook, and shadows that module.
    """
    vertex_counts = [524288, 1048576, 2097152, 4194304, 8388608, 16777216]
    graph_sizes = [1, 2, 4, 8, 16, 32]

    size_conditions = [df1['vertexNum'] == count for count in vertex_counts]
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    frame = df1.assign(graphSize=np.select(size_conditions, graph_sizes, default=0))

    frame = frame.drop(['vertexNum', 'edgeNum', 'maxEdgeDegree',
                        'sAtomicBlock', 'executionTime'], axis=1).dropna()

    # Explicit copy: the per-group writes below must land in this frame and
    # not in a temporary slice (avoids SettingWithCopyWarning).
    frame = frame[frame['minProcessEdge'] >= 0].copy()
    # The rescaled values are fractional, so make the column float up front.
    frame['minProcessEdge'] = frame['minProcessEdge'].astype(float)

    for size in graph_sizes:
        in_group = frame['graphSize'] == size
        values = frame.loc[in_group, 'minProcessEdge']
        if values.empty:
            continue

        lowest = values.min()
        value_range = values.max() - lowest
        if value_range == 0:
            scaled = pd.Series(0.0, index=values.index)
        else:
            scaled = (values - lowest) / value_range

        frame.loc[in_group, 'minProcessEdge'] = 1.0 / (scaled * 9 + 1)

    return frame



def _regressorAlgorithm(model, x_train, x_test, y_train, y_test):
    """Fit ``model`` on the training split and print hold-out metrics.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        x_train, y_train: Training features and targets.
        x_test, y_test: Hold-out features and targets.

    Returns:
        ``(y_pred, y_test)`` -- the predictions for ``x_test`` and the
        unchanged ``y_test`` that was passed in.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    print("\n### Performance Summarization of _regressorAlgorithm ###\n")

    # (label, metric) pairs, evaluated and printed one at a time in this order.
    report = (
        ('Mean Absolute Error:', mean_absolute_error),
        ('Mean Squared Error:', mean_squared_error),
        ('Root Mean Squared Error:',
         lambda truth, predicted: np.sqrt(mean_squared_error(truth, predicted))),
        ('R2 score: ', r2_score),
        ('Explained Variance Score: ', explained_variance_score),
        ('Mean Absolute Percentage Error: ', mean_absolute_percentage_error),
    )
    for label, metric in report:
        print(label, metric(y_test, y_pred))

    return y_pred, y_test

def _regressorAlgorithm_KFold(model, X, y):
    """Print repeated 10-fold cross-validation averages for ``model``.

    Uses ``RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)`` and
    reports the mean over all folds of MAE, MSE, RMSE, R2, explained variance
    and MAPE.

    Args:
        model: Unfitted scikit-learn estimator; it is cloned for each fold.
        X: Feature matrix.
        y: Target vector or matrix.

    Returns:
        None. The averages are printed.

    Notes:
        * Only the ``neg_*`` scorers are sign-flipped. R2 and explained
          variance are reported as computed: they can be negative for a model
          that performs worse than predicting the mean, and taking their
          absolute value would disguise that.
        * All metrics come from a single ``cross_validate`` run, so every fold
          is fitted once instead of once per metric.
    """
    # Imported here because the notebook's import cell does not provide it.
    from sklearn.model_selection import cross_validate

    cv = RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)

    negated_scorers = [
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
        'neg_mean_absolute_percentage_error',
    ]
    signed_scorers = ['r2', 'explained_variance']

    # evaluate the model once and collect every score per fold
    results = cross_validate(model, X, y, scoring=negated_scorers + signed_scorers,
                             cv=cv, n_jobs=-1)

    def fold_scores(scorer_name):
        # cross_validate stores each metric under the key 'test_<scorer name>'.
        return results['test_' + scorer_name]

    # error metrics are returned negated by scikit-learn; make them positive
    n_scores_mae = absolute(fold_scores('neg_mean_absolute_error'))
    n_scores_mse = absolute(fold_scores('neg_mean_squared_error'))
    n_scores_rmse = absolute(fold_scores('neg_root_mean_squared_error'))
    n_scores_mape = absolute(fold_scores('neg_mean_absolute_percentage_error'))
    # signed metrics are kept as-is
    n_scores_r2 = fold_scores('r2')
    n_scores_ev = fold_scores('explained_variance')

    # summarize performance
    print("\n### Performance Summarization of _regressorAlgorithm_KFold ###\n")
    print('Mean Absolute Error (avg): %.5f' % mean(n_scores_mae))
    print('Mean Squared Error (avg): %.5f' % mean(n_scores_mse))
    print('Root Mean Squared Error (avg): %.5f' % mean(n_scores_rmse))
    print('R2 score (avg) %.5f' % mean(n_scores_r2))
    print('Explained Variance (avg): %.5f' % mean(n_scores_ev))
    print('Mean Absolute Percentage Error (avg): %.5f' % mean(n_scores_mape))
    
def draw_BFDJ(arr1_b, arr2_b, arr1_d, arr2_d, x_label, y_label):
    """Plot two line series on one set of axes and show the figure.

    Args:
        arr1_b, arr2_b: x and y values of the red series, labelled
            'predicted_data' and drawn with six-pointed asterisk markers.
        arr1_d, arr2_d: x and y values of the blue series, labelled
            'test_data' and drawn with circle markers.
        x_label: Text for the x axis.
        y_label: Text for the y axis.
    """
    predicted_marker = mpath.Path.unit_regular_asterisk(6)
    test_marker = mpath.Path.unit_circle()

    figure = plt.figure()
    axes = figure.add_subplot(111)

    axes.plot(arr1_b, arr2_b, color='red', label='predicted_data',
              marker=predicted_marker, markersize=7)
    axes.plot(arr1_d, arr2_d, color='blue', label='test_data',
              marker=test_marker, markersize=4)

    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.legend()

    plt.show()
    
def lineGraphOfPredictionError(y_test, y_pred):
    """Plot the smallest (up to 20) sorted target values, predicted vs. actual.

    Each column of ``y_test`` and ``y_pred`` is sorted independently in
    ascending order, the first 20 entries (or fewer when less data is
    available) are kept, printed, and handed to ``draw_BFDJ``.

    Args:
        y_test: True targets, two columns (DataFrame or array-like).
        y_pred: Predicted targets, two columns (array-like).

    Notes:
        * Column 1 is plotted on the x axis as "execution time" and column 0
          on the y axis as "error".
          NOTE(review): the notebook builds its two-target frames as
          ['speedup', 'Error'], which would make column 0 the speedup --
          confirm the intended column order before relying on the axis labels.
        * The inputs are copied before sorting, so the caller's arrays and
          DataFrames keep their original row order.
        * Sorting is done per column (axis 0). A plain ``ndarray.sort()``
          sorts along the last axis, which would swap the two target values
          within each row.
    """
    # np.array(...) copies; the in-place sorts below only touch the copies.
    truth = np.array(y_test, dtype=float)
    predicted = np.array(y_pred, dtype=float)
    truth.sort(axis=0)
    predicted.sort(axis=0)

    # Never index past the available rows.
    shown = min(20, len(truth), len(predicted))

    y_pred_exec = predicted[:shown, 1].tolist()
    y_pred_err = predicted[:shown, 0].tolist()
    y_test_exec = truth[:shown, 1].tolist()
    y_test_err = truth[:shown, 0].tolist()

    print(y_pred_exec)
    print(y_test_exec)

    draw_BFDJ(y_pred_exec, y_pred_err, y_test_exec, y_test_err, "execution time", "error")
    
def MAPE(y_true, y_pred):
    """Average the mean absolute percentage error of the first two columns.

    For each of columns 0 and 1 the relative error of a row is
    ``|true - pred| / |true|``; when the true value is 0 the prediction is
    used as the denominator instead, and rows where both are 0 add nothing.
    The per-column sums are divided by the number of rows and multiplied by
    100. The shapes of both inputs are printed first.

    Args:
        y_true: Two-column array-like of true values.
        y_pred: Two-column array-like of predictions, same number of rows.

    Returns:
        The mean of the two per-column percentage errors.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    print(y_true.shape)
    print(y_pred.shape)
    row_count = len(y_true)

    def column_percentage_error(column):
        # Accumulate the relative error of one target column, row by row.
        total = 0
        for row in range(row_count):
            actual = y_true[row][column]
            predicted = y_pred[row][column]
            if actual != 0:
                total += np.abs((actual - predicted) / actual)
            elif predicted != 0:
                # True value is zero: fall back to the prediction as denominator.
                total += np.abs((actual - predicted) / predicted)
        return (total / row_count) * 100

    mean_ex = column_percentage_error(0)
    mean_err = column_percentage_error(1)

    return (mean_ex + mean_err)/2
    

In [287]:
# Disabled scratch code, kept as a bare string literal so it never runs.
# It would convert the KNeighbors predictions/targets to arrays and print
# element [1][1] of each; those variables are only created in a later cell.
'''
y_pred_knr=np.array(y_pred_knr)
y_test_knr=np.array(y_test_knr)
print(y_pred_knr[1][1])
print(y_test_knr[1][1])
'''

In [288]:
# Data pipeline: load the three CSVs, stack them with one-hot algorithm flags,
# derive the 'speedup' target, then clean/rescale the features.
df01, df02, df03 = readCSVFiles()
df1 = combineAlgorithmsToFile(df01, df02, df03)
# executionTimeToSpeedup returns the frame it was given, so df2 and the
# combined frame are the same object at this point.
df2 = executionTimeToSpeedup(df1)
# df1 is rebound to the preprocessed frame; every later cell reads this df1.
df1 = preprocessing(df2)

# Predict Multiple Output

In [289]:
# Features are every column except the two targets; targets are speedup and Error.
X_mo = df1.drop(['speedup', 'Error'],axis=1)
y_mo = df1[['speedup', 'Error']]
X_mo = X_mo.values

#split the data randomly
# 20% of the rows are held out; random_state fixes the shuffle.
x_train_mo, x_test_mo, y_train_mo, y_test_mo = train_test_split(X_mo, y_mo, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
# a: every row that is not graphSize 32, plus the graphSize-32 rows with sOriginalDistance == 1.
# b: the graphSize-32 rows with sOriginalDistance == 0 (held out for testing).
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
print(a.shape)
print(b.shape)
# Unlike X_mo above, these feature sets stay DataFrames (no .values).
x_train_mo_nd = a.drop(['speedup', 'Error'],axis=1)
y_train_mo_nd = a[['speedup', 'Error']]
x_test_mo_nd = b.drop(['speedup', 'Error'],axis=1)
y_test_mo_nd = b[['speedup', 'Error']]

(33063, 15)
(6656, 15)


In [290]:
# Linear regression on both targets (speedup and Error) at once.
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = LinearRegression()
# Fit and score on the random train/test split.
y_pred_lr, y_test_lr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_lr_mo_nd, y_test_lr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_mo, y_mo)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.84313021558161
Mean Squared Error: 125.78670920579579
Root Mean Squared Error: 11.21546740915401
R2 score:  0.3961814151426306
Explained Variance Score:  0.39625789924652416
Mean Absolute Percentage Error:  52676034.02701403


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.442025800934283
Mean Squared Error: 169.00443927912374
Root Mean Squared Error: 13.000170740383519
R2 score:  0.30427734336554935
Explained Variance Score:  0.31431095292394073
Mean Absolute Percentage Error:  84956322.86213845

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 6.89141
Mean Squared Error (avg): 132.03515
Root Mean Squared Error (avg): 11.38822
R2 score (avg) 0.39867
Explained Variance (avg): 0.39885
Mean Absolute Percent

In [291]:
# K-nearest-neighbours regression (default settings) on both targets at once.
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = KNeighborsRegressor()
# Fit and score on the random train/test split.
y_pred_knr, y_test_knr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_knr_mo_nd, y_test_knr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_mo, y_mo)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 1.5889085382517807
Mean Squared Error: 9.918802173435845
Root Mean Squared Error: 3.1494129887069184
R2 score:  0.9623392964597497
Explained Variance Score:  0.9627299240105552
Mean Absolute Percentage Error:  28804335.285324987


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 4.151517402156787
Mean Squared Error: 76.34778049360804
Root Mean Squared Error: 8.737721699253648
R2 score:  0.6763280681666504
Explained Variance Score:  0.6885413457741213
Mean Absolute Percentage Error:  16.17431300094221

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 1.51092
Mean Squared Error (avg): 9.21246
Root Mean Squared Error (avg): 2.49608
R2 score (avg) 0.96524
Explained Variance (avg): 0.96559
Mean Absolute Percenta

In [292]:
# Random forest (100 trees, seeded) on both targets at once.
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
# Fit and score on the random train/test split.
y_pred_rfr, y_test_rfr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_rfr_mo_nd, y_test_rfr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.28050733047532483
Mean Squared Error: 2.3247736368646095
Root Mean Squared Error: 1.5247208389946696
R2 score:  0.9910898791423568
Explained Variance Score:  0.9910903628504993
Mean Absolute Percentage Error:  33771.43075297826


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 1.807043768934095
Mean Squared Error: 11.177457474339203
Root Mean Squared Error: 3.3432704757974943
R2 score:  0.9513952488462594
Explained Variance Score:  0.9516403830795548
Mean Absolute Percentage Error:  1787.984268374141


In [293]:
# Decision tree on both targets (speedup and Error) at once.
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
# The split criterion is left at its default (squared error). Passing the old
# spelling criterion="mse" is rejected by scikit-learn >= 1.2, while the
# default selects the same criterion on every version.
model = DecisionTreeRegressor(max_depth=1000, splitter="best", min_samples_split=2)
# Fit and score on the random train/test split.
y_pred_dtr, y_test_dtr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_dtr_mo_nd, y_test_dtr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.2846369287927214
Mean Squared Error: 2.4717381272753656
Root Mean Squared Error: 1.5721762392541638
R2 score:  0.9904801982740589
Explained Variance Score:  0.9904806943248827
Mean Absolute Percentage Error:  22973.34497162483


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 1.7920324066131532
Mean Squared Error: 11.611111417586976
Root Mean Squared Error: 3.407508095014152
R2 score:  0.9495083521901037
Explained Variance Score:  0.9499654287406138
Mean Absolute Percentage Error:  0.12006376634524427


In [294]:
# Small multi-layer perceptron (hidden layers 4-8-4, ReLU, seeded) on both targets at once.
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
# Fit and score on the random train/test split.
y_pred_mlpr, y_test_mlpr = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mlpr_mo_nd, y_test_mlpr_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.92773065785158
Mean Squared Error: 239.70670051533892
Root Mean Squared Error: 15.482464290781973
R2 score:  -0.060287263382106215
Explained Variance Score:  0.009522097202763702
Mean Absolute Percentage Error:  131680460.9283236


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.979851129042608
Mean Squared Error: 228.65880161594657
Root Mean Squared Error: 15.121468236118693
R2 score:  0.04892946067091092
Explained Variance Score:  0.06443222181679681
Mean Absolute Percentage Error:  69675413.13579233


In [295]:
# One Ridge regressor per target, wrapped in MultiOutputRegressor.
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT")
model = MultiOutputRegressor(Ridge(random_state=123))
# Fit and score on the random train/test split.
y_pred_mor, y_test_mor = _regressorAlgorithm(model, x_train_mo, x_test_mo, y_train_mo, y_test_mo)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mor_mo_nd, y_test_mor_mo_nd = _regressorAlgorithm(model, x_train_mo_nd, x_test_mo_nd, y_train_mo_nd, y_test_mo_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_mo, y_mo)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON MULTIPLE OUTPUT

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.8431116010414055
Mean Squared Error: 125.78756123850957
Root Mean Squared Error: 11.215505393806806
R2 score:  0.39617762793005085
Explained Variance Score:  0.39625409982773224
Mean Absolute Percentage Error:  53095492.6696085


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.441842630659741
Mean Squared Error: 169.00279894611677
Root Mean Squared Error: 13.000107651327998
R2 score:  0.3042848216841792
Explained Variance Score:  0.31431948787735076
Mean Absolute Percentage Error:  84972189.40299758


# Predict Single Output - Execution

In [296]:
# Same features as the multi-output setup; the only target here is speedup.
# y is kept as a one-column DataFrame (double brackets), not a Series.
X_sox = df1.drop(['speedup', 'Error'],axis=1)
y_sox = df1[['speedup']]
X_sox = X_sox.values

#split the data randomly
# 20% of the rows are held out; random_state fixes the shuffle.
x_train_sox, x_test_sox, y_train_sox, y_test_sox = train_test_split(X_sox, y_sox, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
# a: every row that is not graphSize 32, plus the graphSize-32 rows with sOriginalDistance == 1.
# b: the graphSize-32 rows with sOriginalDistance == 0 (held out for testing).
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
x_train_sox_nd = a.drop(['speedup', 'Error'],axis=1)
y_train_sox_nd = a[['speedup']]
x_test_sox_nd = b.drop(['speedup', 'Error'],axis=1)
y_test_sox_nd = b[['speedup']]

In [297]:
# Linear regression predicting speedup only.
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = LinearRegression()
# Fit and score on the random train/test split.
y_pred_lr_sox, y_test_lr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_lr_sox_nd, y_test_lr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_sox, y_sox)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 5.701095618460538
Mean Squared Error: 153.16786816985947
Root Mean Squared Error: 12.376100685185923
R2 score:  0.159357978874625
Explained Variance Score:  0.1595023172777681
Mean Absolute Percentage Error:  2.720660445134634


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.18576553537762
Mean Squared Error: 215.711270032316
Root Mean Squared Error: 14.687112378963947
R2 score:  0.15357185897634995
Explained Variance Score:  0.15370305676636586
Mean Absolute Percentage Error:  2.6823209899229346

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 5.78508
Mean Squared Error (avg): 165.57174
Root Mean Squared Error (avg): 12.85228
R2 score (avg) 0.16461
Explained Variance (avg): 0.16486
Mean Absolute P

In [298]:
# K-nearest-neighbours regression (default settings) predicting speedup only.
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = KNeighborsRegressor()
# Fit and score on the random train/test split.
y_pred_knr_sox, y_test_knr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_knr_sox_nd, y_test_knr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_sox, y_sox)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.24776192292852806
Mean Squared Error: 0.7611023710573066
Root Mean Squared Error: 0.872411812768091
R2 score:  0.9958227881400074
Explained Variance Score:  0.9958324495461685
Mean Absolute Percentage Error:  0.0696625710877094


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 1.395960585563599
Mean Squared Error: 62.22673148135841
Root Mean Squared Error: 7.888392198753711
R2 score:  0.7558289066591032
Explained Variance Score:  0.7597279362285021
Mean Absolute Percentage Error:  0.20695617316819567

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 0.22430
Mean Squared Error (avg): 0.60002
Root Mean Squared Error (avg): 0.77070
R2 score (avg) 0.99694
Explained Variance (avg): 0.99694
Mean Absolu

In [299]:
# Random forest (100 trees, seeded) predicting speedup only.
# y is a one-column DataFrame here rather than a 1-D array.
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
# Fit and score on the random train/test split.
y_pred_rfr_sox, y_test_rfr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_rfr_sox_nd, y_test_rfr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.07685436320996744
Mean Squared Error: 0.2753706468321316
Root Mean Squared Error: 0.5247577029755081
R2 score:  0.9984886638439412
Explained Variance Score:  0.998488842530101
Mean Absolute Percentage Error:  0.0185652919706077


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.5465914707121303
Mean Squared Error: 4.481350952828815
Root Mean Squared Error: 2.1169201574052847
R2 score:  0.9824156542413885
Explained Variance Score:  0.9828424949794616
Mean Absolute Percentage Error:  0.09908414955026237


In [300]:
# Decision tree predicting speedup only.
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
# The split criterion is left at its default (squared error). Passing the old
# spelling criterion="mse" is rejected by scikit-learn >= 1.2, while the
# default selects the same criterion on every version.
model = DecisionTreeRegressor(max_depth=1000, splitter="best", min_samples_split=2)
# Fit and score on the random train/test split.
y_pred_dtr_sox, y_test_dtr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_dtr_sox_nd, y_test_dtr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Eyeball the first 20 predictions next to the first 20 true values.
print(y_pred_dtr_sox_nd[:20])
print(y_test_dtr_sox_nd.head(20))
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.08711011186782636
Mean Squared Error: 0.3357590851331721
Root Mean Squared Error: 0.5794472237686294
R2 score:  0.9981572297159313
Explained Variance Score:  0.9981578476066854
Mean Absolute Percentage Error:  0.021589939388808335


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.5519648413947675
Mean Squared Error: 4.680050930443621
Root Mean Squared Error: 2.1633425365493144
R2 score:  0.9816359766072587
Explained Variance Score:  0.9820231043962541
Mean Absolute Percentage Error:  0.1005402839820028
[2.13613621 1.05221822 1.00198977 0.99584561 1.00360775 0.99996672
 1.00246734 1.01864243 1.05853588 1.08785035 1.10445703 1.12505949
 1.15975967 1.19089074 0.99820584 1.31919487 1.31919487 1.31919487
 1.44335708 1.3430526 ]
        s

In [301]:
# Small multi-layer perceptron (hidden layers 4-8-4, ReLU, seeded) predicting speedup only.
# y is a one-column DataFrame here rather than a 1-D array.
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
# Fit and score on the random train/test split.
y_pred_mlpr_sox, y_test_mlpr_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mlpr_sox_nd, y_test_mlpr_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.065422264346664
Mean Squared Error: 155.46786493381586
Root Mean Squared Error: 12.468675347999717
R2 score:  0.14673474430632816
Explained Variance Score:  0.17694338413870292
Mean Absolute Percentage Error:  3.092266675343097


--- small graphs for training, large graphs for testing ---


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.055809954940118
Mean Squared Error: 239.786889283299
Root Mean Squared Error: 15.485053738469848
R2 score:  0.05910168296955243
Explained Variance Score:  0.12206543833448347
Mean Absolute Percentage Error:  4.01032100889165


In [302]:
# Ridge wrapped in MultiOutputRegressor; the target frame has a single column (speedup).
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION")
model = MultiOutputRegressor(Ridge(random_state=123))
# Fit and score on the random train/test split.
y_pred_mor_sox, y_test_mor_sox = _regressorAlgorithm(model, x_train_sox, x_test_sox, y_train_sox, y_test_sox)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mor_sox_nd, y_test_mor_sox_nd = _regressorAlgorithm(model, x_train_sox_nd, x_test_sox_nd, y_train_sox_nd, y_test_sox_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_sox, y_sox)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT EXECUTION

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 5.700975188477495
Mean Squared Error: 153.16856134134764
Root Mean Squared Error: 12.37612868959222
R2 score:  0.15935417449262412
Explained Variance Score:  0.15949848548233525
Mean Absolute Percentage Error:  2.7205831593962166


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.185792198866898
Mean Squared Error: 215.7119016754849
Root Mean Squared Error: 14.687133882261879
R2 score:  0.15356938047555968
Explained Variance Score:  0.15370178145599867
Mean Absolute Percentage Error:  2.682211876992542


# Predict Single Output - Error 

In [303]:
# Same features as the multi-output setup; the only target here is Error.
# y is kept as a one-column DataFrame (double brackets), not a Series.
X_soe = df1.drop(['speedup', 'Error'],axis=1)
y_soe = df1[['Error']]
X_soe = X_soe.values

#split the data randomly
# 20% of the rows are held out; random_state fixes the shuffle.
x_train_soe, x_test_soe, y_train_soe, y_test_soe = train_test_split(X_soe, y_soe, test_size=0.2, random_state=13)

#split data - small graphs for training, large graphs for testing
# a: every row that is not graphSize 32, plus the graphSize-32 rows with sOriginalDistance == 1.
# b: the graphSize-32 rows with sOriginalDistance == 0 (held out for testing).
a = df1.loc[(df1['graphSize'] != 32) | (df1['sOriginalDistance'] == 1)]
b = df1.loc[(df1['graphSize'] == 32) & (df1['sOriginalDistance'] == 0)]
x_train_soe_nd = a.drop(['speedup', 'Error'],axis=1)
y_train_soe_nd = a[['Error']]
x_test_soe_nd = b.drop(['speedup', 'Error'],axis=1)
y_test_soe_nd = b[['Error']]

In [304]:
# Linear regression predicting Error only.
print("LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = LinearRegression()
# Fit and score on the random train/test split.
y_pred_lr_soe, y_test_lr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_lr_soe_nd, y_test_lr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_soe, y_soe)

LINEAR REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.985164812702663
Mean Squared Error: 98.40555024173176
Root Mean Squared Error: 9.919957169349662
R2 score:  0.6330048514106379
Explained Variance Score:  0.6330134812152786
Mean Absolute Percentage Error:  105352065.33336726


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.69828606649094
Mean Squared Error: 122.29760852593269
Root Mean Squared Error: 11.058824916144241
R2 score:  0.45498282775474375
Explained Variance Score:  0.4749188490815185
Mean Absolute Percentage Error:  169912643.04195625

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 7.99773
Mean Squared Error (avg): 98.49856
Root Mean Squared Error (avg): 9.92416
R2 score (avg) 0.63273
Explained Variance (avg): 0.63285
Mean Absolute Percen

In [305]:
# K-nearest-neighbours regression (default settings) predicting Error only.
print("KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = KNeighborsRegressor()
# Fit and score on the random train/test split.
y_pred_knr_soe, y_test_knr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_knr_soe_nd, y_test_knr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Repeated k-fold cross-validation over the full feature/target set.
_regressorAlgorithm_KFold(model, X_soe, y_soe)

KNEIGHBORS REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.930055153575026
Mean Squared Error: 19.076501975814356
Root Mean Squared Error: 4.3676655064020595
R2 score:  0.9288558047794923
Explained Variance Score:  0.9296273984749437
Mean Absolute Percentage Error:  57608670.50098718


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 6.907074218750001
Mean Squared Error: 90.46882950585758
Root Mean Squared Error: 9.511510369329237
R2 score:  0.5968272296741981
Explained Variance Score:  0.6173547553197432
Mean Absolute Percentage Error:  32.14166982871655

### Performance Summarization of _regressorAlgorithm_KFold ###

Mean Absolute Error (avg): 2.79755
Mean Squared Error (avg): 17.82490
Root Mean Squared Error (avg): 4.22145
R2 score (avg) 0.93353
Explained Variance (avg): 0.93424
Mean Absolute Perc

In [306]:
# Random forest (100 trees, seeded) predicting Error only.
# y is a one-column DataFrame here rather than a 1-D array.
print("RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = RandomForestRegressor(max_depth=1000, n_estimators=100, random_state=1)
# Fit and score on the random train/test split.
y_pred_rfr_soe, y_test_rfr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_rfr_soe_nd, y_test_rfr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

RANDOM FOREST REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.4709158233471758
Mean Squared Error: 4.37877178891946
Root Mean Squared Error: 2.092551502094861
R2 score:  0.9836697422110251
Explained Variance Score:  0.9836712311650462
Mean Absolute Percentage Error:  294.5939258252585


--- small graphs for training, large graphs for testing ---


  model.fit(x_train, y_train)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 2.9307930243957494
Mean Squared Error: 16.66553189474811
Root Mean Squared Error: 4.082343921664135
R2 score:  0.9257303460246099
Explained Variance Score:  0.9257971011371073
Mean Absolute Percentage Error:  341.1707780327996


In [307]:
# Decision tree predicting Error only.
print("DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
# The split criterion is left at its default (squared error). Passing the old
# spelling criterion="mse" is rejected by scikit-learn >= 1.2, while the
# default selects the same criterion on every version.
model = DecisionTreeRegressor(max_depth=1000, splitter="best", min_samples_split=2)
# Fit and score on the random train/test split.
y_pred_dtr_soe, y_test_dtr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_dtr_soe_nd, y_test_dtr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

DECISION TREE REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 0.46779437416080694
Mean Squared Error: 4.6045931029439675
Root Mean Squared Error: 2.1458315644392894
R2 score:  0.9828275607843526
Explained Variance Score:  0.9828288574521017
Mean Absolute Percentage Error:  377.6744525884068


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 3.1290115775240306
Mean Squared Error: 19.528038754446786
Root Mean Squared Error: 4.419054056520104
R2 score:  0.9129736338287634
Explained Variance Score:  0.9130220882287947
Mean Absolute Percentage Error:  0.1297942086555997


In [308]:
# Small multi-layer perceptron (hidden layers 4-8-4, ReLU, seeded) predicting Error only.
# y is a one-column DataFrame here rather than a 1-D array.
print("MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MLPRegressor(hidden_layer_sizes=(4,8,4),activation="relu" ,random_state=1, max_iter=50000)
# Fit and score on the random train/test split.
y_pred_mlpr_soe, y_test_mlpr_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mlpr_soe_nd, y_test_mlpr_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

MLP REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 9.190513055308104
Mean Squared Error: 131.85003224401035
Root Mean Squared Error: 11.482596929441108
R2 score:  0.5082764939981781
Explained Variance Score:  0.5091133392325387
Mean Absolute Percentage Error:  246319490.53018987


--- small graphs for training, large graphs for testing ---


  return f(*args, **kwargs)



### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 9.092419248808056
Mean Squared Error: 130.75222757403253
Root Mean Squared Error: 11.434694030625941
R2 score:  0.41730496453610877
Explained Variance Score:  0.43234390948178425
Mean Absolute Percentage Error:  324440070.8691782


In [309]:
# Ridge wrapped in MultiOutputRegressor; the target frame has a single column (Error).
print("MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR")
model = MultiOutputRegressor(Ridge(random_state=123))
# Fit and score on the random train/test split.
y_pred_mor_soe, y_test_mor_soe = _regressorAlgorithm(model, x_train_soe, x_test_soe, y_train_soe, y_test_soe)
print("\n\n--- small graphs for training, large graphs for testing ---")
# The same estimator instance is re-fitted on the graph-size-based split.
y_pred_mor_soe_nd, y_test_mor_soe_nd = _regressorAlgorithm(model, x_train_soe_nd, x_test_soe_nd, y_train_soe_nd, y_test_soe_nd)
# Cross-validation is switched off for this estimator.
#_regressorAlgorithm_KFold(model, X_soe, y_soe)

MULTIOUTPUT REGRESSION ALGORITHM PERFORMANCE RESULTS ON SINGLE OUTPUT ERROR

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 7.985248013605316
Mean Squared Error: 98.40656113567148
Root Mean Squared Error: 9.92000812175431
R2 score:  0.6330010813674776
Explained Variance Score:  0.6330097141731292
Mean Absolute Percentage Error:  106190982.61863385


--- small graphs for training, large graphs for testing ---

### Performance Summarization of _regressorAlgorithm ###

Mean Absolute Error: 8.697893062452584
Mean Squared Error: 122.29369621674863
Root Mean Squared Error: 11.058648028432255
R2 score:  0.4550002628927987
Explained Variance Score:  0.47493719429870285
Mean Absolute Percentage Error:  169944376.1237833
