## Preliminaries

In [1]:
# pip install matplotlib pandas scikit-learn seaborn

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import mean_absolute_error

from modules.utils import PrintSummary, ShowPlots

## Data Loading

In [3]:
# Load the per-job prediction table. The columns used below are `job_id`,
# `gt_runtime` (the reference runtime) and the `pred_runtime_*` columns
# (one per estimation method).
predictions = pd.read_csv("predictions/pm100/predictions.csv")
# Bare last expression so the notebook renders the frame.
predictions

Unnamed: 0,job_id,user_id,time_submit,gt_runtime,pred_runtime_user,pred_runtime_heuristic,pred_runtime_dt_fs1,pred_runtime_dt_fs2,pred_runtime_rnp_fs1,pred_runtime_rnp_fs2,pred_runtime_knn_fs1,pred_runtime_knn_fs2,pred_runtime_ora,pred_runtime_knn_c4_fs1,pred_runtime_knn_c4_fs2,pred_runtime_knn_c7_fs1,pred_runtime_knn_c7_fs2
0,5154115,1711,11480812,4578,14400,1582,3488,4571,3722,3062,3330,4527,4444,Medium,Medium,Medium,Medium
1,5640678,1711,11480812,4614,14400,1582,3488,4454,3722,3029,3330,4503,4213,Medium,Medium,Medium,Medium
2,1597511,1711,11480812,4282,14400,1583,3488,4417,3722,3138,3330,4477,4353,Medium,Medium,Medium,Medium
3,4698474,1711,11480812,4534,14400,1583,3488,4486,3722,3092,3330,4502,4353,Medium,Medium,Medium,Medium
4,6129873,1711,11480812,4140,14400,1582,3488,4564,3722,3088,3330,4506,4649,Medium,Medium,Medium,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66751,3214055,8,13832570,4,300,300,3,2,3,3,177,72,4,Short,Very-Short,Short,Very-Short
66752,558069,8,13833191,2,300,300,3,3,3,3,177,72,10,Short,Very-Short,Short,Very-Short
66753,1997479,8,13833813,3,300,300,3,2,3,3,177,72,3,Short,Very-Short,Short,Very-Short
66754,4424415,8,13834310,3,300,300,3,3,3,3,177,72,300,Short,Very-Short,Short,Very-Short


In [4]:
# Read the five pm100_900_<n>.swf workload files (tab-separated, no header row,
# so columns are addressed by integer position).
w_900_1, w_900_2, w_900_3, w_900_4, w_900_5 = (
    pd.read_csv(f'workloads/pm100/pm100_900_{run}.swf', sep="\t", header=None)
    for run in range(1, 6)
)

In [5]:
# Read the five pm100_9k_<n>.swf workload files (tab-separated, no header row,
# so columns are addressed by integer position).
w_9k_1, w_9k_2, w_9k_3, w_9k_4, w_9k_5 = (
    pd.read_csv(f'workloads/pm100/pm100_9k_{run}.swf', sep="\t", header=None)
    for run in range(1, 6)
)

## Data Filtering

In [6]:
# Keep only the prediction rows whose job_id appears in column 0 of each
# pm100_900 workload file.
pred_w_900_1, pred_w_900_2, pred_w_900_3, pred_w_900_4, pred_w_900_5 = (
    predictions.loc[predictions["job_id"].isin(workload[0])]
    for workload in (w_900_1, w_900_2, w_900_3, w_900_4, w_900_5)
)

In [7]:
# Keep only the prediction rows whose job_id appears in column 0 of each
# pm100_9k workload file.
pred_w_9k_1, pred_w_9k_2, pred_w_9k_3, pred_w_9k_4, pred_w_9k_5 = (
    predictions.loc[predictions["job_id"].isin(workload[0])]
    for workload in (w_9k_1, w_9k_2, w_9k_3, w_9k_4, w_9k_5)
)

## Metrics Helpers

In [8]:
def mean_average_percentage_error(y_true, y_pred):
    """Mean absolute percentage error (MAPE) of ``y_pred`` against ``y_true``.

    Both inputs are converted to float arrays. Only pairs where *both* values
    are strictly positive are scored; every other pair (including NaN) is
    dropped before averaging. Note that this means a zero prediction is
    excluded rather than counted as a 100% error.

    Parameters
    ----------
    y_true : array-like
        Reference values (the denominator of the relative error).
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / y_true) * 100`` over the retained pairs,
        or ``nan`` when no pair is retained.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Avoid division by zero (and keep the same sample set as estimation_accuracy).
    valid = (y_true > 0) & (y_pred > 0)
    y_true = y_true[valid]
    y_pred = y_pred[valid]

    # Nothing left to score: return NaN explicitly instead of letting
    # np.mean([]) emit a RuntimeWarning.
    if y_true.size == 0:
        return float("nan")

    relative_error = np.abs((y_true - y_pred) / y_true)

    return float(np.mean(relative_error) * 100)

In [9]:
def estimation_accuracy(y_true, y_pred):
    """Mean estimation accuracy (EA) of ``y_pred`` against ``y_true``.

    For each pair the accuracy is the smaller value divided by the larger one,
    so it lies in (0, 1] and equals 1 only for an exact prediction; under- and
    over-estimation are penalised symmetrically. Only pairs where both values
    are strictly positive are scored; every other pair (including NaN) is
    dropped before averaging.

    Parameters
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    float
        Mean of ``min(y_true, y_pred) / max(y_true, y_pred)`` over the
        retained pairs, or ``nan`` when no pair is retained.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Avoid division by zero: either value can end up in the denominator.
    valid = (y_true > 0) & (y_pred > 0)
    y_true = y_true[valid]
    y_pred = y_pred[valid]

    # Nothing left to score: return NaN explicitly instead of letting
    # np.mean([]) emit a RuntimeWarning.
    if y_true.size == 0:
        return float("nan")

    # Same ratio the np.where form picked, with one division per pair.
    ea = np.minimum(y_true, y_pred) / np.maximum(y_true, y_pred)

    return float(np.mean(ea))

In [10]:
def convert_to_hms(time_value, unit="seconds"):
    """Render a duration as a zero-padded ``HH:MM:SS`` string.

    ``time_value`` is read as seconds unless ``unit`` is exactly ``"minutes"``,
    in which case it is multiplied by 60 first. Each field is passed through
    ``int()``, so fractional seconds are dropped, and the hour field is not
    wrapped at 24.
    """
    total_seconds = time_value * 60 if unit == "minutes" else time_value
    whole_hours, leftover = divmod(total_seconds, 3600)
    whole_minutes, whole_seconds = divmod(leftover, 60)
    fields = (whole_hours, whole_minutes, whole_seconds)
    return ":".join(f"{int(field):02}" for field in fields)

In [11]:
# gt_runtime pred_runtime_user pred_runtime_heuristic pred_runtime_dt_fs2 pred_runtime_rnp_fs2 pred_runtime_knn_fs2 pred_runtime_ora

## Complete Dataset

### Metric Computation

In [12]:
# Mean absolute error of each method against gt_runtime, over every row of
# `predictions`. The ORA column is stored under the `*_llm` name.
mae_usr, mae_heu, mae_dt, mae_rnp, mae_knn, mae_llm = (
    mean_absolute_error(predictions["gt_runtime"], predictions[column])
    for column in (
        "pred_runtime_user",
        "pred_runtime_heuristic",
        "pred_runtime_dt_fs2",
        "pred_runtime_rnp_fs2",
        "pred_runtime_knn_fs2",
        "pred_runtime_ora",
    )
)

In [13]:
# Estimation accuracy of each method against gt_runtime, over every row of
# `predictions`. The ORA column is stored under the `*_llm` name.
ea_usr, ea_heu, ea_dt, ea_rnp, ea_knn, ea_llm = (
    estimation_accuracy(predictions["gt_runtime"], predictions[column])
    for column in (
        "pred_runtime_user",
        "pred_runtime_heuristic",
        "pred_runtime_dt_fs2",
        "pred_runtime_rnp_fs2",
        "pred_runtime_knn_fs2",
        "pred_runtime_ora",
    )
)

In [14]:
# MAPE (as a percentage) of each method against gt_runtime, over every row of
# `predictions`. The ORA column is stored under the `*_llm` name.
mape_usr, mape_heu, mape_dt, mape_rnp, mape_knn, mape_llm = (
    mean_average_percentage_error(predictions["gt_runtime"], predictions[column])
    for column in (
        "pred_runtime_user",
        "pred_runtime_heuristic",
        "pred_runtime_dt_fs2",
        "pred_runtime_rnp_fs2",
        "pred_runtime_knn_fs2",
        "pred_runtime_ora",
    )
)

### Metrics

In [15]:
# Summary table for the complete dataset (workload label 'C'): one row per method.
res = pd.DataFrame({
    'Workload': ('C',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr, mae_heu, mae_dt, mae_rnp, mae_knn, mae_llm),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr, mae_heu, mae_dt, mae_rnp, mae_knn, mae_llm))),
    'EA': (ea_usr, ea_heu, ea_dt, ea_rnp, ea_knn, ea_llm),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr, mape_heu, mape_dt, mape_rnp, mape_knn, mape_llm)),
})

res

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,C,User,56416.198364,15:40:16,0.107134,2999.341407
1,C,Heuristic,3695.931497,01:01:35,0.248127,53.277426
2,C,DT,3367.512313,00:56:07,0.578268,3.901901
3,C,RNP,4506.397537,01:15:06,0.56261,12.230835
4,C,KNN,3557.569267,00:59:17,0.27568,96.500035
5,C,ORA,394.735919,00:06:34,0.686491,1.621773


In [16]:
# Print the largest value of every column as a quick range check.
# Series.max() replaces the builtin max(): it is vectorised rather than
# iterating the Series element by element in Python, and it skips NaN instead
# of returning an order-dependent result when a numeric column has missing values.
for col in predictions.columns:
    print(f"{col} = {predictions[col].max()}")

job_id = 6242697
user_id = 1740
time_submit = 13834932
gt_runtime = 86395
pred_runtime_user = 86400
pred_runtime_heuristic = 69236
pred_runtime_dt_fs1 = 86376
pred_runtime_dt_fs2 = 86395
pred_runtime_rnp_fs1 = 86400
pred_runtime_rnp_fs2 = 86400
pred_runtime_knn_fs1 = 86400
pred_runtime_knn_fs2 = 85284
pred_runtime_ora = 86400
pred_runtime_knn_c4_fs1 = Very-Short
pred_runtime_knn_c4_fs2 = Very-Short
pred_runtime_knn_c7_fs1 = Very-Short
pred_runtime_knn_c7_fs2 = Very-Short


## Scenario (a): 900x98

### Metric Computation

In [17]:
# MAE of each method (one name per column) on each of the five pm100_900
# workload subsets (one row of names per subset).
(
    (mae_usr_sa1, mae_heu_sa1, mae_dt_sa1, mae_rnp_sa1, mae_knn_sa1, mae_llm_sa1),
    (mae_usr_sa2, mae_heu_sa2, mae_dt_sa2, mae_rnp_sa2, mae_knn_sa2, mae_llm_sa2),
    (mae_usr_sa3, mae_heu_sa3, mae_dt_sa3, mae_rnp_sa3, mae_knn_sa3, mae_llm_sa3),
    (mae_usr_sa4, mae_heu_sa4, mae_dt_sa4, mae_rnp_sa4, mae_knn_sa4, mae_llm_sa4),
    (mae_usr_sa5, mae_heu_sa5, mae_dt_sa5, mae_rnp_sa5, mae_knn_sa5, mae_llm_sa5),
) = (
    [
        mean_absolute_error(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_900_1, pred_w_900_2, pred_w_900_3, pred_w_900_4, pred_w_900_5)
)

In [18]:
# Estimation accuracy of each method (one name per column) on each of the five
# pm100_900 workload subsets (one row of names per subset).
(
    (ea_usr_sa1, ea_heu_sa1, ea_dt_sa1, ea_rnp_sa1, ea_knn_sa1, ea_llm_sa1),
    (ea_usr_sa2, ea_heu_sa2, ea_dt_sa2, ea_rnp_sa2, ea_knn_sa2, ea_llm_sa2),
    (ea_usr_sa3, ea_heu_sa3, ea_dt_sa3, ea_rnp_sa3, ea_knn_sa3, ea_llm_sa3),
    (ea_usr_sa4, ea_heu_sa4, ea_dt_sa4, ea_rnp_sa4, ea_knn_sa4, ea_llm_sa4),
    (ea_usr_sa5, ea_heu_sa5, ea_dt_sa5, ea_rnp_sa5, ea_knn_sa5, ea_llm_sa5),
) = (
    [
        estimation_accuracy(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_900_1, pred_w_900_2, pred_w_900_3, pred_w_900_4, pred_w_900_5)
)

In [19]:
# MAPE (as a percentage) of each method (one name per column) on each of the
# five pm100_900 workload subsets (one row of names per subset).
(
    (mape_usr_sa1, mape_heu_sa1, mape_dt_sa1, mape_rnp_sa1, mape_knn_sa1, mape_llm_sa1),
    (mape_usr_sa2, mape_heu_sa2, mape_dt_sa2, mape_rnp_sa2, mape_knn_sa2, mape_llm_sa2),
    (mape_usr_sa3, mape_heu_sa3, mape_dt_sa3, mape_rnp_sa3, mape_knn_sa3, mape_llm_sa3),
    (mape_usr_sa4, mape_heu_sa4, mape_dt_sa4, mape_rnp_sa4, mape_knn_sa4, mape_llm_sa4),
    (mape_usr_sa5, mape_heu_sa5, mape_dt_sa5, mape_rnp_sa5, mape_knn_sa5, mape_llm_sa5),
) = (
    [
        mean_average_percentage_error(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_900_1, pred_w_900_2, pred_w_900_3, pred_w_900_4, pred_w_900_5)
)

In [20]:
# Unweighted mean of each metric over the five pm100_900 subsets, per method.
# The sum is kept as an explicit left-to-right addition so the floating-point
# result is unchanged.
# NOTE: the scenario (b) section later rebinds the same mae_*_avg names, so
# these values are only valid until that section runs.
(
    mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg,
    ea_usr_avg, ea_heu_avg, ea_dt_avg, ea_rnp_avg, ea_knn_avg, ea_llm_avg,
    mape_usr_avg, mape_heu_avg, mape_dt_avg, mape_rnp_avg, mape_knn_avg, mape_llm_avg,
) = (
    (run1 + run2 + run3 + run4 + run5) / 5
    for run1, run2, run3, run4, run5 in (
        (mae_usr_sa1, mae_usr_sa2, mae_usr_sa3, mae_usr_sa4, mae_usr_sa5),
        (mae_heu_sa1, mae_heu_sa2, mae_heu_sa3, mae_heu_sa4, mae_heu_sa5),
        (mae_dt_sa1, mae_dt_sa2, mae_dt_sa3, mae_dt_sa4, mae_dt_sa5),
        (mae_rnp_sa1, mae_rnp_sa2, mae_rnp_sa3, mae_rnp_sa4, mae_rnp_sa5),
        (mae_knn_sa1, mae_knn_sa2, mae_knn_sa3, mae_knn_sa4, mae_knn_sa5),
        (mae_llm_sa1, mae_llm_sa2, mae_llm_sa3, mae_llm_sa4, mae_llm_sa5),
        (ea_usr_sa1, ea_usr_sa2, ea_usr_sa3, ea_usr_sa4, ea_usr_sa5),
        (ea_heu_sa1, ea_heu_sa2, ea_heu_sa3, ea_heu_sa4, ea_heu_sa5),
        (ea_dt_sa1, ea_dt_sa2, ea_dt_sa3, ea_dt_sa4, ea_dt_sa5),
        (ea_rnp_sa1, ea_rnp_sa2, ea_rnp_sa3, ea_rnp_sa4, ea_rnp_sa5),
        (ea_knn_sa1, ea_knn_sa2, ea_knn_sa3, ea_knn_sa4, ea_knn_sa5),
        (ea_llm_sa1, ea_llm_sa2, ea_llm_sa3, ea_llm_sa4, ea_llm_sa5),
        (mape_usr_sa1, mape_usr_sa2, mape_usr_sa3, mape_usr_sa4, mape_usr_sa5),
        (mape_heu_sa1, mape_heu_sa2, mape_heu_sa3, mape_heu_sa4, mape_heu_sa5),
        (mape_dt_sa1, mape_dt_sa2, mape_dt_sa3, mape_dt_sa4, mape_dt_sa5),
        (mape_rnp_sa1, mape_rnp_sa2, mape_rnp_sa3, mape_rnp_sa4, mape_rnp_sa5),
        (mape_knn_sa1, mape_knn_sa2, mape_knn_sa3, mape_knn_sa4, mape_knn_sa5),
        (mape_llm_sa1, mape_llm_sa2, mape_llm_sa3, mape_llm_sa4, mape_llm_sa5),
    )
)

### Average Metrics

In [21]:
# Summary table of the scenario averages (workload label 'A'): one row per method.
res_avg = pd.DataFrame({
    'Workload': ('A',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg))),
    'EA': (ea_usr_avg, ea_heu_avg, ea_dt_avg, ea_rnp_avg, ea_knn_avg, ea_llm_avg),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_avg, mape_heu_avg, mape_dt_avg, mape_rnp_avg, mape_knn_avg, mape_llm_avg)),
})

res_avg

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,A,User,56172.860869,15:36:12,0.106694,3047.455561
1,A,Heuristic,3716.361068,01:01:56,0.249434,53.482906
2,A,DT,3434.402844,00:57:14,0.577213,2.769192
3,A,RNP,4522.868415,01:15:22,0.570417,9.026398
4,A,KNN,3622.447892,01:00:22,0.279489,96.315463
5,A,ORA,451.479938,00:07:31,0.684957,2.970193


### Details

In [22]:
# Per-method metrics for the first pm100_900 subset (workload label 'H1').
res_sa1 = pd.DataFrame({
    'Workload': ('H1',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_sa1, mae_heu_sa1, mae_dt_sa1, mae_rnp_sa1, mae_knn_sa1, mae_llm_sa1),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_sa1, mae_heu_sa1, mae_dt_sa1, mae_rnp_sa1, mae_knn_sa1, mae_llm_sa1))),
    'EA': (ea_usr_sa1, ea_heu_sa1, ea_dt_sa1, ea_rnp_sa1, ea_knn_sa1, ea_llm_sa1),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_sa1, mape_heu_sa1, mape_dt_sa1, mape_rnp_sa1, mape_knn_sa1, mape_llm_sa1)),
})

res_sa1

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,H1,User,55714.908788,15:28:34,0.107997,3010.32186
1,H1,Heuristic,3562.805339,00:59:22,0.258553,51.160803
2,H1,DT,2830.797553,00:47:10,0.603637,1.633336
3,H1,RNP,4284.866518,01:11:24,0.571296,4.556861
4,H1,KNN,3600.516129,01:00:00,0.278677,98.165183
5,H1,ORA,430.719689,00:07:10,0.676029,3.715369


In [23]:
# Per-method metrics for the second pm100_900 subset (workload label 'H2').
res_sa2 = pd.DataFrame({
    'Workload': ('H2',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_sa2, mae_heu_sa2, mae_dt_sa2, mae_rnp_sa2, mae_knn_sa2, mae_llm_sa2),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_sa2, mae_heu_sa2, mae_dt_sa2, mae_rnp_sa2, mae_knn_sa2, mae_llm_sa2))),
    'EA': (ea_usr_sa2, ea_heu_sa2, ea_dt_sa2, ea_rnp_sa2, ea_knn_sa2, ea_llm_sa2),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_sa2, mape_heu_sa2, mape_dt_sa2, mape_rnp_sa2, mape_knn_sa2, mape_llm_sa2)),
})

res_sa2

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,H2,User,57402.017778,15:56:42,0.103795,3198.918766
1,H2,Heuristic,3718.451111,01:01:58,0.238899,56.233484
2,H2,DT,3448.29,00:57:28,0.590137,2.599399
3,H2,RNP,4060.72,01:07:40,0.598508,13.680785
4,H2,KNN,3469.355556,00:57:49,0.282998,101.766107
5,H2,ORA,552.502222,00:09:12,0.6727,1.832454


In [24]:
# Per-method metrics for the third pm100_900 subset (workload label 'H3').
res_sa3 = pd.DataFrame({
    'Workload': ('H3',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_sa3, mae_heu_sa3, mae_dt_sa3, mae_rnp_sa3, mae_knn_sa3, mae_llm_sa3),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_sa3, mae_heu_sa3, mae_dt_sa3, mae_rnp_sa3, mae_knn_sa3, mae_llm_sa3))),
    'EA': (ea_usr_sa3, ea_heu_sa3, ea_dt_sa3, ea_rnp_sa3, ea_knn_sa3, ea_llm_sa3),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_sa3, mape_heu_sa3, mape_dt_sa3, mape_rnp_sa3, mape_knn_sa3, mape_llm_sa3)),
})

res_sa3

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,H3,User,55567.548889,15:26:07,0.115287,2899.301772
1,H3,Heuristic,4164.473333,01:09:24,0.256882,51.817835
2,H3,DT,3675.483333,01:01:15,0.571443,2.230135
3,H3,RNP,5120.344444,01:25:20,0.562407,12.793807
4,H3,KNN,4090.06,01:08:10,0.286363,93.370986
5,H3,ORA,352.368889,00:05:52,0.713626,0.288591


In [25]:
# Per-method metrics for the fourth pm100_900 subset (workload label 'H4').
res_sa4 = pd.DataFrame({
    'Workload': ('H4',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_sa4, mae_heu_sa4, mae_dt_sa4, mae_rnp_sa4, mae_knn_sa4, mae_llm_sa4),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_sa4, mae_heu_sa4, mae_dt_sa4, mae_rnp_sa4, mae_knn_sa4, mae_llm_sa4))),
    'EA': (ea_usr_sa4, ea_heu_sa4, ea_dt_sa4, ea_rnp_sa4, ea_knn_sa4, ea_llm_sa4),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_sa4, mape_heu_sa4, mape_dt_sa4, mape_rnp_sa4, mape_knn_sa4, mape_llm_sa4)),
})

res_sa4

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,H4,User,56458.14,15:40:58,0.099859,2990.007826
1,H4,Heuristic,3438.937778,00:57:18,0.245068,54.575508
2,H4,DT,3446.436667,00:57:26,0.563503,4.100193
3,H4,RNP,4797.261111,01:19:57,0.554631,8.038353
4,H4,KNN,3246.438889,00:54:06,0.273676,89.125184
5,H4,ORA,545.918889,00:09:05,0.684687,0.355279


In [26]:
# Per-method metrics for the fifth pm100_900 subset (workload label 'H5').
res_sa5 = pd.DataFrame({
    'Workload': ('H5',) * 6,
    'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
    'MAE': (mae_usr_sa5, mae_heu_sa5, mae_dt_sa5, mae_rnp_sa5, mae_knn_sa5, mae_llm_sa5),
    # The MAE again, read as seconds and rendered as HH:MM:SS.
    'Average Error': tuple(map(convert_to_hms, (mae_usr_sa5, mae_heu_sa5, mae_dt_sa5, mae_rnp_sa5, mae_knn_sa5, mae_llm_sa5))),
    'EA': (ea_usr_sa5, ea_heu_sa5, ea_dt_sa5, ea_rnp_sa5, ea_knn_sa5, ea_llm_sa5),
    # The MAPE helper already returns a percentage; the table stores it divided by 1000.
    'MAPE': tuple(value / 1000 for value in (mape_usr_sa5, mape_heu_sa5, mape_dt_sa5, mape_rnp_sa5, mape_knn_sa5, mape_llm_sa5)),
})

res_sa5

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,H5,User,55721.688889,15:28:41,0.106531,3138.727581
1,H5,Heuristic,3697.137778,01:01:37,0.24777,53.626902
2,H5,DT,3771.006667,01:02:51,0.557346,3.282896
3,H5,RNP,4351.15,01:12:31,0.565243,6.062187
4,H5,KNN,3705.868889,01:01:45,0.27573,99.149853
5,H5,ORA,375.89,00:06:15,0.677744,8.65927


In [37]:
# Stack the complete-dataset, average and per-subset tables row-wise and
# renumber the rows 0..n-1 in a single step.
results = pd.concat(
    [res, res_avg, res_sa1, res_sa2, res_sa3, res_sa4, res_sa5],
    axis=0,
    ignore_index=True,
)
results

Unnamed: 0,Workload,Method,MAE,Average Error,EA,MAPE
0,C,User,56416.198364,15:40:16,0.107134,2999.341407
1,C,Heuristic,3695.931497,01:01:35,0.248127,53.277426
2,C,DT,3367.512313,00:56:07,0.578268,3.901901
3,C,RNP,4506.397537,01:15:06,0.56261,12.230835
4,C,KNN,3557.569267,00:59:17,0.27568,96.500035
5,C,ORA,394.735919,00:06:34,0.686491,1.621773
6,A,User,56172.860869,15:36:12,0.106694,3047.455561
7,A,Heuristic,3716.361068,01:01:56,0.249434,53.482906
8,A,DT,3434.402844,00:57:14,0.577213,2.769192
9,A,RNP,4522.868415,01:15:22,0.570417,9.026398


In [39]:
# Persist the combined metrics table without the row index.
# NOTE(review): to_csv does not create missing directories, so `metrics/`
# must already exist relative to the working directory.
results.to_csv("metrics/pm100_900_metrics.csv", index=False)

## Scenario (b): 9000x980

### Metric Computation

In [25]:
# MAE of each method (one name per column) on each of the five pm100_9k
# workload subsets (one row of names per subset).
(
    (mae_usr_sb1, mae_heu_sb1, mae_dt_sb1, mae_rnp_sb1, mae_knn_sb1, mae_llm_sb1),
    (mae_usr_sb2, mae_heu_sb2, mae_dt_sb2, mae_rnp_sb2, mae_knn_sb2, mae_llm_sb2),
    (mae_usr_sb3, mae_heu_sb3, mae_dt_sb3, mae_rnp_sb3, mae_knn_sb3, mae_llm_sb3),
    (mae_usr_sb4, mae_heu_sb4, mae_dt_sb4, mae_rnp_sb4, mae_knn_sb4, mae_llm_sb4),
    (mae_usr_sb5, mae_heu_sb5, mae_dt_sb5, mae_rnp_sb5, mae_knn_sb5, mae_llm_sb5),
) = (
    [
        mean_absolute_error(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_9k_1, pred_w_9k_2, pred_w_9k_3, pred_w_9k_4, pred_w_9k_5)
)

In [26]:
# Estimation accuracy of each method (one name per column) on each of the five
# pm100_9k workload subsets (one row of names per subset).
(
    (ea_usr_sb1, ea_heu_sb1, ea_dt_sb1, ea_rnp_sb1, ea_knn_sb1, ea_llm_sb1),
    (ea_usr_sb2, ea_heu_sb2, ea_dt_sb2, ea_rnp_sb2, ea_knn_sb2, ea_llm_sb2),
    (ea_usr_sb3, ea_heu_sb3, ea_dt_sb3, ea_rnp_sb3, ea_knn_sb3, ea_llm_sb3),
    (ea_usr_sb4, ea_heu_sb4, ea_dt_sb4, ea_rnp_sb4, ea_knn_sb4, ea_llm_sb4),
    (ea_usr_sb5, ea_heu_sb5, ea_dt_sb5, ea_rnp_sb5, ea_knn_sb5, ea_llm_sb5),
) = (
    [
        estimation_accuracy(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_9k_1, pred_w_9k_2, pred_w_9k_3, pred_w_9k_4, pred_w_9k_5)
)

In [27]:
# MAPE (as a percentage) of each method (one name per column) on each of the
# five pm100_9k workload subsets (one row of names per subset).
(
    (mape_usr_sb1, mape_heu_sb1, mape_dt_sb1, mape_rnp_sb1, mape_knn_sb1, mape_llm_sb1),
    (mape_usr_sb2, mape_heu_sb2, mape_dt_sb2, mape_rnp_sb2, mape_knn_sb2, mape_llm_sb2),
    (mape_usr_sb3, mape_heu_sb3, mape_dt_sb3, mape_rnp_sb3, mape_knn_sb3, mape_llm_sb3),
    (mape_usr_sb4, mape_heu_sb4, mape_dt_sb4, mape_rnp_sb4, mape_knn_sb4, mape_llm_sb4),
    (mape_usr_sb5, mape_heu_sb5, mape_dt_sb5, mape_rnp_sb5, mape_knn_sb5, mape_llm_sb5),
) = (
    [
        mean_average_percentage_error(subset["gt_runtime"], subset[column])
        for column in (
            "pred_runtime_user",
            "pred_runtime_heuristic",
            "pred_runtime_dt_fs2",
            "pred_runtime_rnp_fs2",
            "pred_runtime_knn_fs2",
            "pred_runtime_ora",
        )
    ]
    for subset in (pred_w_9k_1, pred_w_9k_2, pred_w_9k_3, pred_w_9k_4, pred_w_9k_5)
)

In [28]:
# Arithmetic mean of each metric over the five sub-workload scores (sb1..sb5),
# one value per prediction method. The additions are kept in the same
# left-to-right order so the floating-point results are unchanged.

# Mean absolute error.
mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg = (
    (sb1 + sb2 + sb3 + sb4 + sb5) / 5
    for sb1, sb2, sb3, sb4, sb5 in (
        (mae_usr_sb1, mae_usr_sb2, mae_usr_sb3, mae_usr_sb4, mae_usr_sb5),
        (mae_heu_sb1, mae_heu_sb2, mae_heu_sb3, mae_heu_sb4, mae_heu_sb5),
        (mae_dt_sb1, mae_dt_sb2, mae_dt_sb3, mae_dt_sb4, mae_dt_sb5),
        (mae_rnp_sb1, mae_rnp_sb2, mae_rnp_sb3, mae_rnp_sb4, mae_rnp_sb5),
        (mae_knn_sb1, mae_knn_sb2, mae_knn_sb3, mae_knn_sb4, mae_knn_sb5),
        (mae_llm_sb1, mae_llm_sb2, mae_llm_sb3, mae_llm_sb4, mae_llm_sb5),
    )
)

# Estimation accuracy.
ea_usr_avg, ea_heu_avg, ea_dt_avg, ea_rnp_avg, ea_knn_avg, ea_llm_avg = (
    (sb1 + sb2 + sb3 + sb4 + sb5) / 5
    for sb1, sb2, sb3, sb4, sb5 in (
        (ea_usr_sb1, ea_usr_sb2, ea_usr_sb3, ea_usr_sb4, ea_usr_sb5),
        (ea_heu_sb1, ea_heu_sb2, ea_heu_sb3, ea_heu_sb4, ea_heu_sb5),
        (ea_dt_sb1, ea_dt_sb2, ea_dt_sb3, ea_dt_sb4, ea_dt_sb5),
        (ea_rnp_sb1, ea_rnp_sb2, ea_rnp_sb3, ea_rnp_sb4, ea_rnp_sb5),
        (ea_knn_sb1, ea_knn_sb2, ea_knn_sb3, ea_knn_sb4, ea_knn_sb5),
        (ea_llm_sb1, ea_llm_sb2, ea_llm_sb3, ea_llm_sb4, ea_llm_sb5),
    )
)

# Percentage error (values returned by `mean_average_percentage_error`).
mape_usr_avg, mape_heu_avg, mape_dt_avg, mape_rnp_avg, mape_knn_avg, mape_llm_avg = (
    (sb1 + sb2 + sb3 + sb4 + sb5) / 5
    for sb1, sb2, sb3, sb4, sb5 in (
        (mape_usr_sb1, mape_usr_sb2, mape_usr_sb3, mape_usr_sb4, mape_usr_sb5),
        (mape_heu_sb1, mape_heu_sb2, mape_heu_sb3, mape_heu_sb4, mape_heu_sb5),
        (mape_dt_sb1, mape_dt_sb2, mape_dt_sb3, mape_dt_sb4, mape_dt_sb5),
        (mape_rnp_sb1, mape_rnp_sb2, mape_rnp_sb3, mape_rnp_sb4, mape_rnp_sb5),
        (mape_knn_sb1, mape_knn_sb2, mape_knn_sb3, mape_knn_sb4, mape_knn_sb5),
        (mape_llm_sb1, mape_llm_sb2, mape_llm_sb3, mape_llm_sb4, mape_llm_sb5),
    )
)

### Average Metrics

In [29]:
# Table of the averaged metrics, one row per prediction method, tagged with
# workload label 'A'. 'Average Error' is the MAE passed through `convert_to_hms`.
res_avg = pd.DataFrame(
    {
        'Workload': ('A',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_avg, mae_heu_avg, mae_dt_avg, mae_rnp_avg, mae_knn_avg, mae_llm_avg),
            )
        ),
        'EA': (ea_usr_avg, ea_heu_avg, ea_dt_avg, ea_rnp_avg, ea_knn_avg, ea_llm_avg),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_avg,
                mape_heu_avg,
                mape_dt_avg,
                mape_rnp_avg,
                mape_knn_avg,
                mape_llm_avg,
            )
        ),
    }
)

res_avg

Unnamed: 0,Method,MAE,Average Error,EA,MAPE
0,User,56733.094692,15:45:33,0.105948,3053.801039
1,Heuristic,3674.000685,01:01:14,0.244957,53.164217
2,DT,3363.091789,00:56:03,0.579554,3.670837
3,RNP,4410.727896,01:13:30,0.566391,10.748077
4,KNN,3553.991907,00:59:13,0.271668,97.631454
5,ORA,368.302978,00:06:08,0.683415,1.500171


### Details

In [30]:
# Metrics table for sub-workload 1 (label 'H1'), one row per prediction method.
# 'Average Error' is the MAE passed through `convert_to_hms`.
res_sb1 = pd.DataFrame(
    {
        'Workload': ('H1',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_sb1, mae_heu_sb1, mae_dt_sb1, mae_rnp_sb1, mae_knn_sb1, mae_llm_sb1),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_sb1, mae_heu_sb1, mae_dt_sb1, mae_rnp_sb1, mae_knn_sb1, mae_llm_sb1),
            )
        ),
        'EA': (ea_usr_sb1, ea_heu_sb1, ea_dt_sb1, ea_rnp_sb1, ea_knn_sb1, ea_llm_sb1),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_sb1,
                mape_heu_sb1,
                mape_dt_sb1,
                mape_rnp_sb1,
                mape_knn_sb1,
                mape_llm_sb1,
            )
        ),
    }
)

res_sb1

Unnamed: 0,Method,MAE,Average Error,EA,MAPE
0,User,56525.020338,15:42:05,0.106222,2988.984694
1,Heuristic,3663.694599,01:01:03,0.25106,52.222476
2,DT,3324.635697,00:55:24,0.580599,4.100366
3,RNP,4434.906979,01:13:54,0.566197,9.610632
4,KNN,3461.619582,00:57:41,0.278183,97.277741
5,ORA,370.311403,00:06:10,0.688209,2.513027


In [31]:
# Metrics table for sub-workload 2 (label 'H2'), one row per prediction method.
# 'Average Error' is the MAE passed through `convert_to_hms`.
res_sb2 = pd.DataFrame(
    {
        'Workload': ('H2',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_sb2, mae_heu_sb2, mae_dt_sb2, mae_rnp_sb2, mae_knn_sb2, mae_llm_sb2),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_sb2, mae_heu_sb2, mae_dt_sb2, mae_rnp_sb2, mae_knn_sb2, mae_llm_sb2),
            )
        ),
        'EA': (ea_usr_sb2, ea_heu_sb2, ea_dt_sb2, ea_rnp_sb2, ea_knn_sb2, ea_llm_sb2),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_sb2,
                mape_heu_sb2,
                mape_dt_sb2,
                mape_rnp_sb2,
                mape_knn_sb2,
                mape_llm_sb2,
            )
        ),
    }
)

res_sb2

Unnamed: 0,Method,MAE,Average Error,EA,MAPE
0,User,56706.89421,15:45:06,0.106343,3094.771595
1,Heuristic,3699.496944,01:01:39,0.243788,53.80324
2,DT,3317.23936,00:55:17,0.584509,3.335178
3,RNP,4233.172464,01:10:33,0.571864,7.642176
4,KNN,3592.459829,00:59:52,0.270888,98.526746
5,ORA,399.408045,00:06:39,0.678879,0.821249


In [32]:
# Metrics table for sub-workload 3 (label 'H3'), one row per prediction method.
# 'Average Error' is the MAE passed through `convert_to_hms`.
res_sb3 = pd.DataFrame(
    {
        'Workload': ('H3',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_sb3, mae_heu_sb3, mae_dt_sb3, mae_rnp_sb3, mae_knn_sb3, mae_llm_sb3),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_sb3, mae_heu_sb3, mae_dt_sb3, mae_rnp_sb3, mae_knn_sb3, mae_llm_sb3),
            )
        ),
        'EA': (ea_usr_sb3, ea_heu_sb3, ea_dt_sb3, ea_rnp_sb3, ea_knn_sb3, ea_llm_sb3),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_sb3,
                mape_heu_sb3,
                mape_dt_sb3,
                mape_rnp_sb3,
                mape_knn_sb3,
                mape_llm_sb3,
            )
        ),
    }
)

res_sb3

Unnamed: 0,Method,MAE,Average Error,EA,MAPE
0,User,56967.369527,15:49:27,0.105281,3077.646827
1,Heuristic,3658.810513,01:00:58,0.240021,53.466936
2,DT,3447.400311,00:57:27,0.573555,3.576967
3,RNP,4564.104245,01:16:04,0.561113,14.991422
4,KNN,3607.89631,01:00:07,0.265931,97.089875
5,ORA,335.189487,00:05:35,0.683155,1.166236


In [None]:
# Metrics table for sub-workload 4 (label 'H4'), one row per prediction method.
# 'Average Error' is the MAE passed through `convert_to_hms`.
res_sb4 = pd.DataFrame(
    {
        'Workload': ('H4',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_sb4, mae_heu_sb4, mae_dt_sb4, mae_rnp_sb4, mae_knn_sb4, mae_llm_sb4),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_sb4, mae_heu_sb4, mae_dt_sb4, mae_rnp_sb4, mae_knn_sb4, mae_llm_sb4),
            )
        ),
        'EA': (ea_usr_sb4, ea_heu_sb4, ea_dt_sb4, ea_rnp_sb4, ea_knn_sb4, ea_llm_sb4),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_sb4,
                mape_heu_sb4,
                mape_dt_sb4,
                mape_rnp_sb4,
                mape_knn_sb4,
                mape_llm_sb4,
            )
        ),
    }
)

res_sb4

In [None]:
# Metrics table for sub-workload 5 (label 'H5'), one row per prediction method.
# 'Average Error' is the MAE passed through `convert_to_hms`.
res_sb5 = pd.DataFrame(
    {
        'Workload': ('H5',) * 6,
        'Method': ('User', 'Heuristic', 'DT', 'RNP', 'KNN', 'ORA'),
        'MAE': (mae_usr_sb5, mae_heu_sb5, mae_dt_sb5, mae_rnp_sb5, mae_knn_sb5, mae_llm_sb5),
        'Average Error': tuple(
            map(
                convert_to_hms,
                (mae_usr_sb5, mae_heu_sb5, mae_dt_sb5, mae_rnp_sb5, mae_knn_sb5, mae_llm_sb5),
            )
        ),
        'EA': (ea_usr_sb5, ea_heu_sb5, ea_dt_sb5, ea_rnp_sb5, ea_knn_sb5, ea_llm_sb5),
        # NOTE(review): MAPE is stored divided by 1000; the reason for this
        # scale factor is not visible in this section — confirm the intended unit.
        'MAPE': tuple(
            value / 1000
            for value in (
                mape_usr_sb5,
                mape_heu_sb5,
                mape_dt_sb5,
                mape_rnp_sb5,
                mape_knn_sb5,
                mape_llm_sb5,
            )
        ),
    }
)

res_sb5

In [None]:
# Stack all per-workload tables row-wise into one frame with a fresh 0..n-1 index.
# NOTE(review): `res` is created outside this section of the notebook — confirm
# it exists on a fresh Restart & Run All and shares the columns of the tables below.
results = pd.concat(
    [res, res_avg, res_sb1, res_sb2, res_sb3, res_sb4, res_sb5],
    axis=0,
    ignore_index=True,
)
results

In [None]:
from pathlib import Path

# Persist the combined metrics table as CSV without the index column.
# DataFrame.to_csv raises OSError when the parent directory is missing, so the
# output folder is created first; this keeps the cell runnable on a fresh checkout.
metrics_path = Path("metrics/pm100_9k_metrics.csv")
metrics_path.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(metrics_path, index=False)