In [1]:
import pandas as pd

# Load the metrics DataFrame from a pickle file, resolved relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load this file if it comes from a trusted run.
df = pd.read_pickle("./data/pre-processed-metrics-dataframe.pkl")

# Selecting the best DYTAS variant

In [2]:
# Keep every distinct scheduler label whose name contains "DYTAS".
dytas_variants = [
    scheduler_name
    for scheduler_name in df["scheduler"].unique()
    if "DYTAS" in scheduler_name
]

In [3]:
# Restrict the results to the DYTAS variants and renumber the rows from 0.
df_dytas = (
    df.loc[df["scheduler"].isin(dytas_variants)]
    .copy()
    .reset_index(drop=True)
)

In [4]:
# Show the available columns of the DYTAS-only frame.
df_dytas.columns

Index(['dag', 'system', 'scheduler', 'completed', 'makespan', 'exec_time',
       'run_stats.scheduling_time', 'run_stats.total_task_time',
       'run_stats.total_network_traffic', 'run_stats.total_network_time',
       'run_stats.max_used_cores', 'run_stats.max_used_memory',
       'run_stats.max_cpu_utilization', 'run_stats.max_memory_utilization',
       'run_stats.cpu_utilization', 'run_stats.memory_utilization',
       'run_stats.used_resource_count', 'run_stats.cpu_utilization_used',
       'run_stats.memory_utilization_used', 'run_stats.cpu_utilization_active',
       'run_stats.memory_utilization_active', 'run_stats.expected_makespan',
       'dag_path', 'system_path', 'dag_nodes', 'dag_edges', 'dag_density',
       'speedup', 'processor_count', 'core_count', 'efficiency_processors',
       'efficiency_cores', 'network_bandwidth', 'network_latency',
       'network_model', 'SLR', 'workflow', 'CCR', 'test_id'],
      dtype='object')

Using a Multi-Criteria Decision Making Method (MCDM) called TOPSIS (Technique for Order of Preference by Similarity to Ideal Solution)

In [5]:
import numpy as np
import skcriteria as skc
import skcriteria.madm as madm

In [12]:
# One row per metric: (column name, optimisation direction, relative weight).
# Keeping the three attributes side by side means the derived lists below
# always stay aligned with each other.
_metric_table = [
    ("SLR", min, 1),
    ("speedup", max, 1),
    ("exec_time", min, 1),
    ("efficiency_processors", max, .5),
    ("efficiency_cores", max, .5),
]

# Metric column names, in table order
primary_metrics = [name for name, _, _ in _metric_table]
# Criteria for each metric in order
objectives = [direction for _, direction, _ in _metric_table]
# Weight for each metric in order
weights = np.array([weight for _, _, weight in _metric_table])

Aggregate the performance metrics for each algorithm

In [20]:
# Per-scheduler standard deviation of each primary metric.
df_dytas.groupby("scheduler")[primary_metrics].std()

Unnamed: 0_level_0,SLR,speedup,exec_time,efficiency_processors,efficiency_cores
scheduler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"DYTAS[navigation=All,sorting=DFS,multicore=SkipActiveProcessors]",1679.103198,2.511232,0.122863,0.263704,0.28189
"DYTAS[navigation=All,sorting=DFS,multicore=UseAllCores]",1678.650663,10.218337,0.124973,1.394708,0.279978
"DYTAS[navigation=All,sorting=Khan,multicore=SkipActiveProcessors]",1679.102825,2.499129,0.115205,0.26453,0.282238
"DYTAS[navigation=All,sorting=Khan,multicore=UseAllCores]",1678.650816,10.083256,0.128288,1.375778,0.279984
"DYTAS[navigation=Front,sorting=DFS,multicore=SkipActiveProcessors]",1679.083454,2.077904,0.123054,0.216609,0.225341
"DYTAS[navigation=Front,sorting=DFS,multicore=UseAllCores]",1678.626741,7.520499,0.072739,0.97132,0.22807
"DYTAS[navigation=Front,sorting=Khan,multicore=SkipActiveProcessors]",1679.101637,2.467386,0.13051,0.259478,0.275699
"DYTAS[navigation=Front,sorting=Khan,multicore=UseAllCores]",1678.648945,9.591898,0.083976,1.278955,0.274028


Since the standard deviation of the results is quite high (the data is strongly right-skewed: e.g. the mean SLR is far above its median), it is a fairer comparison to use the median rather than the mean.

In [19]:
# Per-scheduler mean of each primary metric.
df_dytas.groupby("scheduler")[primary_metrics].mean()

Unnamed: 0_level_0,SLR,speedup,exec_time,efficiency_processors,efficiency_cores
scheduler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"DYTAS[navigation=All,sorting=DFS,multicore=SkipActiveProcessors]",49.652313,4.468551,0.073873,0.568397,0.222198
"DYTAS[navigation=All,sorting=DFS,multicore=UseAllCores]",48.033792,7.940162,0.069758,1.079818,0.267704
"DYTAS[navigation=All,sorting=Khan,multicore=SkipActiveProcessors]",49.660325,4.473977,0.074507,0.569507,0.22243
"DYTAS[navigation=All,sorting=Khan,multicore=UseAllCores]",48.047356,7.893514,0.066767,1.073457,0.267246
"DYTAS[navigation=Front,sorting=DFS,multicore=SkipActiveProcessors]",50.59646,3.543781,0.06923,0.446382,0.174729
"DYTAS[navigation=Front,sorting=DFS,multicore=UseAllCores]",49.164828,5.356505,0.051948,0.70177,0.197837
"DYTAS[navigation=Front,sorting=Khan,multicore=SkipActiveProcessors]",49.704576,4.378261,0.068898,0.556196,0.217119
"DYTAS[navigation=Front,sorting=Khan,multicore=UseAllCores]",48.112191,7.483229,0.051685,1.00327,0.257128


In [18]:
# Per-scheduler median of each primary metric.
df_dytas.groupby("scheduler")[primary_metrics].median()

Unnamed: 0_level_0,SLR,speedup,exec_time,efficiency_processors,efficiency_cores
scheduler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"DYTAS[navigation=All,sorting=DFS,multicore=SkipActiveProcessors]",1.688838,3.764629,0.037934,0.577027,0.072525
"DYTAS[navigation=All,sorting=DFS,multicore=UseAllCores]",1.071478,5.146146,0.035961,0.681571,0.136228
"DYTAS[navigation=All,sorting=Khan,multicore=SkipActiveProcessors]",1.681602,3.796969,0.039291,0.575371,0.072423
"DYTAS[navigation=All,sorting=Khan,multicore=UseAllCores]",1.070151,5.146365,0.035906,0.683948,0.136151
"DYTAS[navigation=Front,sorting=DFS,multicore=SkipActiveProcessors]",2.107824,2.926132,0.034683,0.409771,0.055717
"DYTAS[navigation=Front,sorting=DFS,multicore=UseAllCores]",1.727543,3.459695,0.031297,0.470667,0.076051
"DYTAS[navigation=Front,sorting=Khan,multicore=SkipActiveProcessors]",1.733282,3.688222,0.034012,0.558831,0.070232
"DYTAS[navigation=Front,sorting=Khan,multicore=UseAllCores]",1.141516,4.887929,0.030267,0.659165,0.122193


In [21]:
# Median of every primary metric per scheduler; this is the table the ranking is built on.
df_dytas_medians = df_dytas.groupby("scheduler")[primary_metrics].median()

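Inverting the SLR and execution-time metrics (for which lower is better) by taking their reciprocal before normalizing, so that a higher value is better for every criterion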

In [36]:
# Rebuild the per-scheduler medians here instead of inverting the existing frame
# in place: an in-place `x = 1 / x` flips SLR / exec_time back to their raw
# values every second time the cell is run, which silently corrupts the ranking.
# Both metrics are "lower is better"; taking the reciprocal makes every column
# "higher is better" before scaling. `assign` keeps the original column order.
df_dytas_medians = (
    df_dytas[primary_metrics + ["scheduler"]]
    .groupby("scheduler")
    .median()
    .assign(
        SLR=lambda medians: 1 / medians["SLR"],
        exec_time=lambda medians: 1 / medians["exec_time"],
    )
)

Normalizing with min-max scale

In [37]:
# Column-wise min-max scaling: (value - column min) / (column max - column min).
df_dytas_medians_min_max_scaled = df_dytas_medians.sub(df_dytas_medians.min()).div(
    df_dytas_medians.max() - df_dytas_medians.min()
)
df_dytas_medians_min_max_scaled

Unnamed: 0_level_0,SLR,speedup,exec_time,efficiency_processors,efficiency_cores
scheduler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"DYTAS[navigation=All,sorting=DFS,multicore=SkipActiveProcessors]",0.255856,0.377662,0.120008,0.61003,0.208775
"DYTAS[navigation=All,sorting=DFS,multicore=UseAllCores]",0.997483,0.999901,0.310582,0.991329,1.0
"DYTAS[navigation=All,sorting=Khan,multicore=SkipActiveProcessors]",0.261395,0.392228,0.0,0.603988,0.20751
"DYTAS[navigation=All,sorting=Khan,multicore=UseAllCores]",1.0,1.0,0.316186,1.0,0.999043
"DYTAS[navigation=Front,sorting=DFS,multicore=SkipActiveProcessors]",0.0,0.0,0.445643,0.0,0.0
"DYTAS[navigation=Front,sorting=DFS,multicore=UseAllCores]",0.227018,0.240318,0.856601,0.222106,0.252567
"DYTAS[navigation=Front,sorting=Khan,multicore=SkipActiveProcessors]",0.222852,0.343248,0.520553,0.543664,0.18029
"DYTAS[navigation=Front,sorting=Khan,multicore=UseAllCores]",0.873007,0.883599,1.0,0.90961,0.825679


In [41]:
# Plain NumPy array of the scaled medians (rows: schedulers, columns: metrics).
matrix = df_dytas_medians_min_max_scaled.to_numpy()

In [49]:
# SLR and exec_time were inverted before scaling, so every criterion is passed
# with objective 1 here -- one entry per weight.
decision_matrix = skc.mkdm(
    matrix,
    objectives=[1] * len(weights),
    weights=weights,
)

In [50]:
# Display the decision matrix to check the objective and weight attached to each criterion.
decision_matrix

  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())


Unnamed: 0,C0[▲ 1.0],C1[▲ 1.0],C2[▲ 1.0],C3[▲ 0.5],C4[▲ 0.5]
A0,0.255856,0.377662,0.120008,0.61003,0.208775
A1,0.997483,0.999901,0.310582,0.991329,1.0
A2,0.261395,0.392228,0.0,0.603988,0.20751
A3,1.0,1.0,0.316186,1.0,0.999043
A4,0.0,0.0,0.445643,0.0,0.0
A5,0.227018,0.240318,0.856601,0.222106,0.252567
A6,0.222852,0.343248,0.520553,0.543664,0.18029
A7,0.873007,0.883599,1.0,0.90961,0.825679


In [44]:
# See documentation at: https://scikit-criteria.readthedocs.io/_/downloads/en/0.5/pdf/
# https://scikit-criteria.quatrope.org/en/latest/tutorial/quickstart.html
from skcriteria.madm import similarity # here lives TOPSIS
from skcriteria.pipeline import mkpipe # this function is for create pipelines
from skcriteria.preprocessing import invert_objectives, scalers

In [51]:
# TOPSIS pipeline; the steps run in the order listed.
topsis_steps = (
    # NOTE(review): the decision matrix is built with objective 1 for every
    # criterion, so this step presumably has nothing to negate -- confirm.
    invert_objectives.NegateMinimize(),
    # Vector-scale the criteria matrix.
    scalers.VectorScaler(target="matrix"),
    # Sum-scale the weights.
    scalers.SumScaler(target="weights"),
    # Rank the alternatives with TOPSIS.
    similarity.TOPSIS(),
)
pipe = mkpipe(*topsis_steps)
pipe


<SKCPipeline [steps=[('negateminimize', <NegateMinimize []>), ('vectorscaler', <VectorScaler [target='matrix']>), ('sumscaler', <SumScaler [target='weights']>), ('topsis', <TOPSIS [metric='euclidean']>)]]>

In [52]:
# Evaluate the pipeline on the decision matrix. Alternatives A0..A7 follow the row
# order of the scaled median table, since the matrix was taken from its values.
# NOTE(review): rank 1 is presumably the preferred variant -- confirm against the scikit-criteria docs.
rank = pipe.evaluate(decision_matrix)
rank

Alternatives,A0,A1,A2,A3,A4,A5,A6,A7
Rank,6,3,7,2,8,4,5,1
