In [1]:
import pandas as pd

import numpy as np
import skcriteria as skc
import skcriteria.madm as madm
# See documentation at: https://scikit-criteria.readthedocs.io/_/downloads/en/0.5/pdf/
# https://scikit-criteria.quatrope.org/en/latest/tutorial/quickstart.html
from skcriteria.madm import similarity # here lives TOPSIS
from skcriteria.pipeline import mkpipe # this function is for create pipelines
from skcriteria.preprocessing import invert_objectives, scalers
from skcriteria.agg import simple

ModuleNotFoundError: No module named 'skcriteria'

In [None]:
# Load the pre-processed metrics table. The path is relative to the directory
# the notebook is run from.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
df = pd.read_pickle("./data/pre-processed-metrics-dataframe.pkl")

# Selecting best DYTAS variant

In [None]:
# Names of every scheduler whose label contains the substring "DYTAS".
dytas_variants = [
    name
    for name in df["scheduler"].unique()
    if "DYTAS" in name
]

In [None]:
# Keep only the rows produced by a DYTAS variant, as an independent copy with
# a fresh 0..n-1 index.
df_dytas = (
    df.loc[df["scheduler"].isin(dytas_variants)]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Inspect which columns are available in the DYTAS-only subset.
df_dytas.columns

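We use a Multi-Criteria Decision Making (MCDM) method to select the best variant. The intended method is TOPSIS (Technique for Order of Preference by Similarity to Ideal Solution); note that the ranking cells below currently use the Weighted Sum Model, and the TOPSIS pipeline is left commented out.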

Aggregate the performance metrics for each algorithm

In [None]:
# Metric columns used as decision criteria. The order matters: objectives and
# weights defined elsewhere in the notebook are matched to it positionally.
primary_metrics = ["SLR", "speedup", "exec_time", "efficiency_processors", "efficiency_cores"]

In [None]:
# Spread of each primary metric per scheduler (standard deviation).
df_dytas.groupby("scheduler")[primary_metrics].std()

Since the standard deviation of the results is quite high (the data is very left-skewed), it is fairer to compare the schedulers using the median rather than the mean.

In [None]:
# Mean of each primary metric per scheduler (shown for comparison with the median).
df_dytas.groupby("scheduler")[primary_metrics].mean()

In [None]:
# Median of each primary metric per scheduler.
df_dytas.groupby("scheduler")[primary_metrics].median()

In [None]:
# Per-scheduler medians of the primary metrics: one row per scheduler, one
# column per metric. This table feeds the decision matrix.
df_dytas_medians = (
    df_dytas
    .groupby("scheduler")[primary_metrics]
    .median()
)

In [None]:

# Optimisation direction of each criterion, positionally aligned with the
# metric order used to build the decision matrix:
# minimise, maximise, minimise, maximise, maximise.
objectives = [min, max, min, max, max]

# Relative importance of each criterion, in the same positional order.
weights = np.array([0.30, 0.20, 0.20, 0.15, 0.15])

In [None]:
# Raw values of the median table (rows = schedulers, columns = metrics).
matrix = df_dytas_medians.to_numpy()

In [None]:
# Bundle the median values with their objectives and weights into a
# scikit-criteria decision matrix: schedulers are the alternatives (rows) and
# the metric columns are the criteria.
# (Alternative input kept for reference: matrix = df_dytas[primary_metrics].values)
decision_matrix = skc.mkdm(
    matrix,
    alternatives=df_dytas_medians.index,
    criteria=list(df_dytas_medians.columns),
    objectives=objectives,
    weights=weights,
)

In [None]:
# Display the assembled decision matrix.
decision_matrix

In [None]:
# Display the criteria weights stored in the decision matrix.
decision_matrix.weights

In [None]:
# Default plot of the decision matrix.
# NOTE(review): presumably a heatmap of the criteria values - confirm against
# the installed scikit-criteria version.
decision_matrix.plot()

In [None]:
# Weights plot ("wheatmap"); presumably a heatmap of the criteria weights - confirm.
decision_matrix.plot.wheatmap()

In [None]:
# KDE plot of the (untransformed) criteria values.
decision_matrix.plot.kde()

In [None]:
# Convert every "minimize" criterion into a "maximize" one so that all criteria
# point in the same direction before aggregation (as recommended by the
# scikit-criteria documentation).
# NOTE(review): InvertMinimize presumably uses the reciprocal (1/x) of the
# values, so a criterion containing zeros would be a problem - confirm.
inverter = invert_objectives.InvertMinimize()
dmt = inverter.transform(decision_matrix)
dmt

In [None]:
# Normalize the inverted matrix with a sum scaler.
# target="both" scales the criteria matrix as well as the weights.
# Note: `dmt` is overwritten with the scaled result, so this cell expects the
# inversion cell to have been run first.
scaler = scalers.SumScaler(target="both")
dmt = scaler.transform(dmt)
dmt

In [None]:
# Display the transformed (inverted and scaled) decision matrix.
dmt

In [None]:
# Use matplotlib directly to place two plots in a single figure.
import matplotlib.pyplot as plt

# Two side-by-side axes sharing the same y axis.
fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(12, 5))
criteria_ax, weights_ax = axs

# Left: KDE of the transformed criteria.
dmt.plot.kde(ax=criteria_ax)
criteria_ax.set_title("Criteria")

# Right: the (scaled) weights as bars.
dmt.plot.wbar(ax=weights_ax)
weights_ax.set_title("Weights")

# Adjust the figure layout to fit its content.
fig.tight_layout()

In [None]:
# Rank the alternatives with the Weighted Sum Model (note: this is not TOPSIS).
dec = simple.WeightedSumModel()
rank = dec.evaluate(dmt)  # evaluate the transformed (inverted + scaled) data
rank

In [None]:
# Rank positions as a Series, sorted in ascending order.
rank.to_series().sort_values()

In [None]:
# Raw scores exposed in the result's extra information (`e_`).
rank.e_.score

In [None]:
# Pair every alternative with its weighted-sum score, highest score first.
#
# A new float Series is built instead of writing the scores into the integer
# rank Series in place (`score[:] = ...`): assigning floats into an int Series
# relies on silent dtype upcasting, which recent pandas versions deprecate.
rank_series = rank.to_series()
score = pd.Series(
    np.asarray(rank.e_.score, dtype=float),
    index=rank_series.index,
    # Keep the original Series name so cells that refer to this column by
    # name keep working.
    name=rank_series.name,
).sort_values(ascending=False)

In [None]:
# Display the scores, sorted from highest to lowest.
score

In [None]:
# Show the scores as a one-column table.
score.to_frame()

In [None]:
import seaborn as sns

# Horizontal bar chart of the score of each alternative.
# - The alternatives are moved from the index into a real column so that
#   seaborn does not have to resolve `y` through an implicit index lookup.
# - The value column is named "Score": the plotted values are scores, not rank
#   positions.
# - The frame is passed as `data=` by keyword, because not every seaborn
#   release accepts it positionally.
score_frame = (
    score
    .rename("Score")
    .rename_axis("Alternatives")
    .reset_index()
)
ax = sns.barplot(data=score_frame, x="Score", y="Alternatives")
ax.set(xlabel="Score", ylabel="Alternatives", title="Score per alternative");

In [None]:
# DISABLED: alternative ranking via a TOPSIS pipeline (negate the minimize
# criteria, vector-scale the matrix, sum-scale the weights, then TOPSIS).
# Kept for reference; uncomment this cell and the following ones to run it.
# pipe = mkpipe(
#     invert_objectives.NegateMinimize(),
#     scalers.VectorScaler(target="matrix"),
#     scalers.SumScaler(target="weights"),
#     similarity.TOPSIS()
# )
# pipe


In [None]:
# rank = pipe.evaluate(decision_matrix)
# rank

In [None]:
# rank.e_.ideal

In [None]:
# rank.e_.similarity

# TODO: Repeat this analysis with all algorithms and compare the results. Afterwards, determine which of the independent variables affected the results the most.