In [79]:
import pandas as pd

In [80]:
files_info = [
    ("../data/baselineSingularDatacenterHeterogenous.csv", "Baseline", "Singular", "Heterogenous"),
    ("../data/baselineSingularDatacenterHomogenous.csv", "Baseline", "Singular", "Homogenous"),
    ("../data/baselineMultiDatacenterHeterogenous.csv", "Baseline", "Multi", "Heterogenous"),
    ("../data/baselineMultiDatacenterHomogenous.csv", "Baseline", "Multi", "Homogenous"),

    ("../data/roundRobinSingularDatacenterHeterogenous.csv", "RoundRobin", "Singular", "Heterogenous"),
    ("../data/roundRobinSingularDatacenterHomogenous.csv", "RoundRobin", "Singular", "Homogenous"),
    ("../data/roundRobinMultiDatacenterHeterogenous.csv", "RoundRobin", "Multi", "Heterogenous"),
    ("../data/roundRobinMultiDatacenterHomogenous.csv", "RoundRobin", "Multi", "Homogenous"),

    ("../data/GASingularDatacenterHeterogenous.csv", "GA", "Singular", "Heterogenous"),
    ("../data/GASingularDatacenterHomogenous.csv", "GA", "Singular", "Homogenous"),
    ("../data/GAMultiDatacenterHeterogenous.csv", "GA", "Multi", "Heterogenous"),
    ("../data/GAMultiDatacenterHomogenous.csv", "GA", "Multi", "Homogenous"),
]

df_list = []
for filepath, algorithm, topology, heterogeneity in files_info:
    df = pd.read_csv(filepath)
    df["Algorithm"] = algorithm
    df["Topology"] = topology
    df["Heterogeneity"] = heterogeneity
    df_list.append(df)

df = pd.concat(df_list, ignore_index=True)

df.head(5)


Unnamed: 0,Cloudlet ID,User ID,Status,Data Center ID,Submission Time,Start Time,Finish Time,Cloudlet Length,Processing Cost,File Size,CPU Utilization,RAM Utilization,BW Utilization,Waiting Time,Algorithm,Topology,Heterogeneity
0,5,3,SUCCESS,2,0.01,0.01,3.12,3113,131.8,691,UtilizationModelStochastic,UtilizationModelStochastic,UtilizationModelStochastic,0.0,Baseline,Singular,Heterogenous
1,0,3,SUCCESS,2,0.01,0.01,3.49,3485,61.8,317,UtilizationModelStochastic,UtilizationModelStochastic,UtilizationModelStochastic,0.0,Baseline,Singular,Heterogenous
2,2,3,SUCCESS,2,0.01,0.01,4.9,4895,114.9,709,UtilizationModelStochastic,UtilizationModelStochastic,UtilizationModelStochastic,0.0,Baseline,Singular,Heterogenous
3,7,3,SUCCESS,2,0.01,0.01,7.04,7028,118.0,746,UtilizationModelStochastic,UtilizationModelStochastic,UtilizationModelStochastic,0.0,Baseline,Singular,Heterogenous
4,11,3,SUCCESS,2,0.01,7.04,9.16,2118,66.3,337,UtilizationModelStochastic,UtilizationModelStochastic,UtilizationModelStochastic,7.03,Baseline,Singular,Heterogenous


In [81]:
df.columns

Index(['Cloudlet ID', 'User ID', 'Status', 'Data Center ID', 'Submission Time',
       'Start Time', 'Finish Time', 'Cloudlet Length', 'Processing Cost',
       'File Size', 'CPU Utilization', 'RAM Utilization', 'BW Utilization',
       'Waiting Time', 'Algorithm', 'Topology', 'Heterogeneity'],
      dtype='object')

## Makespan

In [82]:

max_finish_times = (
    df
    .groupby(["Topology", "Heterogeneity", "Algorithm"])["Finish Time"]
    .max()
    .reset_index()
    .rename(columns={"Finish Time": "Max Finish Time"})
)

max_finish_times = max_finish_times.sort_values(
    by=["Topology", "Heterogeneity", "Max Finish Time"],
    ascending=[True, True, True] 
)

print(max_finish_times)


    Topology Heterogeneity   Algorithm  Max Finish Time
1      Multi  Heterogenous          GA          1035.51
2      Multi  Heterogenous  RoundRobin          1387.01
0      Multi  Heterogenous    Baseline          1431.81
4      Multi    Homogenous          GA           400.03
3      Multi    Homogenous    Baseline           668.03
5      Multi    Homogenous  RoundRobin           668.04
6   Singular  Heterogenous    Baseline          2030.30
8   Singular  Heterogenous  RoundRobin          2072.29
7   Singular  Heterogenous          GA          2962.45
9   Singular    Homogenous    Baseline          1000.01
11  Singular    Homogenous  RoundRobin          1000.01
10  Singular    Homogenous          GA          1500.01


## Throughput

In [83]:
def calc_throughput(group):
    makespan = group["Finish Time"].max()
    cloudlet_count = len(group)
    throughput = cloudlet_count / makespan if makespan > 0 else 0
    return pd.Series({
        "Makespan": makespan,
        "Cloudlets": cloudlet_count,
        "Throughput": throughput
    })

throughput_df = (
    df
    .groupby(["Topology", "Heterogeneity", "Algorithm"])
    .apply(calc_throughput)
    .reset_index()
)

throughput_df = throughput_df.sort_values(
    by=["Topology", "Heterogeneity", "Throughput"],
    ascending=[True, True, False]
)

print(throughput_df)


    Topology Heterogeneity   Algorithm  Makespan  Cloudlets  Throughput
1      Multi  Heterogenous          GA   1035.51     1000.0    0.965708
2      Multi  Heterogenous  RoundRobin   1387.01     1000.0    0.720975
0      Multi  Heterogenous    Baseline   1431.81     1000.0    0.698417
4      Multi    Homogenous          GA    400.03     1000.0    2.499813
3      Multi    Homogenous    Baseline    668.03     1000.0    1.496939
5      Multi    Homogenous  RoundRobin    668.04     1000.0    1.496916
6   Singular  Heterogenous    Baseline   2030.30     1000.0    0.492538
8   Singular  Heterogenous  RoundRobin   2072.29     1000.0    0.482558
7   Singular  Heterogenous          GA   2962.45     1000.0    0.337558
9   Singular    Homogenous    Baseline   1000.01     1000.0    0.999990
11  Singular    Homogenous  RoundRobin   1000.01     1000.0    0.999990
10  Singular    Homogenous          GA   1500.01     1000.0    0.666662


  .apply(calc_throughput)


# Visualisations

In [84]:
df.columns

Index(['Cloudlet ID', 'User ID', 'Status', 'Data Center ID', 'Submission Time',
       'Start Time', 'Finish Time', 'Cloudlet Length', 'Processing Cost',
       'File Size', 'CPU Utilization', 'RAM Utilization', 'BW Utilization',
       'Waiting Time', 'Algorithm', 'Topology', 'Heterogeneity'],
      dtype='object')

In [85]:
df.drop(['Status', 'Data Center ID', 'CPU Utilization', 'RAM Utilization', 'BW Utilization'], axis=1).columns

Index(['Cloudlet ID', 'User ID', 'Submission Time', 'Start Time',
       'Finish Time', 'Cloudlet Length', 'Processing Cost', 'File Size',
       'Waiting Time', 'Algorithm', 'Topology', 'Heterogeneity'],
      dtype='object')