# K2DSE Benchmarks

## What do we need to show 


### Goal 0 - re-implementation

First, I need to make sure my re-implementation behaves identically to the original. This is not trivial, as the two implementations count storage distributions (SD) differently.

#### 0.1 Check that the number of SDs found is the same between OldKDSE, DKDSE, DKDSEA (the capstone implementation) and KDSE, K2DSE, K2DSEA (the paper re-implementation).

#### 0.2 Verify that the algorithm isn't slower

We are faster on small instances, need to verify on larger ones.

#### 0.3 Check that the thread implementation is identical and also faster.

So far I have found duplicates, so this implementation is not going to be used.

### Goal 1 - Execution Time and size of explored space by DSE methods. 

#### Generate Table 1 with KDSE, K2DSE, K2DSEA (no multi-threading).


### Goal 2 - Pareto fronts and explored space 

#### Generate Fig. 3 with K2DSE, K2DSEA, PDSE (no multi-threading).

## Prepare data

In [None]:
import dsereader

# Directory that holds the per-application log files read by this notebook.
logdir = "../kdse2023_log/"

# Applications to process; commented-out names are currently excluded.
appnames = [
    "bipartite",
    "Echo",
    "fig8",
    "H264",
    "modem",
    # "sample",
    "satellite",
    "BlackScholes",
    # "example",
    "h263decoder",
    "JPEG2000",
    "PDectect",
    "samplerate",
]

# DSE methods keyed by the id handed to the dsereader helpers. Each entry
# carries a display name and a plot color; the trailing comment records the
# options associated with that method. Commented-out entries are disabled.
methods = {
    2: dict(name="OldKDSE", color="black"),    # "-aKPeriodicThroughputwithDSE"
    3: dict(name="DeepKDSE", color="red"),     # "-aDeepKPeriodicThroughputwithDSE"
    4: dict(name="DeepKDSEA", color="green"),  # "-aDeepKPeriodicThroughputwithDSE -papprox=1"
    # 5: dict(name="KDSE", color="black"),     # "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE"
    # 6: dict(name="K2DSE", color="black"),    # "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE"
    # 8: dict(name="KDSE4", color="black"),    # "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=4"
    # 9: dict(name="K2DSE4", color="black"),   # "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE -pthread=4"
}

In [None]:
## Collect the max throughput for each application
import pandas as pd 

# Maximum throughput per application, parsed from "<app>_1.txt": the last
# space-separated token of every line containing the marker text is read as a
# float (a later matching line overwrites an earlier one).
max_throughput = {}
for app in appnames:
    # The context manager closes each log file as soon as it has been scanned.
    with open(logdir + "/" + app + "_1.txt") as log:
        for raw_line in log:
            line = raw_line.rstrip("\n")
            if 'KPeriodic Throughput is' in line:
                th = float(line.split(" ")[-1])
                max_throughput[app] = th
                print (f"{app} {th}")

# Task count per application, parsed the same way from "<app>_0.txt".
task_count = {}
for app in appnames:
    with open(logdir + "/" + app + "_0.txt") as log:
        for raw_line in log:
            line = raw_line.rstrip("\n")
            if 'Task count' in line:
                count = int(line.split(" ")[-1])
                task_count[app] = count
                print (f"{app} {count}")

In [None]:
# Show the list of application names as the cell output.
appnames

In [None]:
# Run dsereader.plot_all over every application in `appnames` and every
# configured method, with dsereader.plot_app_pareto as the plotting callback.
dsereader.plot_all(logdir, graphs=appnames, methods=methods, plotfunc=dsereader.plot_app_pareto)

In [None]:
import matplotlib.pyplot as plt
# Draw each application's Pareto plot in its own figure. The list is iterated
# directly because no positional index is needed.
for name in appnames:
    plt.figure()
    dsereader.plot_app_pareto(logdir, appname=name, methods=methods)
    # Disabled alternative, kept for reference: plot one method at a time.
    #for m in methods.keys():
    #    infos = methods[m]
    #    try :
    #        df = dsereader.load_app_dse(logdir, name, m)
    #        dsereader.plot_pareto(df, dsecolor=infos["color"], dsename=infos["name"])
    #    except :
    #        pass

In [None]:
import math
import datetime
def time_in_msec(time_msec): # originally adapted from https://stackoverflow.com/questions/48063828/convert-duration-format-from-float-to-monthdayshoursminutesseconds-in-python
    """Format a duration given in milliseconds as a human-readable string.

    Args:
        time_msec: Non-negative duration in milliseconds (int or float). Any
            fractional millisecond is truncated for the breakdown.

    Returns:
        A string of the form ``"<d>days <h>h <m>min <s>.<mmm>sec (<raw>)"``,
        where ``<mmm>`` is the zero-padded millisecond remainder and ``<raw>``
        is the unmodified input value.
    """
    # The millisecond remainder needs a modulo; a bitwise AND with 1000 would
    # produce an unrelated number.
    total_sec, msec = divmod(int(time_msec), 1000)
    total_min, seconds = divmod(total_sec, 60)
    total_hours, minutes = divmod(total_min, 60)
    # Days are deliberately not wrapped at 7: the output has no weeks field,
    # so wrapping would silently drop whole weeks from long runs.
    days, hours = divmod(total_hours, 24)
    # Zero-pad the milliseconds so that 5 ms reads ".005" rather than ".5".
    return "{}days {}h {}min {}.{:03d}sec ({})".format(days, hours, minutes, seconds, msec, time_msec)

# Quick manual check of the formatter; the result is shown as the cell output.
time_in_msec(100000.10)


In [None]:
def gen_dsetable(logdir, graphs, methods):
    """Build one summary row per (graph, method) pair from the DSE logs.

    Relies on the module-level ``task_count`` and ``max_throughput``
    dictionaries and on ``time_in_msec`` for formatting the duration.

    Args:
        logdir: Log directory, forwarded to ``dsereader.load_app_dse``.
        graphs: Iterable of application (graph) names.
        methods: Mapping from method id to a dict holding at least a
            ``"name"`` entry; the id is forwarded to
            ``dsereader.load_app_dse``.

    Returns:
        A list of dicts with the keys ``"graph"``, ``"#task"``, ``"method"``,
        ``"#SD"``, ``"#Pareto"``, ``"Duration"`` and ``"Finished"``. When the
        loader raises ``FileNotFoundError`` the row keeps ``"-"`` placeholders
        and ``"Finished"`` is ``False``.
    """
    rows = []
    for name in graphs:
        for m in methods:
            # Start from the "no data" row; it is filled in once the log loads.
            row = {
                "graph": name,
                "#task": task_count[name],
                "method": methods[m]["name"],
                "#SD": "-",
                "#Pareto": "-",
                "Duration": "-",
                "Finished": False,
            }
            # Only the loader call is guarded, so a FileNotFoundError cannot
            # mask a failure in the post-processing below.
            try:
                df = dsereader.load_app_dse(logdir, name, m, cols = ["throughput", "storage distribution size","cumulative duration"])
            except FileNotFoundError:
                rows.append(row)
                continue

            # NOTE(review): the guard tests the "throughput" column although
            # the count uses "storage distribution size" - confirm intent.
            sd_count = df["storage distribution size"].count() if "throughput" in df else "-"
            max_th = df["throughput"].max()
            duration = df["cumulative duration"].max()
            # A run counts as finished when its best throughput matches the
            # known maximum for this application (relative tolerance 1e-5).
            finished = math.isclose(max_th, max_throughput[name], rel_tol=1e-5)
            pareto = dsereader.extract_pareto(df)
            row.update({
                "#SD": sd_count,
                # The Pareto size is only reported for finished explorations.
                "#Pareto": pareto["storage distribution size"].count() if finished else "-",
                "Duration": time_in_msec(duration),
                "Finished": finished,
            })
            rows.append(row)

    return rows

In [None]:
# Build the list of per-(graph, method) row dictionaries for all applications.
l = gen_dsetable(logdir, graphs=appnames, methods=methods)

In [None]:
# Turn the rows into a DataFrame indexed by (graph, #task, method); the bare
# `df` expression displays it as the cell output.
df = pd.DataFrame(l).set_index(["graph","#task","method"])
df

In [None]:
# Column spec: one "l" per index level and one "r" per data column, separated
# and framed by "|".
# NOTE(review): colformat is unused while the column_format argument below
# stays commented out.
colformat = "|".join([""] + ["l"] * df.index.nlevels + ["r"] * df.shape[1] + [""])
               
# Render the table as LaTeX, formatting floats with one decimal place.
latex = df.to_latex(
        float_format="{:0.1f}".format # , column_format=colformat, index=False
    )
print(latex)

In [None]:
# Display the raw list of row dictionaries as the cell output.
l