In [1]:
import glob
import os
import pandas as pd

import plot

In [2]:
def pre_process(df):
    """Add readable worker/thread labels to a trace table and sort by them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``worker``, ``thread`` and ``func``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left unmodified) with the extra columns
        ``hostname`` (copy of ``worker``), ``worker_name`` (``worker01``,
        ``worker02``, ... in order of first appearance), ``thread_number``
        (1-based, per worker, in order of first appearance) and
        ``worker_thread`` (``"<worker_name>::thread<thread_number>"``),
        sorted by ``worker_name`` then ``thread_number``.
    """
    # Work on a copy so the caller's DataFrame is not modified as a side effect.
    df = df.copy()

    # Simplify the workers name
    df["hostname"] = df["worker"]
    worker_names = {
        worker: f"worker{i:02d}"
        for i, worker in enumerate(df["worker"].unique(), 1)
    }
    # astype(object) keeps the column string-concatenable even when the frame
    # is empty (mapping through an empty dict would otherwise yield floats).
    df["worker_name"] = df["worker"].map(worker_names).astype(object)

    # Rename function (fill func_map with {old_name: new_name} entries to
    # relabel functions; unknown names are kept as they are)
    func_map = {}
    df["func"] = df["func"].apply(lambda x: func_map.get(x, x))

    # Simplify the thread number for each worker: walk the distinct
    # (worker, thread) pairs once, in order of first appearance, and hand out
    # consecutive numbers per worker.
    df["worker_thread"] = df["worker_name"] + "::" + df["thread"].astype(str)
    pairs = df[["worker_name", "worker_thread"]].drop_duplicates()
    thread_worker = {}
    threads_per_worker = {}
    for name, key in zip(pairs["worker_name"], pairs["worker_thread"]):
        threads_per_worker[name] = threads_per_worker.get(name, 0) + 1
        thread_worker[key] = threads_per_worker[name]
    df["thread_number"] = df["worker_thread"].map(thread_worker)

    # Replace the raw thread identifier in the label by its simplified number.
    df["worker_thread"] = df["worker_name"] + "::thread" + df["thread_number"].astype(str)
    df = df.sort_values(by=["worker_name", "thread_number"], ascending=[True, True])
    return df

In [3]:
# Every benchmark summary CSV, one directory level below results/benchmarks,
# in sorted order so output numbering is stable between runs.
filenames = sorted(glob.glob("../results/benchmarks/*/summary*.csv"))

# Build one plotting job per summary file, grouped by experiment directory.
# A job is (input csv, output html, framework, experiment); the framework is
# the part of the experiment directory name before the first ":".
jobs_by_experiment = {}
for summary_csv in filenames:
    parts = summary_csv.split("/")
    experiment = parts[-2]
    framework = experiment.split(":")[0]
    root = "/".join(parts[:-3])
    jobs = jobs_by_experiment.setdefault(experiment, [])
    # Outputs of the same experiment are numbered gantt-1.html, gantt-2.html, ...
    html_out = f"{root}/output/{experiment}/gantt-{len(jobs) + 1}.html"
    jobs.append((summary_csv, html_out, framework, experiment))

# Flatten the groups into the list of jobs consumed by the plotting cell.
experiments = [job for jobs in jobs_by_experiment.values() for job in jobs]

In [5]:
# Column names for the summary CSV files, which are read without a header row.
col_name = ["func", "start", "end", "filename", "worker", "thread", "process"]

# Render one Gantt chart per summary file. This is a best-effort batch: a file
# that cannot be read or plotted is reported and skipped so the remaining
# experiments are still processed.
for fin, fout, framework, experiment in experiments:
    try:
        plot.gantt(
            pd.read_csv(fin, header=None, names=col_name),
            pre_process=pre_process,
            group="worker_thread",
            x_limit=None,
            save_name=fout,
            framework=framework,
            ylabel="Workers",
            title=experiment,
        )
    except Exception as err:
        # Catch Exception rather than everything, so Ctrl-C / kernel interrupt
        # still stops the loop, and show why the file failed.
        print(f"{fin}: {type(err).__name__}: {err}")