In [1]:
import os
import glob
import pandas as pd
import re
import seaborn as sns
import matplotlib.pyplot as plt

APP_FILE = "application"
INFRA_FILE = "infrastructure"
SIM_FILE = "simulation"
base_path = "" # path to the directory containing the Ray Tune results

In [2]:
def parse_directory_name(dirname):
    # Extract the part containing the parameters
    params_part = re.search(r"load=.*", dirname).group(0)
    # Extract only the parameters, excluding the timestamp at the end
    params_part = re.split(r"_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}", params_part)[0]

    # Find all param=value pairs
    pattern = r"(\w+)=([^,]+)"
    matches = re.findall(pattern, params_part)
    params = {param: value for param, value in matches}
    return params

In [3]:
def read_parquets(base_path):
    app_combined_df = pd.read_parquet(os.path.join(base_path, f"{APP_FILE}.parquet"))
    infra_combined_df = pd.read_parquet(
        os.path.join(base_path, f"{INFRA_FILE}.parquet")
    )
    sim_combined_df = pd.read_parquet(os.path.join(base_path, f"{SIM_FILE}.parquet"))

    return app_combined_df, infra_combined_df, sim_combined_df

In [7]:
def process_directories(base_path):
    directories = glob.glob(os.path.join(base_path, "*"))

    app_dfs = []
    infra_dfs = []
    sim_dfs = []

    dir_num = 1
    tot_dir = len(directories)

    for directory in directories:
        print(f"Processing directory {dir_num}/{tot_dir}", end="\r")
        if not os.path.isdir(directory):
            continue
        params = parse_directory_name(os.path.basename(directory))
        stats_path = os.path.join(directory, "output/stats")

        try:
            app_df = pd.read_csv(os.path.join(stats_path, f"{APP_FILE}.csv"))
            infra_df = pd.read_csv(os.path.join(stats_path, f"{INFRA_FILE}.csv"))
            sim_df = pd.read_csv(os.path.join(stats_path, f"{SIM_FILE}.csv"))
        except FileNotFoundError:
            print(f"Files not found in {stats_path}")
            continue

        # Add config params as new columns
        for param, value in params.items():
            app_df[param] = value
            infra_df[param] = value
            sim_df[param] = value

        app_dfs.append(app_df)
        infra_dfs.append(infra_df)
        sim_dfs.append(sim_df)
        dir_num += 1

    # Concatenate all DataFrames
    app_combined_df = pd.concat(app_dfs, ignore_index=True)
    infra_combined_df = pd.concat(infra_dfs, ignore_index=True)
    sim_combined_df = pd.concat(sim_dfs, ignore_index=True)

    app_combined_df.drop(columns=["event_id"], inplace=True)
    infra_combined_df.drop(columns=["event_id"], inplace=True)
    sim_combined_df.drop(columns=["event_id"], inplace=True)

    app_combined_df.to_parquet(
        os.path.join(base_path, f"{APP_FILE}.parquet"),
        index=False,
        compression="brotli",
    )
    infra_combined_df.to_parquet(
        os.path.join(base_path, f"{INFRA_FILE}.parquet"),
        index=False,
        compression="brotli",
    )
    sim_combined_df.to_parquet(
        os.path.join(base_path, f"{SIM_FILE}.parquet"),
        index=False,
        compression="brotli",
    )

process_directories(base_path)

Processing directory 756/762

In [8]:
app_df, infra_df, sim_df = read_parquets(base_path)
dfs = {"application": app_df, "infrastructure": infra_df, "simulation": sim_df}

In [10]:
for name, df in dfs.items():
    df["policy"] = df["policy"].replace(
            {
                "degrade_0_4": "degrade to 40%",
                "degrade_0_5": "degrade to 50%",
                "degrade_0_6": "degrade to 60%",
                "kill_0_01": "kill 1%",
                "kill_0_05": "kill 5%",
                "kill_0_1": "kill 10%",
            }
        )
    df["topology"] = df["topology"].replace({"random_0_5": "random"})

In [13]:
old_app_df = pd.read_parquet("./results/application.parquet")
old_infra_df = pd.read_parquet("./results/infrastructure.parquet")
old_sim_df = pd.read_parquet("./results/simulation.parquet")

In [15]:
new_app_df = pd.concat([old_app_df, app_df], ignore_index=True)
new_infra_df = pd.concat([old_infra_df, infra_df], ignore_index=True)
new_sim_df = pd.concat([old_sim_df, sim_df], ignore_index=True)

In [16]:
new_dfs = {"application": new_app_df, "infrastructure": new_infra_df, "simulation": new_sim_df}

In [16]:
new_app_df.to_parquet("./results/application.parquet", compression="brotli")
new_infra_df.to_parquet("./results/infrastructure.parquet", compression="brotli")
new_sim_df.to_parquet("./results/simulation.parquet", compression="brotli")