In [1]:
import json
import os

import numpy as np
import pandas as pd
from pandas import DataFrame

from simulation.mapek.simulation_parameters import portfolios

# Root folder that is walked for simulation runs (one sub-folder per run).
sim_directory = "simulations"

# Names of the files read from each simulation sub-folder.
results_file = "results.json"
parameters_file = "parameters.json"
measurements_file = "measurements.csv"
predictions_file = "predictions.csv"

# Deployment intervals used by the fixed-interval strategies, in seconds.
_SECONDS_PER_DAY = 24 * 60 * 60
_LONG_INTERVAL_S = 182 * _SECONDS_PER_DAY  # 15724800
_SHORT_INTERVAL_S = 84 * _SECONDS_PER_DAY  # 7257600

# (deployment goal type, learning strategy, interval in seconds) -> short strategy label.
strategies = {
    ("deploy_once", "NoUpdateStrategy", None): "static",
    ("fixed_interval", "RetrainStrategy", _LONG_INTERVAL_S): "retrain_long",
    ("fixed_interval", "RetrainStrategy", _SHORT_INTERVAL_S): "retrain_short",
    # Currently disabled:
    # ("fixed_interval", "FineTuneStrategy", _LONG_INTERVAL_S): "fine_tune_long",
    # ("fixed_interval", "FineTuneStrategy", _SHORT_INTERVAL_S): "fine_tune_short",
}


def map_portfolio(sim_portfolio: list[str]) -> str:
    """Resolve a simulation's model list to the name of a known portfolio.

    Model order is ignored when comparing against the entries of ``portfolios``.

    :param sim_portfolio: model names used by the simulation, or ``None``.
    :return: ``"-"`` when no portfolio was given; the key of the first entry in
        ``portfolios`` whose ``'models'`` match; otherwise the sorted model names
        joined by ``", "``.
    """
    if sim_portfolio is None:
        return "-"

    wanted_models = sorted(sim_portfolio)
    no_match = object()  # sentinel: distinguishes "nothing found" from any real key
    found = next(
        (name for name, spec in portfolios.items() if sorted(spec['models']) == wanted_models),
        no_match,
    )
    if found is not no_match:
        return found
    return ", ".join(wanted_models)


def map_strategy(bs_goal: dict, learning_strategy: str = "NoUpdateStrategy") -> str:
    """Map a base-station goal and learning strategy to a short strategy label.

    :param bs_goal: goal dict; its ``"type"`` and optional ``"interval_seconds"``
        entries are used for the lookup. ``None`` yields ``"-"``.
    :param learning_strategy: learning strategy name; ``None`` is treated as
        ``"NoUpdateStrategy"``.
    :return: the matching label from ``strategies``, or ``"-"`` if there is none.
    """
    if bs_goal is None:
        return "-"

    lookup_key = (
        bs_goal.get("type"),
        "NoUpdateStrategy" if learning_strategy is None else learning_strategy,
        bs_goal.get("interval_seconds"),
    )
    return strategies.get(lookup_key, "-")


def map_violation_threshold(sn_goal: dict) -> str:
    """Render a violation-rate goal as ``"<max violations>v / <window>h"``.

    :param sn_goal: goal dict with ``'max_violations'`` and ``'time_window_s'``
        (seconds), or ``None``.
    :return: the formatted threshold with the window converted to whole hours,
        or ``"-"`` when no goal was given.
    """
    if sn_goal is None:
        return "-"

    max_violations = sn_goal['max_violations']
    window_hours = sn_goal['time_window_s'] / 3600
    return f"{max_violations}v / {window_hours:.0f}h"


def calculate_rmse(measurements: pd.DataFrame, predictions: pd.DataFrame) -> float:
    """Compute the root-mean-square error between the ``'TL'`` columns of two frames.

    Both frames are first aligned on their row index with an outer join.

    :param measurements: frame with the measured ``'TL'`` values.
    :param predictions: frame with the predicted ``'TL'`` values.
    :return: RMSE over the union of both indices.
    """
    # NOTE(review): index labels present in only one frame are filled with 0 on the
    # other side, so unmatched rows count as full-magnitude errors — confirm intended.
    aligned_measured, aligned_predicted = measurements.align(
        predictions, join="outer", axis="rows", fill_value=0
    )
    residuals = aligned_measured['TL'] - aligned_predicted['TL']
    mean_squared_error = np.mean(np.square(residuals))
    return np.sqrt(mean_squared_error)


def _percentage(part: float, whole: float) -> float:
    """Return ``part / whole`` as a percentage, or NaN when ``whole`` is zero."""
    if whole == 0:
        return float("nan")
    return (part / whole) * 100


def to_metrics_dict(rmse: float, results: dict) -> dict:
    """Flatten one simulation's raw results into the metric columns of the result table.

    Rates are expressed in percent. A rate whose denominator is zero (a run without
    violations or without measurements) is reported as NaN instead of raising
    ``ZeroDivisionError``, so a single such run cannot abort the whole analysis.

    :param rmse: prediction error of the run.
    :param results: parsed contents of the run's results file.
    :return: dict with one entry per metric column.
    :raises KeyError: if ``results`` lacks one of the expected keys.
    """
    violations = results["violations"]
    return {
        "rmse": rmse,
        "violation_rate": _percentage(violations, results["measurements"]),
        "mallocs": results["total_memory_allocations"],
        "max_memory": results["peak_memory_allocated_MB"],
        "cpu_time": results["total_cpu_time_s"],
        "data_sent": results["total_data_sent_B"],
        "data_received": results["total_data_received_B"],
        "transmission_rate": _percentage(results["violations_sent_to_BS"], violations),
        # One configuration update is subtracted before relating updates to violations.
        "switch_rate": _percentage(results["configuration_updates"] - 1, violations),
    }


def to_config_dict(parameters: dict) -> dict:
    """Extract the configuration columns of the result table from a run's parameters.

    Missing or null ``"sn_goals"`` / ``"bs_goals"`` entries are treated as empty
    lists, so the corresponding column falls back to ``"-"`` instead of raising
    ``TypeError`` while iterating ``None``.

    :param parameters: parsed contents of the run's parameters file.
    :return: dict with the ``"portfolio"``, ``"strategy"`` and
        ``"violation_threshold"`` labels.
    """
    portfolio = map_portfolio(parameters.get("portfolio"))

    # Only the first goal of type "violation_rate" is used for the threshold label.
    sn_goals = parameters.get("sn_goals") or []
    sn_goal = next((g for g in sn_goals if g.get("type") == "violation_rate"), None)
    violation_threshold: str = map_violation_threshold(sn_goal)

    # The first base-station goal, if any, determines the strategy label.
    bs_goals = parameters.get("bs_goals") or []
    bs_goal = next(iter(bs_goals), None)
    learning_strategy = parameters.get("learning_strategy")
    strategy: str = map_strategy(bs_goal, learning_strategy)

    return {
        "portfolio": portfolio,
        "strategy": strategy,
        "violation_threshold": violation_threshold,
    }


# One dict (configuration + metrics) per successfully parsed simulation run.
simulation_results = []

for root, dirs, files in os.walk(sim_directory):

    print("Parsing simulation results...")
    # Sorted so the row order of results_df does not depend on the order in which
    # the filesystem happens to list the sub-folders.
    for sim_dir in sorted(dirs):
        sim_path = os.path.join(root, sim_dir)
        print(f"    📂 {root}/{sim_dir}...")

        results_path = os.path.join(sim_path, results_file)

        if not os.path.isfile(results_path):
            print("        ⚠ No simulation results file found. Skipping.")
            continue

        with open(results_path, "r") as f:
            results: dict = json.load(f)

        measurements: DataFrame = pd.read_csv(os.path.join(sim_path, measurements_file), index_col=0, parse_dates=True)
        predictions: DataFrame = pd.read_csv(os.path.join(sim_path, predictions_file), index_col=0, parse_dates=True)
        rmse: float = calculate_rmse(measurements, predictions)

        metrics: dict = to_metrics_dict(rmse, results)

        with open(os.path.join(sim_path, parameters_file), "r") as f:
            parameters: dict = json.load(f)
        config: dict = to_config_dict(parameters)

        simulation_results.append(config | metrics)

    # Don't iterate on nested subfolders: emptying `dirs` in place stops os.walk
    # from descending below the top level.
    dirs.clear()
    print("🌟 Parsing finished")

results_df = pd.DataFrame(simulation_results)

results_df

Using mse_weighted
Parsing simulation results...
    📂 simulations/linz_2010_2019_conv_lstm_retrain_long_1v1h...
    📂 simulations/linz_2010_2019_conv_lstm_retrain_short_1v1h...
    📂 simulations/linz_2010_2019_conv_lstm_static_1v1h...
    📂 simulations/linz_2010_2019_mixed_arch_static_1v1h...
    📂 simulations/linz_2010_2019_mixed_arch_static_2v1h...
    📂 simulations/linz_2010_2019_mixed_arch_static_2v2h...
    📂 simulations/linz_2010_2019_mixed_arch_static_2v3h...
    📂 simulations/linz_2010_2019_mixed_arch_static_3v1h...
    📂 simulations/linz_2010_2019_mixed_arch_static_3v2h...
    📂 simulations/linz_2010_2019_mixed_arch_static_3v3h...
    📂 simulations/linz_2010_2019_simple_dense_retrain_long_1v1h...
    📂 simulations/linz_2010_2019_simple_dense_retrain_short_1v1h...
    📂 simulations/linz_2010_2019_simple_dense_static_1v1h...
    📂 simulations/linz_2010_2019_simple_lstm_retrain_long_1v1h...
    📂 simulations/linz_2010_2019_simple_lstm_retrain_short_1v1h...
    📂 simulations/linz

Unnamed: 0,portfolio,strategy,violation_threshold,rmse,violation_rate,mallocs,max_memory,cpu_time,data_sent,data_received,transmission_rate,switch_rate
0,linz_2010_2019_conv_lstm,retrain_long,1v / 1h,0.548686,5.278158,3285302,196.199,18317.312,3015303.0,2038824.0,100.000000,0.071994
1,linz_2010_2019_conv_lstm,retrain_short,1v / 1h,0.556212,5.514706,6363298,351.042,57831.656,3258988.0,2756821.0,100.000000,0.137812
2,linz_2010_2019_conv_lstm,static,1v / 1h,0.546556,5.284808,227968,20.099,2115.672,3718917.0,1460112.0,100.000000,0.000000
3,linz_2010_2019_mixed_arch,static,1v / 1h,0.536456,4.793662,739528,52.245,2428.969,2952704.0,854490.0,44.094332,55.905668
4,linz_2010_2019_mixed_arch,static,2v / 1h,0.540840,4.977960,217698,21.412,1027.797,2560208.0,324989.0,9.522901,11.030534
...,...,...,...,...,...,...,...,...,...,...,...,...
98,vienna_2019_2019_simple_dense,retrain_short,1v / 1h,0.536909,4.542864,1997695,116.002,3264.328,3788197.0,1463185.0,100.000000,0.167294
99,vienna_2019_2019_simple_dense,static,1v / 1h,0.557817,5.565055,246776,26.184,2163.656,3832375.0,1445339.0,100.000000,0.000000
100,vienna_2019_2019_simple_lstm,retrain_long,1v / 1h,0.546274,5.046360,1467938,95.582,3192.297,3779704.0,1566749.0,100.000000,0.075301
101,vienna_2019_2019_simple_lstm,retrain_short,1v / 1h,0.546204,4.861111,2321803,151.644,5056.266,3868514.0,1801892.0,100.000000,0.156342


In [2]:
# Base name (without extension) of the persisted results table.
file_name = "results_analysis"
# Folder that receives the pickle, CSV and LaTeX files written by the cells below.
folder = "analysis"

import pandas as pd
import os

In [3]:
# Make sure the output folder exists: to_pickle / to_csv do not create missing
# parent directories, so a fresh checkout would otherwise fail here.
os.makedirs(folder, exist_ok=True)

print(f"💾 Saving results as PICKLE {folder}/{file_name}.pickle")
results_df.to_pickle(os.path.join(folder, f"{file_name}.pickle"))
print(f"💾 Saving results as CSV {folder}/{file_name}.csv")
results_df.to_csv(os.path.join(folder, f"{file_name}.csv"))

💾 Saving results as PICKLE analysis/results_analysis.pickle
💾 Saving results as CSV analysis/results_analysis.csv


In [4]:
def save_results(results: pd.DataFrame, name: str) -> None:
    """Pickle ``results`` to ``<folder>/<name>.pickle`` and announce it on stdout.

    :param results: frame to persist.
    :param name: file name without extension.
    """
    pickle_file = f"{name}.pickle"
    print(f"💾 Saving results as PICKLE {folder}/{name}.pickle")
    results.to_pickle(os.path.join(folder, pickle_file))


def save_latex_table(results: pd.DataFrame, name: str) -> None:
    """Export ``results`` as a LaTeX table to ``<folder>/<name>.tex``.

    :param results: frame to render.
    :param name: file name without extension.
    """
    styler = (
        results.style
        .format(precision=3, escape="latex")   # 3-digit precision, LaTeX-escaped cell values
        .format_index(escape="latex", axis=1)  # LaTeX-escaped column headers
        .hide(axis="index")                    # drop the index column from the table
    )
    print(f"💾 Saving results as LaTeX table {folder}/{name}.tex")
    styler.to_latex(os.path.join(folder, f"{name}.tex"))


def _pct_above_best(values: pd.Series, best: float) -> pd.Series:
    """Return how far each value lies above ``best``, as a percentage of ``best``."""
    return ((values - best) / best) * 100


# Reload the persisted results and add derived resource columns.
df: pd.DataFrame = pd.read_pickle(os.path.join(folder, f"{file_name}.pickle"))

best_cpu = df["cpu_time"].min()
df["cpu_time_diff"] = _pct_above_best(df["cpu_time"], best_cpu)

best_mem = df["max_memory"].min()
df["max_memory_diff"] = _pct_above_best(df["max_memory"], best_mem)

df["total_data_kb"] = (df["data_sent"] + df["data_received"]) / 1000
df["mallocs_k"] = df["mallocs"] / 1000

best_data = df["total_data_kb"].min()
df["total_data_diff"] = _pct_above_best(df["total_data_kb"], best_data)

# Overwrites the pickle that was just read, now including the derived columns.
save_results(df, file_name)

# Column groups: configuration labels plus the metrics reported per research question.
config_columns = ["portfolio", "strategy", "violation_threshold"]
rq1_metrics = ["rmse", "violation_rate"]
rq2_metrics = ["transmission_rate", "switch_rate"]
rq3_metrics = [
    "cpu_time", "cpu_time_diff", "mallocs", "max_memory", "max_memory_diff",
    "data_sent", "data_received", "total_data_kb",
]

# RQ1
rq1_df: pd.DataFrame = df[config_columns + rq1_metrics]
rq1_name = f"rq1_{file_name}"
save_results(rq1_df, rq1_name)
for sort_keys, table_name in (
    (["rmse", "violation_rate"], "rq1_rmse"),
    (["violation_rate", "rmse"], "rq1_violations"),
):
    save_latex_table(rq1_df.sort_values(by=sort_keys, ascending=True), table_name)

# RQ2
rq2_df: pd.DataFrame = df[config_columns + rq2_metrics]
rq2_name = f"rq2_{file_name}"
save_results(rq2_df, rq2_name)
for sort_keys, table_name in (
    (rq2_metrics, "rq2_transmission_rate"),
    (rq2_metrics[::-1], "rq2_switch_rate"),
):
    save_latex_table(rq2_df.sort_values(by=sort_keys, ascending=True), table_name)

# RQ3
rq3_df: pd.DataFrame = df[config_columns + rq3_metrics]
rq3_name = f"rq3_{file_name}"
save_results(rq3_df, rq3_name)
for sort_keys, table_name in (
    (["cpu_time", "mallocs"], "rq3_cpu"),
    (["max_memory", "mallocs"], "rq3_memory"),
    (["total_data_kb"], "rq3_transmission"),
):
    save_latex_table(rq3_df.sort_values(by=sort_keys, ascending=True), table_name)


💾 Saving results as PICKLE analysis/results_analysis.pickle
💾 Saving results as PICKLE analysis/rq1_results_analysis.pickle
💾 Saving results as LaTeX table analysis/rq1_rmse.tex
💾 Saving results as LaTeX table analysis/rq1_violations.tex
💾 Saving results as PICKLE analysis/rq2_results_analysis.pickle
💾 Saving results as LaTeX table analysis/rq2_transmission_rate.tex
💾 Saving results as LaTeX table analysis/rq2_switch_rate.tex
💾 Saving results as PICKLE analysis/rq3_results_analysis.pickle
💾 Saving results as LaTeX table analysis/rq3_cpu.tex
💾 Saving results as LaTeX table analysis/rq3_memory.tex
💾 Saving results as LaTeX table analysis/rq3_transmission.tex
