# Compute Statistics over Runtime Measurements

## Default Values for Papermill Parameters

In [6]:
# Default parameter values for this notebook; papermill may override them.
# Path of the CSV file with the runtime measurements (read via pd.read_csv below).
PARAM_MEASUREMENTS_PATH = "../outputs/merged_result_set.csv"
# Prefix put in front of the input file's basename to form the output filename.
PARAM_OUT_FILENAME_PREFIX = "statistics_"

## Import and Set Parameters

In [None]:
from subroc import util

import pandas as pd
import os

# Resolve the measurements path through the project helper.
# NOTE(review): presumably this prepends an experiment output directory taken
# from the environment -- confirm against subroc.util.
PARAM_MEASUREMENTS_PATH = util.prepend_experiment_output_path(
    PARAM_MEASUREMENTS_PATH
)

# Directory the result of this notebook is written to; falls back to
# "../outputs" when the STAGE_OUTPUT_PATH environment variable is not set.
STAGE_OUTPUT_PATH = os.getenv("STAGE_OUTPUT_PATH", "../outputs")

## Read the Measurements

In [8]:
# Load the runtime measurements into a DataFrame. The path is passed directly;
# wrapping it in an f-string added nothing.
runtimes_df = pd.read_csv(PARAM_MEASUREMENTS_PATH)

## Compute Statistics of Runtime Measurements

In [None]:
# Columns that identify one group of measurements.
groupby_columns = ["qf_name", "optimistic_estimate", "depth", "optimization_mode"]
df_groupby = runtimes_df.groupby(groupby_columns, as_index=False)

# Compute every per-group statistic in one named aggregation. This yields the
# group columns followed by the statistics in the order listed here, without
# stitching separately computed frames together by positional index and
# without mutating an intermediate result in place.
time_statistics_df = df_groupby.agg(
    time_mean=("time", "mean"),
    time_median=("time", "median"),
    time_min=("time", "min"),
    time_max=("time", "max"),
    time_std=("time", "std"),
    # Group mean of the visited-subgroup count; the column keeps its
    # original name.
    num_visited_subgroups=("num_visited_subgroups", "mean"),
)

## Write the Statistics Result

In [10]:
# The output file is named after the input file, with
# PARAM_OUT_FILENAME_PREFIX in front, and placed in STAGE_OUTPUT_PATH.
input_file_basename = os.path.basename(PARAM_MEASUREMENTS_PATH)
output_path = f"{STAGE_OUTPUT_PATH}/{PARAM_OUT_FILENAME_PREFIX}{input_file_basename}"

# Write the statistics without the DataFrame index column.
time_statistics_df.to_csv(output_path, index=False)