# Compute Speedups for Runtime Measurements and Numbers of Visited Subgroups

## Default Values for Papermill Parameters

In [None]:
# CSV file holding the runtime statistics to read; it is passed through
# util.prepend_experiment_output_path before it is opened.
PARAM_RUNTIME_STATISTICS_PATH = "../outputs/statistics_merged_result_set.csv"
# Prefix put in front of the input file's basename to form the output filename.
PARAM_OUT_FILENAME_PREFIX = "speedups_"
# Column of the statistics table that is used as the runtime value when
# computing the time speedup.
PARAM_RUNTIME_STATISTIC = "time_mean"

## Import and Set Parameters

In [None]:
from subroc import util

import pandas as pd
import os

# Let the project helper rewrite the statistics path. Presumably it prefixes the
# experiment output location taken from the environment -- confirm in subroc.util.
PARAM_RUNTIME_STATISTICS_PATH = util.prepend_experiment_output_path(PARAM_RUNTIME_STATISTICS_PATH)

# Directory that receives this notebook's result file; "../outputs" is used when
# the STAGE_OUTPUT_PATH environment variable is not set.
STAGE_OUTPUT_PATH = os.getenv("STAGE_OUTPUT_PATH", "../outputs")

## Read the Runtime Statistics

In [None]:
# Load the runtime statistics table from the (already resolved) CSV path.
statistics_df = pd.read_csv(str(PARAM_RUNTIME_STATISTICS_PATH))

## Compute Speedups of Runtime Measurements and Numbers of Visited Subgroups

In [None]:
# Pair every run without optimistic estimate (OE) with the run that shares its
# quality function, depth and optimization mode but has a different OE setting,
# then relate their runtime statistics and their numbers of visited subgroups.
df_groupby = statistics_df.groupby("optimistic_estimate", as_index=False)
speedups = []

# `groups` maps each group key to the index *labels* of its rows. Fall back to an
# empty list so that a table without any run lacking the OE produces an empty
# result instead of a TypeError from iterating over None.
for label in df_groupby.groups.get(False, []):
    # Labels have to be resolved with .loc; .iloc is positional and only agrees
    # with the labels as long as the frame carries a default RangeIndex.
    row_no_oe = statistics_df.loc[label]

    same_configuration = (
        (statistics_df["qf_name"] == row_no_oe["qf_name"])
        & (statistics_df["depth"] == row_no_oe["depth"])
        & (statistics_df["optimization_mode"] == row_no_oe["optimization_mode"])
    )
    other_oe_setting = statistics_df["optimistic_estimate"] != row_no_oe["optimistic_estimate"]
    rows_with_oe = statistics_df[same_configuration & other_oe_setting]

    # Without a counterpart there is nothing to compare against; report the gap
    # explicitly rather than failing with an out-of-bounds IndexError.
    if rows_with_oe.empty:
        print(
            "No run with optimistic estimate found for "
            f"qf_name={row_no_oe['qf_name']}, depth={row_no_oe['depth']}, "
            f"optimization_mode={row_no_oe['optimization_mode']}; skipping."
        )
        continue

    # If several rows match, the first one is used.
    row_with_oe = rows_with_oe.iloc[0]

    # Time speedup: runtime without OE divided by runtime with OE.
    time_mean_no_oe = row_no_oe[PARAM_RUNTIME_STATISTIC]
    time_mean_with_oe = row_with_oe[PARAM_RUNTIME_STATISTIC]
    time_speedup = time_mean_no_oe / time_mean_with_oe

    # NOTE(review): unlike the time speedup, this ratio is with-OE divided by
    # without-OE, i.e. the share of subgroups still visited when the OE is used.
    # Confirm that this direction is intended given the "_speedup" column name.
    num_visited_subgroups_no_oe = row_no_oe["num_visited_subgroups"]
    num_visited_subgroups_with_oe = row_with_oe["num_visited_subgroups"]
    num_visited_subgroups_speedup = num_visited_subgroups_with_oe / num_visited_subgroups_no_oe

    speedups.append([
        row_no_oe["qf_name"],
        row_no_oe["depth"],
        row_no_oe["optimization_mode"],
        time_speedup,
        num_visited_subgroups_speedup,
    ])

speedups_df = pd.DataFrame(
    speedups,
    columns=["qf_name", "depth", "optimization_mode", "time_speedup", "num_visited_subgroups_speedup"],
)

print(speedups_df)

## Write the Result

In [None]:
# Name the result after the input file, prefixed with PARAM_OUT_FILENAME_PREFIX,
# and write it into the stage output directory without the index column.
input_file_basename = os.path.split(PARAM_RUNTIME_STATISTICS_PATH)[1]
speedups_df.to_csv(
    path_or_buf=f"{STAGE_OUTPUT_PATH}/{PARAM_OUT_FILENAME_PREFIX}{input_file_basename}",
    index=False,
)