In [6]:
import pandas as pd
import numpy as np
import os
import pickle
from performance_profile import get_workloads_time_to_target
from performance_profile import BASE_WORKLOADS
from scipy import stats
from plotting_utils import PLOT_STYLE

In [7]:
# Pick the ruleset whose scoring results should be loaded from disk.
SELF_TUNING = True
results_path = '../../results/'
if SELF_TUNING:
    file_path = 'self_tuning/self_tuning_scoring_results.txt'
else:
    file_path = 'external_tuning/external_tuning_scoring_results.txt'
RESULTS_FILE = os.path.join(results_path, file_path)

# Despite the .txt extension the file is opened in binary mode and unpickled.
# NOTE: pickle can execute arbitrary code while loading, so only point this
# at result files from a trusted source.
with open(RESULTS_FILE, 'rb') as f:
    results = pickle.load(f)

In [8]:
# Map every submission name to the table returned by
# get_workloads_time_to_target for that submission's results.
# NOTE(review): the original comment said this computes a median over
# runtimes; that happens (if at all) inside the helper -- confirm there.
times = {}
for submission in results:
    submission_results = results[submission]
    times[submission] = get_workloads_time_to_target(
        submission_results,
        submission,
        time_col="score",
        self_tuning_ruleset=SELF_TUNING,
    )



In [9]:
# Maximum runtime budget per base workload (presumably in seconds, given the
# "+1 second" penalty that replace_inf adds on top of it).
MAX_BUDGETS = {
    'criteo1tb': 7703,
    'fastmri': 8859,
    'imagenet_resnet': 63_008,
    'imagenet_vit': 77_520,
    'librispeech_conformer': 61_068,
    'librispeech_deepspeech': 55_506,
    'ogbg': 18_477,
    'wmt': 48_151,
}

def replace_inf(row):
  """Replace infs and NaNs with the maximum runtime budget (+1 second).

  The budget is looked up in ``MAX_BUDGETS`` under ``row.name`` and is
  multiplied by 3 when the module-level ``SELF_TUNING`` flag is truthy.

  Args:
      row (pd.Series): The original row; its ``name`` must be a key of
        ``MAX_BUDGETS``.

  Returns:
      pd.Series: A new Series with ``np.inf`` and NaN entries replaced. The
        input is left unmodified, so the function is safe to use with
        ``DataFrame.apply`` and to re-run.

  Raises:
      KeyError: If ``row.name`` is not a key of ``MAX_BUDGETS``.
  """
  workload_name = row.name
  # Factor of 3 for self-tuning ruleset
  factor = 3 if SELF_TUNING else 1
  penalty_runtime = factor * MAX_BUDGETS[workload_name] + 1
  # Build a new Series instead of replacing in place: pandas does not support
  # functions that mutate the object passed in by DataFrame.apply.
  return row.replace(np.inf, penalty_runtime).fillna(penalty_runtime)

In [10]:
# Combine all times into one dataframe. Concatenating the dict puts its keys
# in an extra outermost index level.
merged_df = pd.concat(times)

# Drop the first index level (the dict keys added by pd.concat)
merged_df = merged_df.droplevel(level=0)

# Only keep base workloads
workloads_to_keep = [col for col in merged_df.columns if col in BASE_WORKLOADS]
merged_df = merged_df[workloads_to_keep]

# Replace infs/NaNs with maximum runtime budget (+1 second), column by column
# (each column is one workload, so replace_inf sees the workload as row.name)
merged_df = merged_df.apply(replace_inf, axis=0)

# Get the baseline runtimes for each workload
baseline_runtimes = merged_df.loc["prize_qualification_baseline"]

# Runtime relative to the baseline, per workload (1.0 == same as baseline,
# below 1.0 == less time than the baseline)
relative_runtimes = merged_df.div(baseline_runtimes, axis=1)

# Geometric mean of the relative runtimes across workloads
merged_df["geometric_mean"] = stats.gmean(relative_runtimes, axis=1)

# Sort by geometric mean, smallest relative runtime first
merged_df = merged_df.sort_values(by="geometric_mean", ascending=True)

# Percentage change of the geometric-mean runtime w.r.t. the baseline.
# Sign convention of the rendered string: a runtime at or above the baseline
# (x >= 0) is printed with a leading "-", a runtime below it with a "+".
# The backslash is escaped explicitly ("\\%") because "\%" is an invalid escape
# sequence in a normal string literal; the emitted text is still `\%`.
speedups = (merged_df["geometric_mean"] - 1) * 100
speedups = speedups.apply(lambda x: f"-{x:.2f}\\%" if x >= 0 else f"+{abs(x):.2f}\\%")
merged_df["speedup"] = speedups

# Drop the geometric mean column and workloads, leaving only "speedup"
merged_df = merged_df.drop(columns=["geometric_mean"])
merged_df = merged_df.drop(columns=workloads_to_keep)

# Beautify the submission names: use the 'command' entry from PLOT_STYLE when
# one exists, otherwise keep the raw name
merged_df.index = merged_df.index.map(lambda x: PLOT_STYLE.get(x, {}).get('command', x))

# Printed (not displayed) on purpose: the LaTeX source is the desired output
print(merged_df.to_latex())

\begin{tabular}{ll}
\toprule
workload & speedup \\
submission &  \\
\midrule
\sfadam & +7.76\% \\
\baseline & -0.00\% \\
\nadamwseq & -92.44\% \\
\sinvnum & -157.67\% \\
\sinv & -168.63\% \\
\adamg & -294.17\% \\
\bottomrule
\end{tabular}

