In [7]:
import seaborn as sns

# Select seaborn's "darkgrid" style as the plotting theme.
sns.set_theme(style="darkgrid")


# cpp_across_strategies

In [8]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Raw measurements, grouped per strategy as {num_threads: timing}.
# Integer and float literals are kept exactly as recorded.
_timings_by_strategy = {
    "SEQUENTIAL": {1: 90.8},
    "GLOBAL_LOCK": {1: 121.8, 2: 515.6, 4: 506.8, 8: 863.4, 16: 1152.6},
    "TWO_PHASE_CENTRALIZED_MERGE": {1: 138, 2: 155.2, 4: 173.2, 8: 164.6, 16: 175.6},
    "SIMPLE_TWO_PHASE_RADIX": {1: 188, 2: 144, 4: 81.4, 8: 51.4, 16: 32.8},
    "SIMPLE_THREE_PHASE_RADIX": {1: 221, 2: 202.2, 4: 104.8, 8: 63, 16: 48.6},
    "IMPLICIT_REPARTITIONING": {1: 174.6, 2: 106, 4: 53.2, 8: 33.6, 16: 40.4},
    "DUCKDBISH_TWO_PHASE": {1: 235, 2: 185.8, 4: 105.4, 8: 61, 16: 50.2},
}

# Flatten into (strategy, category, num_threads, timing) records; dicts keep
# insertion order, so rows come out strategy by strategy, threads ascending.
data = [
    (strategy, "aggregation", num_threads, timing)
    for strategy, per_thread in _timings_by_strategy.items()
    for num_threads, timing in per_thread.items()
]

# Create DataFrame: one row per (strategy, num_threads) measurement.
df = pd.DataFrame(data, columns=["strategy", "category", "num_threads", "timing_ms"])

# Normalize to speedup over 1 thread for each strategy.
# Baseline Series: each strategy's single-thread timing, indexed by strategy.
df_single_thread = df.loc[df["num_threads"] == 1].set_index("strategy")["timing_ms"]

# Vectorized lookup: map every row's strategy to its baseline timing, then
# divide by the row's own timing (replaces a row-wise apply).
df["speedup"] = df["strategy"].map(df_single_thread) / df["timing_ms"]

# A strategy without a 1-thread row would map to NaN; fail loudly instead of
# silently dropping points from the plot.
assert df["speedup"].notna().all(), "every strategy needs a 1-thread baseline timing"

# Plot speedup against thread count, one line per strategy.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df, x="num_threads", y="speedup", hue="strategy", marker="o", ax=ax)
ax.set_title("Speedup of Aggregation Strategies vs Number of Threads")
ax.set_xlabel("Number of Threads")
ax.set_ylabel("Speedup (Relative to 1 Thread)")
ax.set_xticks([1, 2, 4, 8, 16])  # ticks only at the measured thread counts
ax.grid(True)
fig.tight_layout()

# savefig does not create missing directories, so ensure the target folder
# exists before writing (otherwise a fresh checkout fails here).
output_path = Path("figures/cpp_across_strategies.png")
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path, dpi=300)

# Close the figure once it has been written to disk.
plt.close(fig)
