# BenchBox Visualization Examples

This notebook demonstrates **professional visualization techniques** for benchmark results. Learn how to create publication-quality charts that effectively communicate performance insights.

## What You'll Learn

- **Basic charts**: Bar charts with value labels, grouped bar charts
- **Performance analysis**: Histograms, box plots, violin plots
- **Comparison visualizations**: Heatmaps, grouped bars, side-by-side comparisons
- **Statistical visualizations**: Percentile plots, distribution analysis, outlier detection
- **Advanced techniques**: Pareto charts, speedup comparisons, multi-panel dashboards
- **Customization**: Colors, themes, annotations, platform branding

## Visualization Library

We use **matplotlib** and **seaborn** for flexibility and publication quality.

## Expected Runtime

All visualizations generate quickly:
- Setup: **5-10 seconds**
- Chart generation: **1-2 seconds per chart**
- Complete notebook: **1-2 minutes**

## 1. Setup and Sample Data

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global look-and-feel shared by every chart in this notebook
plt.style.use("seaborn-v0_8-darkgrid")
sns.set_palette("husl")

# Default figure size and font sizes, applied in a single batch
plt.rcParams.update(
    {
        "figure.figsize": (10, 6),
        "font.size": 10,
        "axes.labelsize": 11,
        "axes.titlesize": 12,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "legend.fontsize": 10,
    }
)

# Directory that receives every saved chart (created on first run)
output_dir = "./visualization_examples"
os.makedirs(output_dir, exist_ok=True)

print(
    "‚úÖ Visualization environment configured",
    f"üìÅ Charts will be saved to: {output_dir}",
    sep="\n",
)

In [None]:
# Create sample benchmark data for demonstration.
# The legacy global seed is kept (instead of switching to default_rng) so the
# generated numbers stay exactly the same as before.
np.random.seed(42)

# Sample data: the 22 TPC-H queries across 3 platforms
queries = [f"Q{i}" for i in range(1, 23)]

# Baseline execution time (seconds) per platform. This mapping is the single
# source of truth for the platform list, so the two can never drift apart.
base_times = {"DuckDB": 0.5, "BigQuery": 1.2, "Snowflake": 0.8}
platforms = list(base_times)

# One row per (platform, query) pair. Each time is the platform baseline scaled
# by a lognormal multiplier so that some queries are noticeably slower.
# The draws happen in platform-major, query-minor order.
data = [
    {
        "platform": platform,
        "query": query,
        "execution_time_s": base_times[platform] * np.random.lognormal(0, 0.5),
        "success": True,
    }
    for platform in platforms
    for query in queries
]

df = pd.DataFrame(data)

# Invariants the later cells rely on (pivot() needs unique platform/query pairs)
assert len(df) == len(platforms) * len(queries)
assert not df.duplicated(["platform", "query"]).any()
assert df["execution_time_s"].gt(0).all()

print(f"📊 Generated sample data: {len(df)} query executions")
print(f"   Platforms: {', '.join(platforms)}")
print(f"   Queries: {len(queries)}")
print("\nFirst few rows:")
# Last expression of the cell: rendered as a rich table by Jupyter
df.head()

## 2. Basic Performance Bar Chart

In [None]:
# Simple bar chart showing average performance by platform (fastest first)
fig, ax = plt.subplots(figsize=(10, 6))

platform_means = df.groupby("platform")["execution_time_s"].mean().sort_values()

# Platform brand colors; later cells reuse this mapping so every chart
# shows a given platform in the same color
colors = {"DuckDB": "#FFC220", "BigQuery": "#4285F4", "Snowflake": "#29B5E8"}
bar_colors = [colors.get(p, "#888888") for p in platform_means.index]

bars = ax.bar(
    platform_means.index, platform_means.values, color=bar_colors, alpha=0.8, edgecolor="black", linewidth=1.5
)

# Add value labels on bars (centered on top of each bar)
ax.bar_label(bars, fmt="%.2fs", fontweight="bold", fontsize=11)

# Take the query count from the data so the title stays correct for other datasets
n_queries = df["query"].nunique()

ax.set_ylabel("Average Execution Time (seconds)", fontsize=12, fontweight="bold")
ax.set_xlabel("Platform", fontsize=12, fontweight="bold")
ax.set_title(
    f"Platform Performance Comparison\nTPC-H Benchmark ({n_queries} queries)",
    fontsize=14,
    fontweight="bold",
    pad=20,
)
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/01_basic_bar_chart.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 01_basic_bar_chart.png")

## 3. Query-Level Performance Comparison

In [None]:
# Grouped bar chart comparing all queries across platforms
fig, ax = plt.subplots(figsize=(14, 6))

# Pivot data for grouped bars. pivot() sorts its labels, which would order the
# queries as text (Q1, Q10, Q11, ..., Q2) and the platforms alphabetically, so
# reindex to restore the natural query order and the notebook's platform order.
pivot_df = df.pivot(index="query", columns="platform", values="execution_time_s").reindex(
    index=queries, columns=platforms
)

# Each query gets a group of bars centered on its tick; the group spans
# 0.75 of the tick spacing regardless of how many platforms there are
x = np.arange(len(pivot_df.index))
n_series = len(pivot_df.columns)
width = 0.75 / n_series

for i, platform in enumerate(pivot_df.columns):
    # Shift so that the middle of the group sits exactly on the tick
    offset = width * (i - (n_series - 1) / 2)
    ax.bar(x + offset, pivot_df[platform], width, label=platform, color=colors.get(platform, "#888888"), alpha=0.8)

ax.set_xlabel("Query", fontsize=12, fontweight="bold")
ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
ax.set_title("Per-Query Performance Comparison", fontsize=14, fontweight="bold", pad=15)
ax.set_xticks(x)
ax.set_xticklabels(pivot_df.index, rotation=45, ha="right")
ax.legend(title="Platform", title_fontsize=11, fontsize=10)
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/02_grouped_bar_chart.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 02_grouped_bar_chart.png")

## 4. Distribution Analysis with Histogram

In [None]:
# Histogram showing distribution of query times, one panel per platform.
# The grid is sized from `platforms`, so adding or removing a platform needs no
# other change; squeeze=False keeps `axes` two-dimensional even for one panel.
n_panels = len(platforms)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

for ax, platform in zip(axes[0], platforms):
    platform_data = df.loc[df["platform"] == platform, "execution_time_s"]

    # Histogram
    ax.hist(platform_data, bins=15, color=colors.get(platform, "#888888"), alpha=0.7, edgecolor="black", linewidth=1)

    # Add mean and median lines; a gap between them indicates a skewed distribution
    mean_val = platform_data.mean()
    median_val = platform_data.median()

    ax.axvline(mean_val, color="red", linestyle="--", linewidth=2, label=f"Mean: {mean_val:.2f}s")
    ax.axvline(median_val, color="green", linestyle="--", linewidth=2, label=f"Median: {median_val:.2f}s")

    ax.set_xlabel("Execution Time (seconds)", fontsize=10)
    ax.set_ylabel("Frequency", fontsize=10)
    ax.set_title(f"{platform}\nQuery Time Distribution", fontsize=11, fontweight="bold")
    ax.legend(fontsize=9)
    ax.grid(axis="y", alpha=0.3)

plt.tight_layout()
plt.savefig(f"{output_dir}/03_histogram_distribution.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 03_histogram_distribution.png")

## 5. Box Plot for Outlier Detection

In [None]:
# Box plot to visualize quartiles and outliers
fig, ax = plt.subplots(figsize=(10, 6))

# Create box plot (one box per platform, drawn at x = 1..N by default)
bp = ax.boxplot(
    [df.loc[df["platform"] == p, "execution_time_s"].to_numpy() for p in platforms],
    patch_artist=True,  # boxes become patches so they can be filled with color
    showmeans=True,
    meanprops=dict(marker="D", markerfacecolor="red", markersize=8),
)

# Name the boxes via the tick labels instead of boxplot's `labels=` keyword,
# which is deprecated since Matplotlib 3.9 in favor of `tick_labels=`; setting
# the ticks directly works on both older and newer releases.
ax.set_xticks(range(1, len(platforms) + 1))
ax.set_xticklabels(platforms)

# Color boxes with the platform brand colors
for patch, platform in zip(bp["boxes"], platforms):
    patch.set_facecolor(colors.get(platform, "#888888"))
    patch.set_alpha(0.7)

ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
ax.set_xlabel("Platform", fontsize=12, fontweight="bold")
ax.set_title(
    "Query Time Distribution and Outliers\nBox Plot with Mean (◆) and Median (—)",
    fontsize=14,
    fontweight="bold",
    pad=20,
)
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/04_box_plot.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 04_box_plot.png")

## 6. Heatmap for Multi-Dimensional Comparison

In [None]:
# Heatmap showing relative performance
fig, ax = plt.subplots(figsize=(12, 8))

# Create pivot table. pivot() sorts its labels (Q1, Q10, Q11, ..., Q2), so
# reindex to restore the natural query order and the notebook's platform order.
heatmap_data = df.pivot(index="query", columns="platform", values="execution_time_s").reindex(
    index=queries, columns=platforms
)

# Normalize by row to show relative performance:
# (value - min) / (max - min) for each query
row_min = heatmap_data.min(axis=1)
row_range = heatmap_data.max(axis=1) - row_min
# If every platform ties on a query the range is 0 and the division would be
# 0/0 = NaN; dividing by 1 instead leaves such a row at 0 ("fastest") for all.
safe_range = row_range.where(row_range > 0, 1.0)
heatmap_normalized = heatmap_data.sub(row_min, axis=0).div(safe_range, axis=0)

# Create heatmap: color encodes the normalized value, text shows real seconds
sns.heatmap(
    heatmap_normalized,
    annot=heatmap_data.values,  # Show actual times
    fmt=".2f",
    cmap="RdYlGn_r",  # Red=slow, Green=fast
    cbar_kws={"label": "Relative Performance\n(0=fastest, 1=slowest)"},
    linewidths=0.5,
    linecolor="gray",
    ax=ax,
)

ax.set_title(
    "Relative Performance Heatmap\n(Actual times shown, color=relative speed)", fontsize=14, fontweight="bold", pad=20
)
ax.set_xlabel("Platform", fontsize=12, fontweight="bold")
ax.set_ylabel("Query", fontsize=12, fontweight="bold")

plt.tight_layout()
plt.savefig(f"{output_dir}/05_heatmap.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 05_heatmap.png")

## 7. Percentile Analysis

In [None]:
# Percentile comparison across platforms
fig, ax = plt.subplots(figsize=(12, 6))

percentiles = [25, 50, 75, 90, 95, 99]
x = np.arange(len(percentiles))

# Each percentile gets a group of bars centered on its tick; the group spans
# 0.75 of the tick spacing regardless of how many platforms there are
n_series = len(platforms)
width = 0.75 / n_series

for i, platform in enumerate(platforms):
    platform_data = df.loc[df["platform"] == platform, "execution_time_s"]
    # One call computes every requested percentile
    pct_values = np.percentile(platform_data, percentiles)

    # Shift so that the middle of the group sits exactly on the tick
    offset = width * (i - (n_series - 1) / 2)
    ax.bar(x + offset, pct_values, width, label=platform, color=colors.get(platform, "#888888"), alpha=0.8)

    # Add a value label on the highest percentile only (the last bar of each
    # platform). The previous `if i < len(bars)` guard compared a platform
    # index with the number of percentile bars and was always true.
    ax.text(
        x[-1] + offset,
        pct_values[-1] + 0.02,
        f"{pct_values[-1]:.2f}s",
        ha="center",
        va="bottom",
        fontsize=9,
        fontweight="bold",
    )

# Tick labels and subtitle both come from `percentiles`, so they cannot disagree
pct_labels = [f"P{p}" for p in percentiles]

ax.set_xlabel("Percentile", fontsize=12, fontweight="bold")
ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
ax.set_title(
    f"Percentile Performance Analysis\n{', '.join(pct_labels)}", fontsize=14, fontweight="bold", pad=20
)
ax.set_xticks(x)
ax.set_xticklabels(pct_labels)
ax.legend(title="Platform", title_fontsize=11)
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/06_percentile_analysis.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 06_percentile_analysis.png")

## 8. Pareto Chart (80/20 Rule)

In [None]:
# Pareto chart: identify queries responsible for most execution time
fig, ax1 = plt.subplots(figsize=(14, 6))

# Use one platform for demonstration; change this name to analyze another one
pareto_platform = "DuckDB"
threshold_pct = 80

# Slowest query first, so the cumulative curve rises as steeply as possible
platform_data = df[df["platform"] == pareto_platform].sort_values("execution_time_s", ascending=False).copy()

# Calculate cumulative percentage of the platform's total execution time
total_time = platform_data["execution_time_s"].sum()
platform_data["cumulative_pct"] = (platform_data["execution_time_s"].cumsum() / total_time) * 100

# Bar chart (left axis): individual query times
x = np.arange(len(platform_data))
ax1.bar(
    x,
    platform_data["execution_time_s"].values,
    color=colors.get(pareto_platform, "#888888"),
    alpha=0.7,
    edgecolor="black",
)
ax1.set_xlabel("Query (sorted by execution time)", fontsize=12, fontweight="bold")
ax1.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold", color="black")
ax1.tick_params(axis="y", labelcolor="black")

# Cumulative line (right axis, sharing the same x axis)
ax2 = ax1.twinx()
ax2.plot(x, platform_data["cumulative_pct"].values, color="red", marker="o", linewidth=2, markersize=4)
ax2.set_ylabel("Cumulative % of Total Time", fontsize=12, fontweight="bold", color="red")
ax2.tick_params(axis="y", labelcolor="red")
ax2.set_ylim([0, 105])

# Add threshold line
ax2.axhline(threshold_pct, color="green", linestyle="--", linewidth=2, alpha=0.7, label=f"{threshold_pct}% threshold")
ax2.legend(loc="lower right")

ax1.set_title(
    f"Pareto Chart: Query Execution Time Distribution\n{pareto_platform} Platform",
    fontsize=14,
    fontweight="bold",
    pad=20,
)
ax1.set_xticks(x[::2])  # Show every other query label
ax1.set_xticklabels(platform_data["query"].values[::2], rotation=45, ha="right")
ax1.grid(axis="y", alpha=0.3)

plt.tight_layout()
plt.savefig(f"{output_dir}/07_pareto_chart.png", dpi=150, bbox_inches="tight")
plt.show()

# Number of top queries needed to REACH the threshold: every query whose
# cumulative share is still below it, plus the one that crosses it. Counting
# `cumulative_pct <= threshold` instead stops one query short of the threshold.
cumulative = platform_data["cumulative_pct"].to_numpy()
n_top = min(int((cumulative < threshold_pct).sum()) + 1, len(cumulative))

print("💾 Saved: 07_pareto_chart.png")
print(f"\n💡 Insight: Top {n_top} queries account for {threshold_pct}% of execution time")

## 9. Violin Plot for Distribution Shape

In [None]:
# Violin plot shows full distribution shape
fig, ax = plt.subplots(figsize=(10, 6))

# Create violin plot (one violin per platform at x = 0..N-1)
parts = ax.violinplot(
    [df.loc[df["platform"] == p, "execution_time_s"].to_numpy() for p in platforms],
    positions=range(len(platforms)),
    showmeans=True,
    showmedians=True,
    widths=0.7,
)

# Color violins with the platform brand colors
for body, platform in zip(parts["bodies"], platforms):
    body.set_facecolor(colors.get(platform, "#888888"))
    body.set_alpha(0.7)

# By default the mean and median bars share one color and cannot be told
# apart; color and label them (red mean / green median, as in the histograms).
parts["cmeans"].set_color("red")
parts["cmeans"].set_label("Mean")
parts["cmedians"].set_color("green")
parts["cmedians"].set_label("Median")

ax.set_xticks(range(len(platforms)))
ax.set_xticklabels(platforms)
ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
ax.set_xlabel("Platform", fontsize=12, fontweight="bold")
ax.set_title(
    "Violin Plot: Query Time Distribution Shape\n(Width = frequency at that time)",
    fontsize=14,
    fontweight="bold",
    pad=20,
)
ax.legend(loc="upper right")
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/08_violin_plot.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 08_violin_plot.png")

## 10. Speedup Comparison

In [None]:
# Calculate speedup relative to slowest platform for each query
fig, ax = plt.subplots(figsize=(12, 6))

# pivot() sorts its labels (Q1, Q10, Q11, ..., Q2), so reindex to restore the
# natural query order and the notebook's platform order.
pivot_df = df.pivot(index="query", columns="platform", values="execution_time_s").reindex(
    index=queries, columns=platforms
)

# Speedup factor = slowest time for the query / this platform's time.
# The slowest platform therefore scores exactly 1.0 and faster ones score > 1.
slowest = pivot_df.max(axis=1)
speedup_df = 1 / pivot_df.div(slowest, axis=0)

# Plot: one group of bars per query, centered on its tick; the group spans
# 0.75 of the tick spacing regardless of how many platforms there are
x = np.arange(len(speedup_df.index))
n_series = len(speedup_df.columns)
width = 0.75 / n_series

for i, platform in enumerate(speedup_df.columns):
    # Shift so that the middle of the group sits exactly on the tick
    offset = width * (i - (n_series - 1) / 2)
    ax.bar(x + offset, speedup_df[platform], width, label=platform, color=colors.get(platform, "#888888"), alpha=0.8)

# Add 1.0 reference line (no speedup)
ax.axhline(1.0, color="red", linestyle="--", linewidth=2, alpha=0.5, label="Baseline (slowest)")

ax.set_xlabel("Query", fontsize=12, fontweight="bold")
ax.set_ylabel("Speedup vs Slowest Platform\n(Higher is Better)", fontsize=12, fontweight="bold")
ax.set_title("Relative Speedup Analysis\n(1.0 = slowest, higher = faster)", fontsize=14, fontweight="bold", pad=20)
ax.set_xticks(x)
ax.set_xticklabels(speedup_df.index, rotation=45, ha="right")
ax.legend(title="Platform", title_fontsize=11)
ax.grid(axis="y", alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig(f"{output_dir}/09_speedup_comparison.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 09_speedup_comparison.png")

## 11. Multi-Panel Summary Dashboard

In [None]:
# Create comprehensive 2x2 dashboard
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Panel 1 (top-left): average performance, fastest platform first
ax1 = axes[0, 0]
platform_means = df.groupby("platform")["execution_time_s"].mean().sort_values()
bar_colors = [colors.get(p, "#888888") for p in platform_means.index]
ax1.bar(platform_means.index, platform_means.values, color=bar_colors, alpha=0.8)
ax1.set_ylabel("Avg Time (s)", fontweight="bold")
ax1.set_title("Average Performance", fontsize=12, fontweight="bold")
ax1.grid(axis="y", alpha=0.3)

# Panel 2 (top-right): distribution (box plot)
ax2 = axes[0, 1]
bp = ax2.boxplot(
    [df.loc[df["platform"] == p, "execution_time_s"].to_numpy() for p in platforms],
    patch_artist=True,  # boxes become patches so they can be filled with color
)
# Name the boxes via the tick labels instead of boxplot's `labels=` keyword,
# which is deprecated since Matplotlib 3.9 in favor of `tick_labels=`; setting
# the ticks directly works on both older and newer releases.
ax2.set_xticks(range(1, len(platforms) + 1))
ax2.set_xticklabels(platforms)
for patch, platform in zip(bp["boxes"], platforms):
    patch.set_facecolor(colors.get(platform, "#888888"))
    patch.set_alpha(0.7)
ax2.set_ylabel("Time (s)", fontweight="bold")
ax2.set_title("Distribution & Outliers", fontsize=12, fontweight="bold")
ax2.grid(axis="y", alpha=0.3)

# Panel 3 (bottom-left): query-level heatmap.
# pivot() sorts its labels (Q1, Q10, Q11, ..., Q2), so reindex to restore the
# natural query order and the notebook's platform order.
ax3 = axes[1, 0]
heatmap_data = df.pivot(index="query", columns="platform", values="execution_time_s").reindex(
    index=queries, columns=platforms
)
# The small 0.01 s offset keeps log10 finite if a time is exactly zero
im = ax3.imshow(np.log10(heatmap_data.values + 0.01), cmap="RdYlGn_r", aspect="auto")
ax3.set_xticks(range(len(heatmap_data.columns)))
ax3.set_xticklabels(heatmap_data.columns, rotation=45, ha="right")
# Label every other row to keep the axis readable
ax3.set_yticks(range(0, len(heatmap_data.index), 2))
ax3.set_yticklabels(heatmap_data.index[::2])
ax3.set_title("Per-Query Heatmap (log scale)", fontsize=12, fontweight="bold")
fig.colorbar(im, ax=ax3, label="log₁₀(seconds)")

# Panel 4 (bottom-right): performance metrics table (axes frame hidden)
ax4 = axes[1, 1]
ax4.axis("tight")
ax4.axis("off")

# Calculate summary statistics: one pre-formatted row per platform
summary_data = []
for platform in platforms:
    platform_data = df.loc[df["platform"] == platform, "execution_time_s"]
    summary_data.append(
        [
            platform,
            f"{platform_data.mean():.3f}",
            f"{platform_data.median():.3f}",
            f"{np.percentile(platform_data, 95):.3f}",
            f"{platform_data.std():.3f}",
        ]
    )

table = ax4.table(
    cellText=summary_data,
    colLabels=["Platform", "Mean", "Median", "P95", "StdDev"],
    cellLoc="center",
    loc="center",
    colColours=["#E8E8E8"] * 5,
)
# Fix the font size and double the row height so the table is legible
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2)
ax4.set_title("Summary Statistics (seconds)", fontsize=12, fontweight="bold", pad=20)

# Reserve the top 1% of the figure for the overall title
fig.suptitle("Benchmark Performance Dashboard", fontsize=16, fontweight="bold", y=0.995)
plt.tight_layout(rect=[0, 0, 1, 0.99])
plt.savefig(f"{output_dir}/10_dashboard.png", dpi=150, bbox_inches="tight")
plt.show()

print("💾 Saved: 10_dashboard.png")

## 12. Summary and Best Practices

In [None]:
# Closing cell: print a plain-text cheat sheet of the guidance in this notebook.
banner = "=" * 70

print(banner)
print("üìä VISUALIZATION BEST PRACTICES")
print(banner)

# Which chart type answers which question
chart_guide = (
    "  ‚Ä¢ Bar charts: Compare categories (platforms, queries)",
    "  ‚Ä¢ Histograms: Show distribution of continuous data",
    "  ‚Ä¢ Box plots: Identify outliers and quartiles",
    "  ‚Ä¢ Violin plots: Show full distribution shape",
    "  ‚Ä¢ Heatmaps: Compare multiple dimensions at once",
    "  ‚Ä¢ Pareto charts: Identify most impactful items (80/20 rule)",
    "  ‚Ä¢ Line plots: Show trends over time or scale",
)
print("\nüé® Chart Selection Guide:")
print("\n".join(chart_guide))

# General design rules, already numbered in the text
design_principles = (
    "  1. Use consistent colors for platforms across charts",
    "  2. Add gridlines for easier reading",
    "  3. Label axes clearly with units",
    "  4. Add value labels for key data points",
    "  5. Use log scale for highly skewed data",
    "  6. Highlight important thresholds (budgets, SLAs)",
    "  7. Include legends for multi-series charts",
    "  8. Use titles that explain insights, not just describe",
)
print("\n‚ú® Design Principles:")
print("\n".join(design_principles))

# Echo the platform color mapping used throughout the charts
print("\nüéØ Platform-Specific Colors:")
for name, hex_code in colors.items():
    print(f"  ‚Ä¢ {name}: {hex_code}")

print(f"\nüìÅ All visualizations saved to: {output_dir}")

next_steps = (
    "  ‚Ä¢ Load your actual benchmark results",
    "  ‚Ä¢ Customize colors and styles for your brand",
    "  ‚Ä¢ Create custom dashboards combining multiple views",
    "  ‚Ä¢ Export to PDF for reports or presentations",
)
print("\nüí° Next Steps:")
print("\n".join(next_steps))
print(f"\n{banner}")