# BenchBox Platform Comparison

This notebook demonstrates how to **compare performance and costs** across multiple database platforms using BenchBox. Whether you're evaluating cloud data warehouses (Databricks, BigQuery, Snowflake, Redshift) or local analytical databases (DuckDB, SQLite), this notebook provides tools to make data-driven platform decisions.

## What You'll Learn

- **Load and normalize** benchmark results from multiple platforms
- **Compare performance** using statistical analysis and visualizations
- **Analyze cost-effectiveness** across cloud platforms
- **Identify platform strengths** for different query types
- **Generate recommendations** based on your workload characteristics

## Prerequisites

You should have run benchmarks on at least 2 platforms using the platform-specific notebooks:
- `databricks_benchmarking.ipynb`
- `bigquery_benchmarking.ipynb`
- `snowflake_benchmarking.ipynb`
- `redshift_benchmarking.ipynb`
- `duckdb_benchmarking.ipynb`
- `sqlite_benchmarking.ipynb`

## Expected Runtime

This notebook performs analysis on existing results, so it runs quickly:
- Loading and preparation: **30-60 seconds**
- Visualization generation: **1-2 minutes**
- Complete notebook: **2-3 minutes**

## 1. Installation & Setup

In [None]:
# Install BenchBox if not already installed
# !pip install benchbox

import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from benchbox import __version__

# Report the runtime context so saved notebook output records which BenchBox
# version, BENCHBOX_ENV value ('development' when the variable is unset), and
# working directory produced it.
print(f"BenchBox version: {__version__}")
print(f"Environment: {os.environ.get('BENCHBOX_ENV', 'development')}")
print(f"Working directory: {os.getcwd()}")

In [None]:
# Configuration
# One dictionary shared by every later cell: where each platform's result files
# live, the pricing inputs for the cost estimates, the plot colors, and the
# directory that receives the comparison artifacts.
config = {
    # Directories where benchmark results are stored
    # (paths are relative, so they resolve against the working directory)
    "results_dirs": {
        "databricks": "./benchmark_results/databricks",
        "bigquery": "./benchmark_results/bigquery",
        "snowflake": "./benchmark_results/snowflake",
        "redshift": "./benchmark_results/redshift",
        "duckdb": "./benchmark_results/duckdb",
        "sqlite": "./benchmark_results/sqlite",
    },
    # Cloud platform pricing (example rates, update with your actual costs)
    # NOTE: calculate_platform_cost reads only dbu_per_hour, dbu_cost,
    # compute_cost, per_tb_processed, credit_cost and node_cost_per_hour.
    # The storage_per_tb and infrastructure_cost entries are not read by it.
    "pricing": {
        "databricks": {
            "dbu_per_hour": 8.0,  # DBU consumption rate
            "dbu_cost": 0.40,  # Cost per DBU
            "compute_cost": 0.15,  # EC2 cost per hour (example: i3.xlarge)
        },
        "bigquery": {
            "per_tb_processed": 5.00,  # On-demand pricing
            "storage_per_tb": 0.02,  # Active storage per TB/month
        },
        "snowflake": {
            "credit_cost": 2.00,  # Enterprise edition
            "storage_per_tb": 0.023,  # Per TB/month
        },
        "redshift": {
            "node_cost_per_hour": 0.25,  # ra3.xlplus on-demand
            "storage_per_tb": 0.024,  # RA3 managed storage
        },
        "duckdb": {
            "infrastructure_cost": 0.0,  # Local execution
        },
        "sqlite": {
            "infrastructure_cost": 0.0,  # Local execution
        },
    },
    # Platform colors for consistent visualization
    # (plot cells fall back to "#888888" for any platform missing here)
    "colors": {
        "databricks": "#FF3621",
        "bigquery": "#4285F4",
        "snowflake": "#29B5E8",
        "redshift": "#CC0000",
        "duckdb": "#FFC220",
        "sqlite": "#003B57",
    },
    # Output directory
    "output_dir": "./comparison_results",
}

# Create output directory
# exist_ok=True keeps the cell safe to re-run when the directory already exists.
os.makedirs(config["output_dir"], exist_ok=True)

print("‚úÖ Configuration loaded")
print(f"Platforms to compare: {', '.join(config['results_dirs'].keys())}")

## 2. Load Benchmark Results

Load results from all available platforms. This function will gracefully handle missing results.

In [None]:
def load_platform_results(platform: str, results_dir: str) -> Optional[Dict[str, Any]]:
    """Load the most recent benchmark results for a platform.

    The newest ``*.json`` file (by modification time) directly inside
    ``results_dir`` is parsed and returned. Every failure is reported with a
    printed message and a ``None`` return, so one missing or corrupt platform
    does not stop the comparison of the others.

    Args:
        platform: Platform name (e.g., 'databricks', 'bigquery')
        results_dir: Directory containing benchmark results

    Returns:
        Dictionary containing benchmark results, or None if the directory is
        missing, holds no JSON files, or the newest file cannot be read or
        parsed, is not a JSON object, or carries a ``query_results`` value
        that is not a list.
    """
    results_path = Path(results_dir)

    if not results_path.is_dir():
        print(f"‚ö†Ô∏è  No results found for {platform} at {results_dir}")
        return None

    # Find most recent results file
    json_files = list(results_path.glob("*.json"))
    if not json_files:
        print(f"‚ö†Ô∏è  No JSON results found for {platform}")
        return None

    try:
        # Sort by modification time, get most recent
        latest_file = max(json_files, key=lambda p: p.stat().st_mtime)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(latest_file, encoding="utf-8") as f:
            results = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: unreadable or vanished file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"‚ùå Error loading {platform} results: {e}")
        return None

    # Later cells call .get() on the payload and iterate its query list, so
    # reject anything with a different shape here instead of failing there.
    if not isinstance(results, dict):
        print(f"‚ùå Error loading {platform} results: top-level JSON value is not an object")
        return None

    query_results = results.get("query_results", [])
    if not isinstance(query_results, list):
        print(f"‚ùå Error loading {platform} results: 'query_results' is not a list")
        return None

    print(f"‚úÖ Loaded {platform} results from {latest_file.name}")
    print(f"   Benchmark: {results.get('benchmark_name', 'unknown')}")
    print(f"   Scale: {results.get('scale_factor', 'unknown')}")
    print(f"   Queries: {len(query_results)}")

    return results


# Load all available platform results
# Maps platform name -> parsed results dict, for platforms that loaded.
platform_results = {}

for platform, results_dir in config["results_dirs"].items():
    results = load_platform_results(platform, results_dir)
    # Truthiness check: skips None (load failed) and also an empty dict.
    if results:
        platform_results[platform] = results

print(f"\nüìä Loaded results from {len(platform_results)} platforms: {', '.join(platform_results.keys())}")

# Only a warning: the notebook keeps running with fewer than 2 platforms.
if len(platform_results) < 2:
    print("\n‚ö†Ô∏è  Warning: Need results from at least 2 platforms for meaningful comparison")
    print("   Run benchmarks using the platform-specific notebooks first.")

## 3. Normalize and Align Results

Different platforms may have run different queries at different scales. We need to normalize the data for fair comparison.

In [None]:
def normalize_results(platform_results: Dict[str, Dict]) -> pd.DataFrame:
    """Normalize benchmark results into a common DataFrame format.

    One row is produced per entry of each platform's ``query_results`` list.
    The column set is fixed, so the frame has the same schema even when no
    platform contributed any query (for example when nothing was loaded).

    Args:
        platform_results: Dictionary mapping platform names to result dictionaries

    Returns:
        DataFrame with columns: platform, benchmark, scale_factor, query,
                                execution_time_ms, success, error_message,
                                execution_time_s
    """
    columns = [
        "platform",
        "benchmark",
        "scale_factor",
        "query",
        "execution_time_ms",
        "success",
        "error_message",
    ]
    records = []

    for platform, results in platform_results.items():
        benchmark = results.get("benchmark_name", "unknown")
        scale_factor = results.get("scale_factor", 0.0)

        # `or []` also covers a key that is present but null.
        for query_result in results.get("query_results") or []:
            records.append(
                {
                    "platform": platform,
                    "benchmark": benchmark,
                    "scale_factor": scale_factor,
                    # Prefer the readable name, fall back to the id.
                    "query": query_result.get("query_name", query_result.get("query_id", "unknown")),
                    "execution_time_ms": query_result.get("execution_time_ms", None),
                    "success": query_result.get("success", False),
                    "error_message": query_result.get("error_message", None),
                }
            )

    # Explicit columns keep the schema when `records` is empty; without them
    # the column lookup below raises KeyError.
    df = pd.DataFrame(records, columns=columns)

    # Convert execution time to seconds for easier reading.
    # to_numeric turns missing/unparseable timings into NaN, so a column made
    # only of None values (object dtype) no longer fails on division.
    df["execution_time_s"] = pd.to_numeric(df["execution_time_ms"], errors="coerce") / 1000.0

    return df


# Create normalized DataFrame
# One row per query execution across every loaded platform.
df = normalize_results(platform_results)

print(f"üìä Normalized results: {len(df)} query executions")
print("\nPlatform distribution:")
print(df["platform"].value_counts())
print("\nBenchmark distribution:")
print(df["benchmark"].value_counts())
print("\nSuccess rate by platform:")
# Mean of the boolean success flag = fraction of successful executions.
print(df.groupby("platform")["success"].mean())

In [None]:
# Restrict the data to queries that at least two platforms completed, so the
# comparison is apples-to-apples
def find_common_queries(df: pd.DataFrame) -> pd.DataFrame:
    """Return the successful executions of queries shared by 2+ platforms.

    A query is identified by (benchmark, scale_factor, query); it is kept when
    successful runs of it exist on at least two distinct platforms.
    """
    key_cols = ["benchmark", "scale_factor", "query"]

    # Failed executions never count toward "ran on this platform".
    ok_rows = df[df["success"] == True].copy()

    # Distinct platforms observed for every query identity.
    per_query = ok_rows.groupby(key_cols)["platform"].apply(set).reset_index()
    per_query["platform_count"] = per_query["platform"].apply(len)

    # Identities seen on two or more platforms.
    shared_keys = per_query[per_query["platform_count"] >= 2][key_cols]

    # The inner join keeps only executions whose identity is shared.
    return ok_rows.merge(shared_keys, on=key_cols, how="inner")


# Successful executions of queries that ran on at least two platforms.
df_common = find_common_queries(df)

print(f"üìä Common queries across platforms: {len(df_common)} executions")
print(f"   Unique queries: {df_common['query'].nunique()}")
print("\nQueries per platform:")
print(df_common.groupby("platform").size())

# Fallback: with no shared queries, later cells analyse every successful
# execution instead, so per-platform numbers are no longer like-for-like.
if len(df_common) == 0:
    print("\n‚ö†Ô∏è  No common queries found. Platforms may have run different benchmarks or scales.")
    print("   Continuing with all results for individual platform analysis.")
    df_common = df[df["success"] == True].copy()

## 4. Overall Performance Comparison

Compare aggregate performance metrics across platforms.

In [None]:
# Calculate aggregate statistics per platform
# Result is indexed by platform; columns start as a two-level index of
# (source column, statistic).
platform_stats = (
    df_common.groupby("platform")
    .agg({"execution_time_s": ["mean", "median", "std", "min", "max", "sum"], "query": "count"})
    .round(3)
)

# Flatten the two-level columns into single names such as
# "execution_time_s_mean"; "query_count" is then renamed to "num_queries".
platform_stats.columns = ["_".join(col).strip() for col in platform_stats.columns.values]
platform_stats = platform_stats.rename(columns={"query_count": "num_queries"})

# Calculate geometric mean (better for skewed performance data)
# Computed as exp(mean(log(t))); exact zeros are replaced by 0.001 s first so
# the logarithm stays finite.
geo_means = (
    df_common.groupby("platform")["execution_time_s"]
    .apply(lambda x: np.exp(np.log(x.replace(0, 0.001)).mean()))
    .round(3)
)
platform_stats["execution_time_s_geomean"] = geo_means

# Sort by geometric mean
# Ascending, so the first row is the fastest platform; later cells rely on
# this order for rankings and plots.
platform_stats = platform_stats.sort_values("execution_time_s_geomean")

print("üìä Platform Performance Summary\n")
print(platform_stats)

print("\nüèÜ Rankings (lower is better):")
for i, platform in enumerate(platform_stats.index, 1):
    geo_mean = platform_stats.loc[platform, "execution_time_s_geomean"]
    print(f"{i}. {platform.capitalize()}: {geo_mean:.3f}s geometric mean")

In [None]:
# Visualization: Overall performance comparison
# Two side-by-side panels: geometric-mean bars (left) and per-platform
# execution-time box plots (right), saved as one PNG.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: Geometric mean comparison (most relevant for benchmarks)
ax1 = axes[0]
# platform_stats is already sorted by geometric mean, so bars follow that order.
platforms = platform_stats.index
geo_means = platform_stats["execution_time_s_geomean"].values
# Platforms without a configured color fall back to gray.
colors = [config["colors"].get(p, "#888888") for p in platforms]

bars = ax1.barh(platforms, geo_means, color=colors, alpha=0.8)
ax1.set_xlabel("Geometric Mean Execution Time (seconds)", fontsize=11)
ax1.set_title("Platform Performance Comparison\n(Lower is Better)", fontsize=12, fontweight="bold")
ax1.grid(axis="x", alpha=0.3)

# Add value labels
# Each label sits just right of its bar, offset by 2% of the longest bar.
for bar, value in zip(bars, geo_means):
    ax1.text(
        value + max(geo_means) * 0.02, bar.get_y() + bar.get_height() / 2, f"{value:.3f}s", va="center", fontsize=10
    )

# Plot 2: Distribution comparison (box plot)
ax2 = axes[1]
df_plot = df_common[["platform", "execution_time_s"]].copy()

# Create box plot
# The ordered categorical makes the sort follow the geometric-mean ranking
# instead of alphabetical order.
platforms_sorted = platform_stats.index.tolist()
df_plot["platform"] = pd.Categorical(df_plot["platform"], categories=platforms_sorted, ordered=True)
df_plot = df_plot.sort_values("platform")

# NOTE(review): newer Matplotlib releases appear to rename boxplot's `labels`
# parameter to `tick_labels` -- confirm against the installed version.
bp = ax2.boxplot(
    [df_plot[df_plot["platform"] == p]["execution_time_s"].values for p in platforms_sorted],
    labels=platforms_sorted,
    patch_artist=True,
    vert=False,
)

# Color the boxes
# patch_artist=True above is what makes the boxes fillable patches.
for patch, platform in zip(bp["boxes"], platforms_sorted):
    patch.set_facecolor(config["colors"].get(platform, "#888888"))
    patch.set_alpha(0.6)

ax2.set_xlabel("Execution Time (seconds)", fontsize=11)
ax2.set_title("Query Time Distribution by Platform", fontsize=12, fontweight="bold")
ax2.grid(axis="x", alpha=0.3)

# Save before show so the figure is written while it is still current.
plt.tight_layout()
plt.savefig(f"{config['output_dir']}/platform_performance_comparison.png", dpi=150, bbox_inches="tight")
plt.show()

print(f"\nüíæ Saved: {config['output_dir']}/platform_performance_comparison.png")

## 5. Query-Level Performance Analysis

Compare performance for individual queries across platforms.

In [None]:
# Build a query x platform table of mean execution times (seconds).
# Rows are keyed by query name only, so identically named queries from
# different benchmarks or scale factors are averaged into the same cell.
query_comparison = df_common.pivot_table(
    index="query", columns="platform", values="execution_time_s", aggfunc="mean"
).round(3)

# Speedup = slowest / fastest: how many times faster the best platform is than
# the worst for each query.
# "slowest" is computed after "fastest" was added as a column; the row maximum
# is unaffected because "fastest" never exceeds it.
query_comparison["fastest"] = query_comparison.min(axis=1)
query_comparison["slowest"] = query_comparison.max(axis=1)
query_comparison["speedup"] = (query_comparison["slowest"] / query_comparison["fastest"]).round(2)

# Sort by speedup (highest variation)
query_comparison = query_comparison.sort_values("speedup", ascending=False)

print("üìä Per-Query Performance Comparison (execution time in seconds)\n")
print(query_comparison.head(10))

print("\nüèÜ Queries with highest performance variation:")
for query in query_comparison.head(5).index:
    speedup = query_comparison.loc[query, "speedup"]
    fastest = query_comparison.loc[query, "fastest"]
    slowest = query_comparison.loc[query, "slowest"]
    print(f"  {query}: {speedup:.1f}x difference ({fastest:.3f}s - {slowest:.3f}s)")

In [None]:
# Visualization: Heatmap of query performance across platforms
# Figure height grows with the number of queries (0.3 in per row, minimum 6 in).
plt.figure(figsize=(12, max(6, len(query_comparison) * 0.3)))

# Select only platform columns for heatmap
# (drops the derived fastest/slowest/speedup columns added earlier)
platform_cols = [col for col in query_comparison.columns if col not in ["fastest", "slowest", "speedup"]]
heatmap_data = query_comparison[platform_cols]

# Create heatmap (log scale for better visualization if times vary widely)
# Cell colors come from log10(seconds); the annotations show the untransformed
# seconds values.
sns.heatmap(
    np.log10(heatmap_data + 0.001),  # Log scale, add small value to avoid log(0)
    annot=heatmap_data.values,  # Show actual values
    fmt=".3f",
    cmap="RdYlGn_r",  # Red (slow) to Green (fast)
    cbar_kws={"label": "log‚ÇÅ‚ÇÄ(seconds)"},
    linewidths=0.5,
    linecolor="gray",
)

plt.title(
    "Query Performance Heatmap (seconds)\nLog Scale: Green=Fast, Red=Slow", fontsize=14, fontweight="bold", pad=15
)
plt.xlabel("Platform", fontsize=12)
plt.ylabel("Query", fontsize=12)
plt.tight_layout()
plt.savefig(f"{config['output_dir']}/query_performance_heatmap.png", dpi=150, bbox_inches="tight")
plt.show()

print(f"üíæ Saved: {config['output_dir']}/query_performance_heatmap.png")

## 6. Platform Strengths Analysis

Identify which platform performs best for different types of queries.

In [None]:
# Row-wise helper: which platform holds the quickest run of a given query
def find_fastest_platform(row):
    """Return the platform with the lowest execution time for this row's query.

    The query name is taken from ``row.name`` (the index label of the row being
    processed) and looked up in the notebook-level ``df_common`` frame. The
    winner is the platform of the single quickest execution found there.
    Returns None when ``df_common`` has no rows for the query.
    """
    runs = df_common[df_common["query"] == row.name]
    if runs.empty:
        return None
    quickest_label = runs["execution_time_s"].idxmin()
    return runs.loc[quickest_label]["platform"]


# Label every query row with the platform that ran it fastest.
query_comparison["fastest_platform"] = query_comparison.apply(find_fastest_platform, axis=1)

# Count wins per platform
# value_counts returns platforms ordered from most to fewest wins.
platform_wins = query_comparison["fastest_platform"].value_counts()

print("üèÜ Platform Performance Wins (queries where platform was fastest)\n")
print(platform_wins)

# Calculate win percentage
# The denominator is the number of rows in query_comparison.
total_queries = len(query_comparison)
print("\nüìä Win Percentage:")
for platform, wins in platform_wins.items():
    pct = (wins / total_queries) * 100
    print(f"  {platform.capitalize()}: {wins}/{total_queries} ({pct:.1f}%)")

In [None]:
# Visualization: Platform strengths pie chart
# One wedge per platform in platform_wins, sized by its number of wins.
fig, ax = plt.subplots(figsize=(10, 7))

# Colors are looked up in platform_wins order so each wedge gets its own
# platform's color; unknown platforms fall back to gray.
colors_list = [config["colors"].get(p, "#888888") for p in platform_wins.index]
wedges, texts, autotexts = ax.pie(
    platform_wins.values,
    labels=[p.capitalize() for p in platform_wins.index],
    autopct="%1.1f%%",
    colors=colors_list,
    startangle=90,
    textprops={"fontsize": 12},
)

# Make percentage text bold
# (and white, for contrast against the colored wedges)
for autotext in autotexts:
    autotext.set_color("white")
    autotext.set_fontweight("bold")

ax.set_title(
    "Platform Performance Leadership\n(% of Queries Where Platform Was Fastest)", fontsize=14, fontweight="bold", pad=20
)

plt.tight_layout()
plt.savefig(f"{config['output_dir']}/platform_strengths.png", dpi=150, bbox_inches="tight")
plt.show()

print(f"üíæ Saved: {config['output_dir']}/platform_strengths.png")

## 7. Cost Analysis

Compare cost-effectiveness across cloud platforms. **Note**: These figures are estimates — update the `pricing` section of `config` (defined in Section 1) with your actual rates before relying on them.

In [None]:
def calculate_platform_cost(platform: str, results: Dict, pricing: Dict) -> Optional[float]:
    """Calculate estimated cost for a benchmark run.

    Time-based platforms are billed on the summed execution time of the
    successful queries only. The sizing assumptions can be overridden through
    optional keys in the platform's pricing entry; when a key is absent the
    documented default is used:

    * bigquery ``tb_processed_per_scale_factor`` (default 0.1)
    * snowflake ``credits_per_hour`` (default 1.0, i.e. an X-Small warehouse)
    * redshift ``node_count`` (default 2)

    Args:
        platform: Platform name
        results: Benchmark results dictionary
        pricing: Pricing configuration

    Returns:
        Estimated cost in USD, or None if not calculable (platform missing
        from ``pricing`` or not one of the supported platforms)
    """
    if platform not in pricing:
        return None

    platform_pricing = pricing[platform]

    # Calculate total execution time in hours.
    # `or 0` / `or []` tolerate values recorded as null: a successful query
    # without a timing contributes nothing instead of raising TypeError.
    total_time_ms = sum(
        (qr.get("execution_time_ms") or 0)
        for qr in (results.get("query_results") or [])
        if qr.get("success", False)
    )
    total_time_hours = total_time_ms / (1000 * 60 * 60)

    if platform == "databricks":
        # DBU charge plus the underlying compute charge for the same hours.
        dbu_consumption = total_time_hours * platform_pricing["dbu_per_hour"]
        dbu_cost = dbu_consumption * platform_pricing["dbu_cost"]
        compute_cost = total_time_hours * platform_pricing["compute_cost"]
        return dbu_cost + compute_cost

    if platform == "bigquery":
        # Estimate data processed (would need actual bytes from results)
        # For now, use scale factor as proxy; this branch ignores run time.
        scale = results.get("scale_factor", 0.1)
        tb_per_scale_unit = platform_pricing.get("tb_processed_per_scale_factor", 0.1)
        estimated_tb_processed = scale * tb_per_scale_unit  # Rough estimate
        return estimated_tb_processed * platform_pricing["per_tb_processed"]

    if platform == "snowflake":
        # Estimate credits (warehouse size * time)
        credits_per_hour = platform_pricing.get("credits_per_hour", 1.0)
        credits = total_time_hours * credits_per_hour
        return credits * platform_pricing["credit_cost"]

    if platform == "redshift":
        # Node hours = wall-clock hours * number of nodes in the cluster
        node_count = platform_pricing.get("node_count", 2)
        node_hours = total_time_hours * node_count
        return node_hours * platform_pricing["node_cost_per_hour"]

    if platform in ("duckdb", "sqlite"):
        return 0.0  # Local execution, no direct cloud cost

    return None


# Calculate costs for all platforms
# Maps platform name -> estimated USD cost; platforms for which
# calculate_platform_cost returns None are left out.
platform_costs = {}
for platform, results in platform_results.items():
    cost = calculate_platform_cost(platform, results, config["pricing"])
    if cost is not None:
        platform_costs[platform] = cost

print("üí∞ Estimated Benchmark Costs (USD)\n")
# Cheapest first.
for platform in sorted(platform_costs, key=platform_costs.get):
    cost = platform_costs[platform]
    print(f"  {platform.capitalize()}: ${cost:.4f}")

print("\n‚ö†Ô∏è  Note: These are estimates based on configuration. Update pricing config with your actual costs.")

In [None]:
# Calculate cost per query and cost-performance ratio
# A fixed column list keeps df_cost's schema (and makes it sortable) even when
# no platform has both statistics and a cost estimate.
cost_columns = ["platform", "total_cost", "cost_per_query", "geo_mean_time_s", "cost_per_second"]
cost_performance = []

for platform in platform_stats.index:
    if platform not in platform_costs:
        continue  # no cost estimate available for this platform

    cost = platform_costs[platform]
    geo_mean_time = platform_stats.loc[platform, "execution_time_s_geomean"]
    num_queries = platform_stats.loc[platform, "num_queries"]

    cost_per_query = cost / num_queries if num_queries > 0 else 0
    # Cost-performance ratio: lower is better (cost per second of execution)
    cost_per_second = cost / (geo_mean_time * num_queries) if geo_mean_time > 0 and num_queries > 0 else 0

    cost_performance.append(
        {
            "platform": platform,
            "total_cost": cost,
            "cost_per_query": cost_per_query,
            "geo_mean_time_s": geo_mean_time,
            "cost_per_second": cost_per_second,
        }
    )

# Cheapest per query first.
df_cost = pd.DataFrame(cost_performance, columns=cost_columns).sort_values("cost_per_query")

print("üí∞ Cost-Performance Analysis\n")
print(df_cost.to_string(index=False))

print("\nüèÜ Most Cost-Effective Platforms:")
# Number the rows by their position in the sorted frame. The frame's index
# labels still hold the pre-sort positions and must not be used as ranks.
for rank, (_, row) in enumerate(df_cost.head(3).iterrows(), 1):
    print(f"  {rank}. {row['platform'].capitalize()}: ${row['cost_per_query']:.6f} per query")

In [None]:
# Visualization: Cost vs Performance scatter plot
# One labelled point per platform: x = geometric-mean time, y = cost per query.
if len(df_cost) > 0:
    fig, ax = plt.subplots(figsize=(10, 7))

    for _, row in df_cost.iterrows():
        platform = row["platform"]
        ax.scatter(
            row["geo_mean_time_s"],
            row["cost_per_query"],
            s=300,
            color=config["colors"].get(platform, "#888888"),
            alpha=0.7,
            edgecolors="black",
            linewidths=2,
        )
        # Label is offset 10 points up and right so it does not cover the marker.
        ax.annotate(
            platform.capitalize(),
            (row["geo_mean_time_s"], row["cost_per_query"]),
            xytext=(10, 10),
            textcoords="offset points",
            fontsize=11,
            fontweight="bold",
        )

    ax.set_xlabel("Performance (Geometric Mean Time, seconds)\n‚Üê Faster", fontsize=12)
    ax.set_ylabel("Cost per Query (USD)\n‚Üì Cheaper", fontsize=12)
    ax.set_title("Cost vs Performance\n(Bottom-Left Quadrant = Best)", fontsize=14, fontweight="bold", pad=15)
    ax.grid(True, alpha=0.3)

    # Add quadrant lines at median
    # Drawn only with two or more platforms; the medians of time and cost
    # split the plot into the four quadrants named in the title.
    if len(df_cost) >= 2:
        median_time = df_cost["geo_mean_time_s"].median()
        median_cost = df_cost["cost_per_query"].median()
        ax.axvline(median_time, color="gray", linestyle="--", alpha=0.5, linewidth=1)
        ax.axhline(median_cost, color="gray", linestyle="--", alpha=0.5, linewidth=1)

    plt.tight_layout()
    plt.savefig(f"{config['output_dir']}/cost_vs_performance.png", dpi=150, bbox_inches="tight")
    plt.show()

    print(f"üíæ Saved: {config['output_dir']}/cost_vs_performance.png")
else:
    print("‚ö†Ô∏è  Insufficient cost data for visualization")

## 8. Statistical Comparison

Use the Mann-Whitney U test to determine whether the differences in query execution times between each pair of platforms are statistically significant.

In [None]:
from scipy import stats


def compare_platforms_statistically(df: pd.DataFrame, platform1: str, platform2: str) -> Optional[Dict]:
    """Compare two platforms using statistical tests.

    Runs a two-sided Mann-Whitney U test on the two platforms' execution times
    and reports an effect size (Cohen's d with the equally weighted pooled
    population standard deviation). Missing timings are dropped first so a
    single NaN cannot turn every statistic into NaN.

    Args:
        df: DataFrame with normalized results
        platform1: First platform name
        platform2: Second platform name

    Returns:
        Dictionary with statistical test results (plain Python floats and
        bools, so it serializes cleanly), or None when either platform has no
        usable timings. Differences are platform1 minus platform2, so a
        negative ``mean_diff`` means platform1 is faster on average.
    """
    p1_times = df.loc[df["platform"] == platform1, "execution_time_s"].dropna().to_numpy(dtype=float)
    p2_times = df.loc[df["platform"] == platform2, "execution_time_s"].dropna().to_numpy(dtype=float)

    if len(p1_times) == 0 or len(p2_times) == 0:
        return None

    # Mann-Whitney U test (non-parametric, good for performance data)
    _, p_value = stats.mannwhitneyu(p1_times, p2_times, alternative="two-sided")

    # Calculate effect size (Cohen's d)
    mean1, mean2 = np.mean(p1_times), np.mean(p2_times)
    std_pooled = np.sqrt((np.var(p1_times) + np.var(p2_times)) / 2)
    # Identical constant samples give a zero pooled deviation; report no effect.
    cohens_d = (mean1 - mean2) / std_pooled if std_pooled > 0 else 0

    mean_diff = mean1 - mean2
    return {
        "platform1": platform1,
        "platform2": platform2,
        "p_value": float(p_value),
        "significant": bool(p_value < 0.05),
        "cohens_d": float(cohens_d),
        "mean_diff": float(mean_diff),
        "mean_diff_pct": float(mean_diff / mean2 * 100) if mean2 > 0 else 0.0,
    }


# Compare all platform pairs
platforms = df_common["platform"].unique()
comparisons = []

# Each unordered pair is tested exactly once.
for i, p1 in enumerate(platforms):
    for p2 in platforms[i + 1 :]:
        result = compare_platforms_statistically(df_common, p1, p2)
        if result:
            comparisons.append(result)

# Explicit columns keep the frame sortable and filterable when fewer than two
# platforms were loaded and `comparisons` is therefore empty.
comparison_columns = [
    "platform1",
    "platform2",
    "p_value",
    "significant",
    "cohens_d",
    "mean_diff",
    "mean_diff_pct",
]
df_comparisons = pd.DataFrame(comparisons, columns=comparison_columns).sort_values("p_value")

print("üìä Statistical Platform Comparisons (Mann-Whitney U Test)\n")
print("Significant differences (p < 0.05):\n")

# astype(bool) gives a valid boolean mask even when the column is empty.
significant_rows = df_comparisons[df_comparisons["significant"].astype(bool)]

for _, row in significant_rows.iterrows():
    # mean_diff is platform1 - platform2, so a positive value means platform2
    # has the lower (faster) mean.
    faster = row["platform2"] if row["mean_diff"] > 0 else row["platform1"]
    slower = row["platform1"] if row["mean_diff"] > 0 else row["platform2"]
    diff_pct = abs(row["mean_diff_pct"])

    print(f"  ‚úì {faster.capitalize()} is significantly faster than {slower.capitalize()}")
    print(f"    Difference: {diff_pct:.1f}% (p={row['p_value']:.4f}, Cohen's d={abs(row['cohens_d']):.2f})\n")

if significant_rows.empty:
    print("  No statistically significant differences found (all p ‚â• 0.05)")
    print("  This could mean platforms have similar performance, or sample size is too small.")

## 9. Platform Selection Recommendations

Generate data-driven recommendations based on workload characteristics.

In [None]:
def generate_recommendations(platform_stats: pd.DataFrame, platform_costs: Dict, platform_wins: pd.Series) -> List[str]:
    """Generate platform selection recommendations.

    Each recommendation is produced only when the data needed for it exists,
    so empty statistics, or statistics without a usable standard deviation
    (for example one query per platform), yield fewer recommendations instead
    of an exception.

    Args:
        platform_stats: Per-platform statistics indexed by platform name, with
            the columns ``execution_time_s_geomean``, ``execution_time_s_std``
            and ``execution_time_s_mean``. A ``cv`` column (std / mean) is
            written to this frame as a side effect.
        platform_costs: Mapping of platform name to estimated total cost.
        platform_wins: Number of queries each platform ran fastest.

    Returns:
        Markdown-formatted recommendation strings, in a fixed order.
    """
    recommendations = []
    has_stats = len(platform_stats) > 0

    if has_stats:
        # 1. Overall fastest
        geo_means = platform_stats["execution_time_s_geomean"].dropna()
        if not geo_means.empty:
            fastest = geo_means.idxmin()
            fastest_time = geo_means.loc[fastest]
            recommendations.append(f"**Fastest Overall**: {fastest.capitalize()} ({fastest_time:.3f}s geometric mean)")

        # 2. Most consistent
        # Use coefficient of variation (CV) = std / mean
        platform_stats["cv"] = platform_stats["execution_time_s_std"] / platform_stats["execution_time_s_mean"]
        # CV is NaN where std is undefined; rank only platforms that have one.
        cv_known = platform_stats["cv"].dropna()
        if not cv_known.empty:
            most_consistent = cv_known.idxmin()
            cv = cv_known.loc[most_consistent]
            recommendations.append(f"**Most Consistent**: {most_consistent.capitalize()} (CV={cv:.2f}, lower variance)")

    # 3. Most cost-effective (if cost data available)
    if platform_costs and has_stats:
        # Find platform with best cost/performance ratio.
        # Zero-cost platforms are excluded: the ratio would divide by zero.
        cost_perf = {}
        for platform in platform_stats.index:
            if platform in platform_costs and platform_costs[platform] > 0:
                geo_mean = platform_stats.loc[platform, "execution_time_s_geomean"]
                if geo_mean > 0:
                    # Performance score (inverse of time) divided by cost
                    cost_perf[platform] = (1 / geo_mean) / platform_costs[platform]

        if cost_perf:
            most_cost_effective = max(cost_perf, key=cost_perf.get)
            recommendations.append(
                f"**Most Cost-Effective**: {most_cost_effective.capitalize()} (best performance per dollar)"
            )

    # 4. Most versatile (wins most query types)
    if len(platform_wins) > 0:
        most_versatile = platform_wins.idxmax()
        win_pct = (platform_wins[most_versatile] / platform_wins.sum()) * 100
        recommendations.append(
            f"**Most Versatile**: {most_versatile.capitalize()} (fastest for {win_pct:.1f}% of queries)"
        )

    # 5. Best for small datasets
    local_names = ("duckdb", "sqlite")
    local_platforms = [p for p in platform_stats.index if p in local_names]
    if local_platforms:
        fastest_local = min(local_platforms, key=lambda p: platform_stats.loc[p, "execution_time_s_geomean"])
        recommendations.append(
            f"**Best for Local/Small Data**: {fastest_local.capitalize()} (no cloud costs, good for <1GB)"
        )

    # 6. Best for large scale
    # Every platform that is not a local engine is treated as a cloud platform.
    cloud_platforms = [p for p in platform_stats.index if p not in local_names]
    if cloud_platforms:
        fastest_cloud = min(cloud_platforms, key=lambda p: platform_stats.loc[p, "execution_time_s_geomean"])
        recommendations.append(
            f"**Best for Large Scale**: {fastest_cloud.capitalize()} (cloud scalability, fast for large datasets)"
        )

    return recommendations


# Generate recommendations
# NOTE: the call also adds a "cv" column to platform_stats, which therefore
# appears in the statistics exported later.
recommendations = generate_recommendations(platform_stats, platform_costs, platform_wins)

print("üéØ Platform Selection Recommendations\n")
print("Based on benchmark results:\n")
for i, rec in enumerate(recommendations, 1):
    print(f"{i}. {rec}")

# Static advice: these lines are printed regardless of the benchmark data.
print("\nüí° General Guidance:")
print("  ‚Ä¢ **Development/Testing**: Use DuckDB or SQLite for fast iteration")
print("  ‚Ä¢ **Production Analytics**: Choose based on your data scale and budget")
print("  ‚Ä¢ **Cost-Sensitive**: Consider local platforms or most cost-effective cloud option")
print("  ‚Ä¢ **Performance-Critical**: Choose the fastest platform for your specific queries")
print("  ‚Ä¢ **Hybrid Approach**: Use local for dev, cloud for production")

## 10. Export Comparison Results

Save comparison data for future reference and reporting.

In [None]:
# Export comprehensive comparison report
# All files from one run share this timestamp suffix, so re-running the cell
# writes a new set of files rather than overwriting the previous one.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# 1. Platform statistics to CSV
platform_stats_file = f"{config['output_dir']}/platform_statistics_{timestamp}.csv"
platform_stats.to_csv(platform_stats_file)
print(f"‚úÖ Exported platform statistics: {platform_stats_file}")

# 2. Query comparison to CSV
query_comp_file = f"{config['output_dir']}/query_comparison_{timestamp}.csv"
query_comparison.to_csv(query_comp_file)
print(f"‚úÖ Exported query comparison: {query_comp_file}")

# 3. Cost analysis to CSV (if available)
if len(df_cost) > 0:
    cost_file = f"{config['output_dir']}/cost_analysis_{timestamp}.csv"
    df_cost.to_csv(cost_file, index=False)
    print(f"‚úÖ Exported cost analysis: {cost_file}")

# 4. Complete report to JSON
# "total_queries" is the number of executions in df_common, while
# "common_queries" is the number of distinct query names in it.
report = {
    "timestamp": timestamp,
    "platforms_compared": list(platform_results.keys()),
    "total_queries": len(df_common),
    "common_queries": df_common["query"].nunique(),
    "platform_stats": platform_stats.to_dict(),
    "platform_wins": platform_wins.to_dict() if len(platform_wins) > 0 else {},
    "platform_costs": platform_costs,
    "recommendations": recommendations,
    "statistical_comparisons": df_comparisons.to_dict("records") if len(comparisons) > 0 else [],
}

report_file = f"{config['output_dir']}/comparison_report_{timestamp}.json"
with open(report_file, "w") as f:
    # default=str: any value json cannot serialize natively is written as its
    # str() form instead of raising.
    json.dump(report, f, indent=2, default=str)

print(f"‚úÖ Exported comprehensive report: {report_file}")
print(f"\nüìÅ All results saved to: {config['output_dir']}")

## 11. Summary and Next Steps

In [None]:
# Final recap built from the objects computed in earlier cells.
print("=" * 70)
print("üìä PLATFORM COMPARISON SUMMARY")
print("=" * 70)
print(f"\nPlatforms Analyzed: {len(platform_results)}")
print(f"Queries Compared: {df_common['query'].nunique()}")
print(f"Total Executions: {len(df_common)}")

print("\nüèÜ Top 3 Performers (by geometric mean):")
# platform_stats is sorted by geometric mean, so head(3) is the three fastest.
for i, platform in enumerate(platform_stats.head(3).index, 1):
    geo_mean = platform_stats.loc[platform, "execution_time_s_geomean"]
    print(f"  {i}. {platform.capitalize()}: {geo_mean:.3f}s")

if platform_costs:
    print("\nüí∞ Most Cost-Effective:")
    # Picks the lowest total estimated cost. Local platforms are always
    # estimated at 0.0, so one of them is reported whenever it is present.
    cheapest = min(platform_costs, key=platform_costs.get)
    print(f"  {cheapest.capitalize()}: ${platform_costs[cheapest]:.4f} for benchmark run")

print("\nüìà Key Insights:")
if len(platform_wins) > 0:
    leader = platform_wins.idxmax()
    leader_pct = (platform_wins[leader] / platform_wins.sum()) * 100
    print(f"  ‚Ä¢ {leader.capitalize()} was fastest for {leader_pct:.1f}% of queries")

# Largest slowest/fastest ratio over all queries; reported only when above 1.
max_speedup = query_comparison["speedup"].max() if len(query_comparison) > 0 else 0
if max_speedup > 1:
    print(f"  ‚Ä¢ Up to {max_speedup:.1f}x performance difference between platforms for same query")

print(f"\nüìÅ Results exported to: {config['output_dir']}")
print("\nüîç Next Steps:")
print("  1. Review platform-specific notebooks for detailed optimization guidance")
print("  2. Run benchmarks at larger scale factors for production-scale testing")
print("  3. Test with your actual queries using BenchBox custom query feature")
print("  4. Consider hybrid approach: local for dev, cloud for production")
print("\n" + "=" * 70)