# BenchBox Result Analysis Patterns

This notebook demonstrates **advanced analytical patterns** for interpreting benchmark results. Learn how to detect performance regressions, identify trends, and perform root cause analysis.

## What You'll Learn

- **Time series analysis**: Track performance changes over time
- **Regression detection**: Identify performance degradation
- **Variance analysis**: Understand performance stability
- **Baseline comparison**: Compare against reference results
- **Statistical testing**: Determine if changes are significant
- **Anomaly detection**: Find outliers and unusual patterns
- **Trend analysis**: Predict future performance
- **Root cause patterns**: Debug performance issues

## Use Cases

- **CI/CD integration**: Automated performance validation
- **Performance monitoring**: Track trends over releases
- **Capacity planning**: Predict when scaling is needed
- **Issue investigation**: Identify root causes of slowdowns

## Expected Runtime

Analysis runs on existing results:
- Data loading: **10-20 seconds**
- Statistical analysis: **30-60 seconds**
- Complete notebook: **2-3 minutes**

## 1. Setup and Load Results

In [None]:
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

# Configuration shared by every analysis cell in this notebook
config = {
    "results_dir": "./benchmark_results",  # Searched recursively for result JSON files
    # NOTE(review): the baseline lives inside results_dir, so the recursive loader
    # will also pick it up as a regular result if it has "query_results" - confirm intended.
    "baseline_file": "./benchmark_results/baseline.json",  # Reference results
    "output_dir": "./analysis_results",  # Charts, JSON report and CSV exports are written here
    # Analysis thresholds
    "regression_threshold": 0.10,  # 10% slowdown = regression
    "significance_level": 0.05,  # p-value threshold
    "outlier_std": 3.0,  # 3 std deviations = outlier
}

# Create the output directory up front; exist_ok makes re-running this cell safe
os.makedirs(config["output_dir"], exist_ok=True)

print("‚úÖ Analysis environment configured")
print(f"üìÅ Results directory: {config['results_dir']}")
print(f"‚öôÔ∏è  Regression threshold: {config['regression_threshold'] * 100:.0f}%")

In [None]:
def load_all_results(results_dir: str) -> pd.DataFrame:
    """Load all benchmark results from a directory into a DataFrame.

    Every ``*.json`` file below ``results_dir`` is parsed. Files that are not a
    JSON object with a ``query_results`` key are skipped silently; files that
    cannot be read or parsed are reported and skipped.

    Args:
        results_dir: Directory that is searched recursively for result files.

    Returns:
        DataFrame with one row per query execution and the columns: timestamp,
        platform, benchmark, scale_factor, query, execution_time_ms, success,
        file, execution_time_s. ``timestamp`` is the modification time of the
        result file. Missing or non-numeric execution times become NaN. An
        empty DataFrame (no columns) is returned when the directory does not
        exist or contains no benchmark results.
    """
    results_path = Path(results_dir)

    if not results_path.exists():
        print(f"‚ö†Ô∏è  Directory not found: {results_dir}")
        return pd.DataFrame()

    all_records = []

    # Recursively find all JSON files; sorted so the row order is reproducible
    for json_file in sorted(results_path.rglob("*.json")):
        try:
            with open(json_file, encoding="utf-8") as f:
                result = json.load(f)

            # Skip if not a benchmark result (e.g. a JSON list or an unrelated config file)
            if not isinstance(result, dict) or "query_results" not in result:
                continue

            # Extract metadata; the parent directory name stands in for a missing platform
            platform = result.get("platform", json_file.parent.name)
            benchmark = result.get("benchmark_name", "unknown")
            scale_factor = result.get("scale_factor", 0.0)
            file_time = datetime.fromtimestamp(json_file.stat().st_mtime)

            # Extract query results ("or []" tolerates an explicit null)
            for qr in result.get("query_results") or []:
                all_records.append(
                    {
                        "timestamp": file_time,
                        "platform": platform,
                        "benchmark": benchmark,
                        "scale_factor": scale_factor,
                        "query": qr.get("query_name", qr.get("query_id", "unknown")),
                        "execution_time_ms": qr.get("execution_time_ms", None),
                        "success": qr.get("success", False),
                        "file": json_file.name,
                    }
                )

        except (OSError, ValueError, TypeError, AttributeError) as e:
            # Best effort: one unreadable or malformed file must not abort the whole load
            print(f"‚ö†Ô∏è  Error loading {json_file.name}: {e}")

    if not all_records:
        print("‚ö†Ô∏è  No benchmark results found")
        return pd.DataFrame()

    df = pd.DataFrame(all_records)
    # A column holding only None has object dtype and cannot be divided;
    # coerce to float so missing timings become NaN instead of raising.
    df["execution_time_ms"] = pd.to_numeric(df["execution_time_ms"], errors="coerce")
    df["execution_time_s"] = df["execution_time_ms"] / 1000.0

    return df


# Load all results from the configured directory; df_all is the shared input
# of every analysis cell that follows.
df_all = load_all_results(config["results_dir"])

if len(df_all) > 0:
    print(f"\nüìä Loaded {len(df_all)} query executions")
    print(f"   Platforms: {df_all['platform'].nunique()}")
    print(f"   Benchmarks: {df_all['benchmark'].nunique()}")
    print(f"   Time range: {df_all['timestamp'].min()} to {df_all['timestamp'].max()}")
    print(f"   Success rate: {df_all['success'].mean() * 100:.1f}%")
else:
    print("‚ö†Ô∏è  No data loaded. This notebook will use synthetic data for demonstration.")

    # Generate synthetic data for demonstration.
    # Fixed seed: the generated numbers are the same on every run.
    np.random.seed(42)
    dates = pd.date_range(start="2024-01-01", periods=10, freq="W")  # 10 weekly runs
    queries = [f"Q{i}" for i in range(1, 11)]  # Q1 .. Q10

    records = []
    for i, date in enumerate(dates):
        for query in queries:
            # Add gradual performance degradation over time
            base_time = 1.0
            time_factor = 1 + (i * 0.05)  # 5% degradation per week
            noise = np.random.normal(1, 0.1)  # multiplicative noise: mean 1, std 0.1
            exec_time = base_time * time_factor * noise

            # Same columns as loaded results, except that execution_time_ms is not set
            records.append(
                {
                    "timestamp": date,
                    "platform": "DuckDB",
                    "benchmark": "TPC-H",
                    "scale_factor": 0.1,
                    "query": query,
                    "execution_time_s": exec_time,
                    "success": True,
                    "file": f"synthetic_{date.strftime('%Y%m%d')}.json",
                }
            )

    df_all = pd.DataFrame(records)
    print(f"\nüìä Generated synthetic data: {len(df_all)} executions")

## 2. Time Series Analysis

In [None]:
# Analyze performance trends over time
def analyze_time_series(df: pd.DataFrame, platform: str, query: str) -> Optional[Dict]:
    """Analyze the performance trend of one query on one platform over time.

    Only successful executions with a recorded execution time are used. They
    are ordered by timestamp and a least-squares line is fitted against the
    run index (0, 1, 2, ...), not against elapsed time.

    Args:
        df: Results with the columns platform, query, success, timestamp and
            execution_time_s.
        platform: Platform to select.
        query: Query name to select.

    Returns:
        None when fewer than two usable executions exist. Otherwise a dict with
        the keys query, data_points, first_time, last_time, pct_change (last
        vs. first run, in percent; NaN when the first run took 0 s), slope
        (seconds per run), r_squared, p_value and trend ("increasing",
        "decreasing" or "stable").
    """
    # .eq(True) keeps the original "== True" semantics for non-boolean columns
    selected = (df["platform"] == platform) & (df["query"] == query) & df["success"].eq(True)
    # NaN timings would turn the whole regression into NaN, so drop them first
    query_data = df[selected].dropna(subset=["execution_time_s"]).sort_values("timestamp")

    if len(query_data) < 2:
        return None

    # Calculate trend using linear regression
    y = query_data["execution_time_s"].to_numpy(dtype=float)
    x = np.arange(len(y))

    slope, _intercept, r_value, p_value, _std_err = stats.linregress(x, y)

    # Calculate performance change
    first_time = y[0]
    last_time = y[-1]
    if first_time != 0:
        pct_change = ((last_time - first_time) / first_time) * 100
    else:
        # A relative change against a zero-second run is undefined
        pct_change = float("nan")

    if slope > 0:
        trend = "increasing"
    elif slope < 0:
        trend = "decreasing"
    else:
        trend = "stable"

    return {
        "query": query,
        "data_points": len(query_data),
        "first_time": first_time,
        "last_time": last_time,
        "pct_change": pct_change,
        "slope": slope,
        "r_squared": r_value**2,
        "p_value": p_value,
        "trend": trend,
    }


# Analyze trends for all queries
if len(df_all) > 0:
    # Trends are computed for one platform only: the platform of the first loaded row
    platform = df_all["platform"].iloc[0]
    trends = []

    for query in df_all["query"].unique():
        trend = analyze_time_series(df_all, platform, query)
        if trend:
            trends.append(trend)

    # Explicit columns keep the frame usable (sortable, indexable by "query")
    # even when no query has enough runs and `trends` is empty.
    trend_columns = [
        "query",
        "data_points",
        "first_time",
        "last_time",
        "pct_change",
        "slope",
        "r_squared",
        "p_value",
        "trend",
    ]
    df_trends = pd.DataFrame(trends, columns=trend_columns).sort_values("pct_change", ascending=False)

    print(f"üìà Time Series Analysis ({platform})\n")

    if df_trends.empty:
        print("Not enough data: no query has two or more successful runs on this platform")
    else:
        print("Queries with biggest performance changes:\n")
        print(df_trends.head(10).to_string(index=False))

        # Flag concerning trends
        degrading = df_trends[df_trends["pct_change"] > 10]
        if len(degrading) > 0:
            print(f"\n‚ö†Ô∏è  {len(degrading)} queries showing >10% performance degradation")
            for _, row in degrading.iterrows():
                print(f"   {row['query']}: {row['pct_change']:.1f}% slower")

In [None]:
# Visualization: Time series plot
# Needs the trend table from the trend-analysis cell; skipped when that table is missing or empty.
if len(df_all) > 0 and "df_trends" in locals() and len(df_trends) > 0:
    fig, ax = plt.subplots(figsize=(14, 6))

    # Plot top 5 queries with most change (df_trends is sorted by pct_change, largest first)
    top_queries = df_trends.head(5)["query"].values

    for query in top_queries:
        # Restrict to the platform the trends were computed for; otherwise one
        # line would connect samples from different platforms.
        query_data = df_all[
            (df_all["platform"] == platform) & (df_all["query"] == query) & (df_all["success"] == True)
        ].sort_values("timestamp")

        ax.plot(
            query_data["timestamp"], query_data["execution_time_s"], marker="o", label=query, linewidth=2, markersize=6
        )

    ax.set_xlabel("Date", fontsize=12, fontweight="bold")
    ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
    ax.set_title("Query Performance Over Time\nTop 5 Queries by Change", fontsize=14, fontweight="bold", pad=20)
    ax.legend(title="Query", title_fontsize=11, fontsize=10)
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45, ha="right")

    plt.tight_layout()
    plt.savefig(f"{config['output_dir']}/time_series_analysis.png", dpi=150, bbox_inches="tight")
    plt.show()

    print("üíæ Saved: time_series_analysis.png")

## 3. Regression Detection

In [None]:
def detect_regressions(df: pd.DataFrame, baseline_file: Optional[str] = None, threshold: float = 0.10) -> pd.DataFrame:
    """Detect performance regressions compared to baseline.

    The current time of a query is its mean execution time at the latest
    timestamp in ``df``. The baseline is read from ``baseline_file`` when that
    file exists, otherwise it is the mean execution time at the earliest
    timestamp in ``df``. When ``df`` has a ``success`` column, failed
    executions are ignored. Queries without a positive baseline time are left
    out because a relative change cannot be computed for them.

    Args:
        df: DataFrame with current results (columns timestamp, query,
            execution_time_s and optionally success)
        baseline_file: Path to baseline results (if None, uses oldest results)
        threshold: Regression threshold (e.g., 0.10 = 10% slower)

    Returns:
        DataFrame with the columns query, baseline_time, current_time,
        time_diff, pct_change, regression and improvement, sorted by
        pct_change in descending order. Only queries present in both the
        baseline and the latest results are included.
    """
    # Timings of failed runs are not meaningful measurements
    if "success" in df.columns:
        df = df[df["success"].eq(True)]

    # Determine baseline
    if baseline_file and Path(baseline_file).exists():
        with open(baseline_file, encoding="utf-8") as f:
            baseline_result = json.load(f)

        baseline_records = [
            {
                "query": qr.get("query_name", qr.get("query_id")),
                "baseline_time": qr.get("execution_time_ms"),
            }
            for qr in baseline_result.get("query_results", [])
        ]
        # Explicit columns so a baseline without query results still merges cleanly
        df_baseline = pd.DataFrame(baseline_records, columns=["query", "baseline_time"])
        # Missing or null timings become NaN instead of raising on None / 1000.0
        df_baseline["baseline_time"] = (
            pd.to_numeric(df_baseline["baseline_time"], errors="coerce").astype(float) / 1000.0
        )
    else:
        # Use earliest timestamp as baseline
        earliest = df["timestamp"].min()
        df_baseline = (
            df[df["timestamp"] == earliest].groupby("query")["execution_time_s"].mean().reset_index(name="baseline_time")
        )

    # NaN and non-positive baselines would produce NaN/inf percentages
    df_baseline = df_baseline[df_baseline["baseline_time"] > 0]

    # Get latest results
    latest = df["timestamp"].max()
    df_latest = df[df["timestamp"] == latest].groupby("query")["execution_time_s"].mean().reset_index(name="current_time")

    # Compare
    df_comparison = df_baseline.merge(df_latest, on="query", how="inner")
    df_comparison["time_diff"] = df_comparison["current_time"] - df_comparison["baseline_time"]
    df_comparison["pct_change"] = df_comparison["time_diff"] / df_comparison["baseline_time"] * 100

    # Flag regressions (slower than +threshold) and improvements (faster than -threshold)
    df_comparison["regression"] = df_comparison["pct_change"] > (threshold * 100)
    df_comparison["improvement"] = df_comparison["pct_change"] < -(threshold * 100)

    return df_comparison.sort_values("pct_change", ascending=False)


# Run the baseline comparison and print every flagged query
if len(df_all) > 0:
    df_regressions = detect_regressions(
        df_all,
        baseline_file=config.get("baseline_file"),
        threshold=config["regression_threshold"],
    )

    print("üîç Regression Detection Analysis\n")
    print(f"Threshold: {config['regression_threshold'] * 100:.0f}%\n")

    # Split the comparison table by the boolean flags set in detect_regressions
    regressions = df_regressions.loc[df_regressions["regression"]]
    improvements = df_regressions.loc[df_regressions["improvement"]]

    if regressions.empty:
        print("‚úÖ No regressions detected")
    else:
        print(f"‚ùå {len(regressions)} REGRESSIONS DETECTED:\n")
        for _, row in regressions.iterrows():
            print(
                f"   {row['query']}: {row['baseline_time']:.3f}s ‚Üí {row['current_time']:.3f}s ({row['pct_change']:+.1f}%)"
            )

    # Improvements are reported only when there are any
    if not improvements.empty:
        print(f"\n‚ú® {len(improvements)} IMPROVEMENTS DETECTED:\n")
        for _, row in improvements.iterrows():
            print(
                f"   {row['query']}: {row['baseline_time']:.3f}s ‚Üí {row['current_time']:.3f}s ({row['pct_change']:+.1f}%)"
            )

In [None]:
# Visualization: Regression waterfall chart
# Horizontal bar chart of the percentage change per query, taken from df_regressions.
# The cell is skipped when df_regressions has not been created.
if len(df_all) > 0 and "df_regressions" in locals():
    # Figure height grows with the number of queries (0.3 per row, at least 6)
    fig, ax = plt.subplots(figsize=(12, max(6, len(df_regressions) * 0.3)))

    # Sort by change (ascending)
    df_plot = df_regressions.sort_values("pct_change")

    # Color by status: red = regression, green = improvement, gray = neither
    colors = [
        "red" if reg else "green" if imp else "gray" for reg, imp in zip(df_plot["regression"], df_plot["improvement"])
    ]

    bars = ax.barh(df_plot["query"], df_plot["pct_change"], color=colors, alpha=0.7)

    # Add threshold lines at +/- the configured regression threshold (in percent)
    ax.axvline(
        config["regression_threshold"] * 100,
        color="red",
        linestyle="--",
        linewidth=2,
        alpha=0.5,
        label="Regression threshold",
    )
    ax.axvline(
        -config["regression_threshold"] * 100,
        color="green",
        linestyle="--",
        linewidth=2,
        alpha=0.5,
        label="Improvement threshold",
    )
    # Thin reference line at 0% (no change)
    ax.axvline(0, color="black", linestyle="-", linewidth=1, alpha=0.3)

    ax.set_xlabel("Performance Change (%)", fontsize=12, fontweight="bold")
    ax.set_ylabel("Query", fontsize=12, fontweight="bold")
    ax.set_title(
        "Regression Detection: Baseline vs Current\nRed=Regression, Green=Improvement, Gray=Unchanged",
        fontsize=14,
        fontweight="bold",
        pad=20,
    )
    ax.legend()
    ax.grid(axis="x", alpha=0.3)

    # Save the chart to the output directory before showing it
    plt.tight_layout()
    plt.savefig(f"{config['output_dir']}/regression_detection.png", dpi=150, bbox_inches="tight")
    plt.show()

    print("üíæ Saved: regression_detection.png")

## 4. Performance Variance Analysis

In [None]:
# Analyze stability and variance
def analyze_variance(df: pd.DataFrame) -> pd.DataFrame:
    """Analyze performance variance and stability for each query.

    Args:
        df: Results with the columns query and execution_time_s.

    Returns:
        DataFrame with one row per query and the columns query, mean, std, min,
        max, count, cv, range and stability_score, sorted by cv in descending
        order (rows with NaN cv last). mean, std, min, max and range are
        rounded to 4 decimals, cv and stability_score to 3. cv is NaN for
        queries with a single execution because their std is undefined.
    """
    variance_stats = (
        df.groupby("query")["execution_time_s"].agg(["mean", "std", "min", "max", "count"]).reset_index()
    )

    # Derive the ratios from the unrounded statistics: rounding first would turn
    # the mean/std of sub-millisecond queries into 0 and the CV into 0 or NaN.
    cv = variance_stats["std"] / variance_stats["mean"]

    # Calculate coefficient of variation (CV = std / mean)
    variance_stats["cv"] = cv.round(3)

    # Calculate range
    variance_stats["range"] = (variance_stats["max"] - variance_stats["min"]).round(4)

    # Stability score (inverse of CV, higher is better)
    variance_stats["stability_score"] = (1 / (1 + cv)).round(3)

    # Round the reported statistics only after everything has been derived from them
    stat_columns = ["mean", "std", "min", "max"]
    variance_stats[stat_columns] = variance_stats[stat_columns].round(4)

    return variance_stats.sort_values("cv", ascending=False)


# Report per-query variance, computed on successful executions only
if len(df_all) > 0:
    df_variance = analyze_variance(df_all[df_all["success"] == True])

    print("üìä Performance Variance Analysis\n")
    print("Queries with highest variance (least stable):\n")
    print(df_variance.head(10).to_string(index=False))

    # Queries with a single run have an undefined (NaN) CV and sort last; they
    # must not be reported as the "most stable" query.
    ranked = df_variance.dropna(subset=["cv"])

    print("\nüéØ Stability Insights:")
    if len(ranked) > 0:
        print(f"   Most stable query: {ranked.iloc[-1]['query']} (CV={ranked.iloc[-1]['cv']:.3f})")
        print(f"   Least stable query: {ranked.iloc[0]['query']} (CV={ranked.iloc[0]['cv']:.3f})")
        print(f"   Average CV: {df_variance['cv'].mean():.3f}")
    else:
        print("   Not enough repeated runs to compare query stability")

    # Flag highly variable queries
    unstable = df_variance[df_variance["cv"] > 0.3]
    if len(unstable) > 0:
        print(f"\n‚ö†Ô∏è  {len(unstable)} queries with high variance (CV > 0.3):")
        for _, row in unstable.iterrows():
            print(f"   {row['query']}: {row['min']:.3f}s - {row['max']:.3f}s (CV={row['cv']:.3f})")

## 5. Anomaly Detection

In [None]:
def detect_anomalies(df: pd.DataFrame, std_threshold: float = 3.0) -> pd.DataFrame:
    """Flag query executions whose time deviates strongly from the query's mean.

    For every query with at least three executions and a non-zero standard
    deviation, the absolute z-score of each execution time is computed against
    that query's own mean and sample standard deviation.

    Args:
        df: DataFrame with query results (columns query and execution_time_s)
        std_threshold: Executions with a z-score above this value are returned

    Returns:
        DataFrame holding the flagged rows with the added columns z_score and
        is_anomaly, or an empty DataFrame when nothing was flagged
    """
    flagged_rows = []

    # sort=False walks the queries in order of first appearance
    for _, executions in df.groupby("query", sort=False):
        # Too few samples for a meaningful spread
        if len(executions) < 3:
            continue

        times = executions["execution_time_s"]
        center = times.mean()
        spread = times.std()

        # Identical timings: z-scores are undefined
        if spread == 0:
            continue

        scored = executions.assign(z_score=np.abs((times - center) / spread))
        scored["is_anomaly"] = scored["z_score"] > std_threshold

        flagged_rows.extend(scored[scored["is_anomaly"]].to_dict("records"))

    if not flagged_rows:
        return pd.DataFrame()
    return pd.DataFrame(flagged_rows)


# Search the successful executions for outliers and print the strongest ones
if len(df_all) > 0:
    successful_runs = df_all[df_all["success"] == True]
    df_anomalies = detect_anomalies(successful_runs, std_threshold=config["outlier_std"])

    print(f"üîç Anomaly Detection (>{config['outlier_std']} std deviations)\n")

    if len(df_anomalies) == 0:
        print("‚úÖ No anomalies detected")
    else:
        print(f"‚ö†Ô∏è  {len(df_anomalies)} anomalous executions detected:\n")

        # Show at most the ten executions with the largest z-score
        worst_first = df_anomalies.sort_values("z_score", ascending=False).head(10)
        for _, row in worst_first.iterrows():
            print(f"   {row['query']} @ {row['timestamp']}: {row['execution_time_s']:.3f}s (z={row['z_score']:.2f})")

        print("\nüìä Anomaly Statistics:")
        print(f"   Total anomalies: {len(df_anomalies)}")
        print(f"   Queries affected: {df_anomalies['query'].nunique()}")
        # The rate is relative to all loaded executions, not only the successful ones
        print(f"   Anomaly rate: {len(df_anomalies) / len(df_all) * 100:.2f}%")

## 6. Statistical Significance Testing

In [None]:
def test_statistical_significance(
    df: pd.DataFrame,
    query: str,
    before_date: datetime,
    after_date: datetime,
    significance_level: Optional[float] = None,
) -> Optional[Dict]:
    """Test if performance change is statistically significant.

    Uses Mann-Whitney U test (non-parametric) to compare distributions.

    Args:
        df: Results with the columns query, timestamp and execution_time_s.
        query: Query name to test.
        before_date: Executions with timestamp <= before_date form the
            "before" sample.
        after_date: Executions with timestamp > after_date form the "after"
            sample.
        significance_level: p-value threshold for the ``significant`` flag.
            Defaults to ``config["significance_level"]`` when omitted.

    Returns:
        None when either sample has fewer than two timings. Otherwise a dict
        with the keys query, n_before, n_after, mean_before, mean_after,
        pct_change (change of the mean in percent, 0 when mean_before is not
        positive), p_value, significant and cohens_d.
    """
    if significance_level is None:
        # Fall back to the notebook-wide setting so existing calls keep working
        significance_level = config["significance_level"]

    query_data = df[df["query"] == query]
    times = query_data["execution_time_s"]

    # Missing timings (NaN) would propagate into the test statistic, so drop them
    before = times[query_data["timestamp"] <= before_date].dropna().values
    after = times[query_data["timestamp"] > after_date].dropna().values

    if len(before) < 2 or len(after) < 2:
        return None

    # Mann-Whitney U test
    _statistic, p_value = stats.mannwhitneyu(before, after, alternative="two-sided")

    # Effect size (Cohen's d), pooling the two population variances with equal weight
    mean_before, mean_after = np.mean(before), np.mean(after)
    std_pooled = np.sqrt((np.var(before) + np.var(after)) / 2)
    cohens_d = (mean_after - mean_before) / std_pooled if std_pooled > 0 else 0

    return {
        "query": query,
        "n_before": len(before),
        "n_after": len(after),
        "mean_before": mean_before,
        "mean_after": mean_after,
        "pct_change": ((mean_after - mean_before) / mean_before * 100) if mean_before > 0 else 0,
        "p_value": p_value,
        "significant": p_value < significance_level,
        "cohens_d": cohens_d,
    }


# Test significance for queries with changes
if len(df_all) > 0 and len(df_all["timestamp"].unique()) >= 2:
    # Split at midpoint. Series.unique() can yield numpy.datetime64 values, which
    # have no strftime(); normalise the split point to a pandas Timestamp.
    timestamps = sorted(df_all["timestamp"].unique())
    midpoint = pd.Timestamp(timestamps[len(timestamps) // 2])

    # Loop-invariant: only successful executions are compared
    successful_runs = df_all[df_all["success"] == True]

    sig_tests = []
    for query in df_all["query"].unique():
        result = test_statistical_significance(successful_runs, query, before_date=midpoint, after_date=midpoint)
        if result:
            sig_tests.append(result)

    # Explicit columns keep the table sortable when no query had enough samples
    sig_columns = [
        "query",
        "n_before",
        "n_after",
        "mean_before",
        "mean_after",
        "pct_change",
        "p_value",
        "significant",
        "cohens_d",
    ]
    df_sig = pd.DataFrame(sig_tests, columns=sig_columns).sort_values("p_value")

    print("üìä Statistical Significance Testing\n")
    print(f"Comparing before/after {midpoint.strftime('%Y-%m-%d')}")
    print(f"Significance level: Œ± = {config['significance_level']}\n")

    # astype(bool) keeps the mask valid for an empty (object-typed) column
    significant = df_sig[df_sig["significant"].astype(bool)]

    if len(significant) > 0:
        print(f"‚úÖ {len(significant)} queries with statistically significant changes:\n")
        for _, row in significant.head(5).iterrows():
            direction = "slower" if row["pct_change"] > 0 else "faster"
            print(
                f"   {row['query']}: {abs(row['pct_change']):.1f}% {direction} (p={row['p_value']:.4f}, d={row['cohens_d']:.2f})"
            )
    else:
        print("No statistically significant changes detected")

## 7. Root Cause Analysis Patterns

In [None]:
def root_cause_analysis(df: pd.DataFrame, problematic_queries: List[str]) -> Dict:
    """Analyze patterns in problematic queries to identify root causes.

    All executions of the problematic queries are inspected with four
    heuristics: the hour of day with the most executions, the scale factors
    involved, whether the numbers embedded in the query names average above
    15, and whether the median gap between consecutive executions is below
    one hour.

    Args:
        df: DataFrame with all results (column query; timestamp and
            scale_factor are used when present)
        problematic_queries: List of query names showing issues

    Returns:
        Dictionary with the keys ``affected_queries`` (the input list) and
        ``patterns`` (human-readable root cause indicators, possibly empty)
    """
    analysis = {
        "affected_queries": problematic_queries,
        "patterns": [],
    }

    problem_data = df[df["query"].isin(problematic_queries)]

    # Normalise timestamps once: the .dt accessor fails on string/object columns.
    # Values that cannot be parsed are dropped.
    problem_timestamps = None
    if "timestamp" in df.columns:
        problem_timestamps = pd.to_datetime(problem_data["timestamp"], errors="coerce").dropna()

    # Pattern 1: Timing patterns
    if problem_timestamps is not None:
        problem_times = problem_timestamps.dt.hour.value_counts()
        if len(problem_times) > 0:
            peak_hour = problem_times.idxmax()
            analysis["patterns"].append(f"Most issues occur at hour {peak_hour}:00 (possible resource contention)")

    # Pattern 2: Scale factor correlation
    if "scale_factor" in df.columns:
        problem_scales = problem_data["scale_factor"].unique()
        if len(problem_scales) > 0:
            analysis["patterns"].append(f"Issues appear at scale factors: {', '.join(map(str, problem_scales))}")

    # Pattern 3: Query characteristics
    # Check if query names have common patterns: collect the digits of each name as a number
    query_numbers = []
    for q in problematic_queries:
        try:
            query_numbers.append(int("".join(filter(str.isdigit, q))))
        except (TypeError, ValueError):
            # Name without digits (or not a string): nothing to contribute
            continue

    if query_numbers:
        avg_num = np.mean(query_numbers)
        if avg_num > 15:
            analysis["patterns"].append("Issues concentrated in higher-numbered queries (complex queries)")

    # Pattern 4: Temporal clustering
    if problem_timestamps is not None and len(problem_timestamps) > 1:
        time_diffs = problem_timestamps.sort_values().diff().dt.total_seconds()
        if time_diffs.median() < 3600:  # Within 1 hour
            analysis["patterns"].append("Issues clustered in time (possible infrastructure event)")

    return analysis


# Run the root cause heuristics on every query flagged as a regression
if len(df_all) > 0 and "df_regressions" in locals():
    problematic = df_regressions.loc[df_regressions["regression"], "query"].tolist()

    if problematic:
        rca = root_cause_analysis(df_all, problematic)

        print("üîç Root Cause Analysis\n")
        print(f"Analyzing {len(rca['affected_queries'])} problematic queries:\n")

        if not rca["patterns"]:
            # No heuristic fired: fall back to a generic checklist
            print("No clear patterns identified. Consider:")
            for hint in (
                "  ‚Ä¢ Check for infrastructure changes",
                "  ‚Ä¢ Review query plans for affected queries",
                "  ‚Ä¢ Examine data volume changes",
                "  ‚Ä¢ Look for schema modifications",
            ):
                print(hint)
        else:
            print("Potential root cause indicators:")
            for i, pattern in enumerate(rca["patterns"], 1):
                print(f"  {i}. {pattern}")

## 8. Export Analysis Report

In [None]:
# Write the JSON report and the CSV summaries to the output directory
if len(df_all) > 0:
    # One timestamp string is shared by all file names of this export
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    report = {
        "timestamp": timestamp,
        "analysis_date": datetime.now().isoformat(),
    }
    report["data_range"] = {
        "start": df_all["timestamp"].min().isoformat(),
        "end": df_all["timestamp"].max().isoformat(),
        "total_executions": len(df_all),
    }
    # Each analysis table is optional: a cell that was not run contributes an empty list
    report["regressions"] = df_regressions.to_dict("records") if "df_regressions" in locals() else []
    report["trends"] = df_trends.to_dict("records") if "df_trends" in locals() else []
    report["variance"] = df_variance.to_dict("records") if "df_variance" in locals() else []
    report["anomalies"] = (
        df_anomalies.to_dict("records") if "df_anomalies" in locals() and len(df_anomalies) > 0 else []
    )
    report["config"] = config

    report_file = f"{config['output_dir']}/analysis_report_{timestamp}.json"
    with open(report_file, "w") as f:
        # default=str serialises values json cannot handle natively (e.g. timestamps)
        json.dump(report, f, indent=2, default=str)

    print(f"‚úÖ Exported comprehensive analysis report: {report_file}")

    # CSV summaries, written only for the tables that exist
    if "df_regressions" in locals():
        df_regressions.to_csv(f"{config['output_dir']}/regressions_{timestamp}.csv", index=False)
        print("‚úÖ Exported regressions CSV")

    if "df_variance" in locals():
        df_variance.to_csv(f"{config['output_dir']}/variance_{timestamp}.csv", index=False)
        print("‚úÖ Exported variance analysis CSV")

    print(f"\nüìÅ All reports saved to: {config['output_dir']}")

## 9. Summary and Recommendations

In [None]:
# Print a summary of every analysis that was run, followed by recommendations
if len(df_all) > 0:
    print("=" * 70)
    print("üìä ANALYSIS SUMMARY")
    print("=" * 70)

    print("\nüìà Data Overview:")
    print(f"   Time range: {df_all['timestamp'].min()} to {df_all['timestamp'].max()}")
    print(f"   Total executions: {len(df_all)}")
    print(f"   Unique queries: {df_all['query'].nunique()}")
    print(f"   Success rate: {df_all['success'].mean() * 100:.1f}%")

    if "df_regressions" in locals() and len(df_regressions) > 0:
        regressions = df_regressions[df_regressions["regression"]]
        improvements = df_regressions[df_regressions["improvement"]]
        print("\nüîç Regression Analysis:")
        print(f"   Regressions: {len(regressions)}")
        print(f"   Improvements: {len(improvements)}")
        print(f"   Unchanged: {len(df_regressions) - len(regressions) - len(improvements)}")

    if "df_anomalies" in locals() and len(df_anomalies) > 0:
        print("\n‚ö†Ô∏è  Anomalies:")
        print(f"   Anomalous executions: {len(df_anomalies)}")
        print(f"   Anomaly rate: {len(df_anomalies) / len(df_all) * 100:.2f}%")

    if "df_variance" in locals():
        # Rows with an undefined (NaN) CV sort last and must not be reported as
        # "most stable"; an empty table must not raise on iloc.
        ranked_variance = df_variance.dropna(subset=["cv"])
        if len(ranked_variance) > 0:
            print("\nüìä Stability:")
            print(f"   Most stable: {ranked_variance.iloc[-1]['query']} (CV={ranked_variance.iloc[-1]['cv']:.3f})")
            print(f"   Least stable: {ranked_variance.iloc[0]['query']} (CV={ranked_variance.iloc[0]['cv']:.3f})")

    print("\nüí° Recommendations:")

    if "regressions" in locals() and len(regressions) > 0:
        print(f"   1. Investigate {len(regressions)} queries with performance regressions")
        # The regression table is sorted by pct_change, so row 0 is the worst regression
        print(f"      Priority: {regressions.iloc[0]['query']} ({regressions.iloc[0]['pct_change']:+.1f}%)")
    else:
        print("   1. ‚úÖ No performance regressions detected")

    if "df_anomalies" in locals() and len(df_anomalies) > 0:
        print(f"   2. Review {len(df_anomalies)} anomalous executions for infrastructure issues")

    if "df_variance" in locals():
        unstable = df_variance[df_variance["cv"] > 0.3]
        if len(unstable) > 0:
            print(f"   3. Stabilize {len(unstable)} queries with high variance (CV > 0.3)")

    print("   4. Establish baseline for ongoing regression testing")
    print("   5. Integrate analysis into CI/CD pipeline")
    print("   6. Set up automated alerts for regressions and anomalies")

    print(f"\nüìÅ Detailed reports: {config['output_dir']}")
    print("\n" + "=" * 70)