# BENCHMARK: Concurrent vs Sequential Stats Processing

This notebook benchmarks concurrent (batch) processing, sequential (standard) processing, and the legacy method.

## Benchmark Structure

- **BENCHMARK 1**: Concurrent processing (high-volume endpoint, batches)
- **BENCHMARK 2**: Sequential processing (standard endpoint, sequential)
- **BENCHMARK 3**: Legacy endpoint comparison
- **ANALYSIS**: Performance comparison, scaling behavior, recommendations

In [None]:
import time
import pandas as pd
import json
import tempfile
import os
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import ee

# Set up Downloads path for all outputs (created on first run)
downloads_path = Path.home() / "Downloads" / "whisp_benchmarks"
downloads_path.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {downloads_path}")

# Initialize Earth Engine. If initialization fails for any reason, run
# ee.Authenticate() once and retry.
try:
    ee.Initialize()
    print("Earth Engine initialized")
except Exception:
    # `except Exception` (not a bare `except:`) so that KeyboardInterrupt and
    # SystemExit still stop the cell instead of triggering authentication.
    ee.Authenticate()
    ee.Initialize()
    print("Earth Engine authenticated and initialized")



In [None]:
# Drop the current Earth Engine session and reconnect through the
# high-volume endpoint.
high_volume_url = 'https://earthengine-highvolume.googleapis.com'
ee.Reset()
ee.Initialize(opt_url=high_volume_url)
print("Earth Engine reset and initialized with HIGH-VOLUME endpoint for benchmarking")

In [None]:
# Import WHISP
import openforis_whisp as whisp
from openforis_whisp.concurrent_stats import validate_ee_endpoint, check_ee_endpoint
print("WHISP imported with endpoint validation functions")

In [None]:
# Create Whisp image (reuse if exists, create once for all benchmarks)
print("\nüì¶ Creating WHISP image for benchmarking...")
iso2_codes = ['br', 'co', 'ci']

try:
    whisp_image_bench = whisp.combine_datasets(national_codes=iso2_codes, auto_recovery=True)
    print(f"‚úÖ Created WHISP image for benchmarking")
except Exception as e:
    # Fall back to an image built by another cell only if one actually exists;
    # otherwise re-raise the real failure instead of an unrelated NameError.
    if "whisp_image" not in globals():
        raise
    print(f"Error creating image: {e}")
    print(f"‚ö†Ô∏è  Using existing whisp_image from earlier")
    whisp_image_bench = whisp_image


In [None]:
from openforis_whisp.advanced_stats import validate_ee_endpoint, check_ee_endpoint

In [None]:


# Define Brazil Amazon test region (names are matched against the GAUL
# 'gaul1_name' property when the region geometry is loaded)
test_region_states = ["Amazonas", "Mato Grosso", "Rondônia"]
print(f"Test region: Brazil Amazon ({', '.join(test_region_states)})")

# Parameter grid iterated by every benchmark cell. These values reproduce the
# outputs recorded in this notebook (10 ha, 10 vertices, four polygon counts,
# 5 repetitions each = 20 runs per method).
benchmark_params = {
    'areas_ha': [10],
    'polygon_counts': [100, 125, 150, 200],
    'vertex_complexity': [10],
    'num_repetitions': 5,
}

print("\nBenchmark Parameters:")
print("  - Concurrent vs Sequential comparison")
print("  - Multiple polygon counts and complexity levels")
print("  - Multiple repetitions for statistical significance")
for param_name, param_value in benchmark_params.items():
    print(f"  - {param_name}: {param_value}")

In [None]:
# Toggle read by the concurrent and legacy benchmark cells: when True they pass
# the prebuilt whisp_image_bench as `whisp_image`, otherwise they pass None.
precompiled_image = True

In [None]:
# Reuse `whisp_image` when an earlier cell already created it; otherwise build
# it now. On failure the error is printed and `whisp_image` is set to None.
try:
    whisp_image
    print("Using existing whisp_image from earlier")
except NameError:
    print("Creating WHISP image for benchmarking...")
    try:
        whisp_image = whisp.combine_datasets()
        print("Created WHISP image for benchmarking")
    except Exception as e:
        print(f"Error creating image: {e}")
        whisp_image = None

In [None]:

# Helper function to generate test GeoJSON
def generate_test_geojson(num_polygons, area_ha, num_vertices, bounds):
    """Generate random test polygons and write them to a temporary GeoJSON file.

    Args:
        num_polygons: Number of polygons requested from
            ``whisp.generate_test_polygons``.
        area_ha: Target polygon area; polygons are requested between 90% and
            110% of this value.
        num_vertices: Vertex count, used as both the minimum and the maximum.
        bounds: Passed through unchanged as the generator's ``bounds``.

    Returns:
        Tuple ``(temp_path, geojson)``: the path of the written ``.geojson``
        file (the caller is responsible for deleting it) and the object
        returned by ``whisp.generate_test_polygons``.

    Raises:
        Exception: Any error from generation or writing is printed and then
            re-raised unchanged.
    """
    temp_path = None
    try:
        geojson = whisp.generate_test_polygons(
            bounds=bounds,
            num_polygons=num_polygons,
            min_area_ha=area_ha * 0.9,
            max_area_ha=area_ha * 1.1,
            min_number_vert=num_vertices,
            max_number_vert=num_vertices,
        )

        # delete=False: the file has to outlive this function so the benchmark
        # can read it by path. The context manager closes the handle exactly
        # once, so a failed json.dump can no longer be masked by a second
        # close on an already-closed descriptor.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.geojson', delete=False
        ) as f:
            temp_path = f.name
            json.dump(geojson, f)

        return temp_path, geojson
    except Exception as e:
        print(f"Error generating test data: {e}")
        # Do not leave a partially written file behind.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        raise

# Logger obtained from whisp (passed to the sequential benchmark)
logger = whisp.setup_concurrent_logger()

# Bounding box of the selected Brazilian states, used as `bounds` when
# generating test polygons. Falls back to None if it cannot be built.
try:
    gaul_level1 = ee.FeatureCollection("projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1")
    state_filter = ee.Filter.inList('gaul1_name', test_region_states)
    fc = gaul_level1.filter(state_filter)
    geom = fc.geometry().bounds()
    print("\n‚úÖ Brazil regions geometry loaded")
except Exception as e:
    print(f"Warning: Could not load Brazil geometry: {e}")
    geom = None

print("Helper function defined for test data generation")

In [None]:
# ============================================================================
# PRE-BENCHMARK CHECK: Validate High-Volume Endpoint
# ============================================================================
banner = "=" * 80
print("\n" + banner)
print("PRE-BENCHMARK CHECK: Endpoint Validation")
print(banner)

# Report which endpoint the current session points at
api_url = str(ee.data._cloud_api_base_url)
if "highvolume" in api_url.lower():
    current_endpoint = "HIGH-VOLUME"
else:
    current_endpoint = "STANDARD"
print(f"\nüìç Current Earth Engine Endpoint: {current_endpoint}")
print(f"   URL: {api_url}")

# The concurrent benchmark expects the high-volume endpoint; if validation
# raises RuntimeError, reset the session and reconnect to it.
try:
    validate_ee_endpoint("high-volume", raise_error=True)
except RuntimeError as e:
    print(f"\n‚ùå Endpoint validation FAILED:")
    print(f"{e}")
    print("\nüîß Attempting to fix by initializing high-volume endpoint...")
    ee.Reset()
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
    print("‚úÖ High-volume endpoint re-initialized")
else:
    print("\n‚úÖ High-volume endpoint validated - concurrent benchmark can proceed")

In [None]:
# ============================================================================
# BENCHMARK 1: CONCURRENT METHOD (High-Volume Endpoint)
# ============================================================================
# Times whisp.whisp_stats_geojson_to_df_concurrent for every combination in
# benchmark_params, repeated num_repetitions times, and appends one result
# dict per run to benchmark_results_concurrent.
from itertools import product

print("\n" + "="*80)
print("BENCHMARK 1: CONCURRENT METHOD (High-Volume Endpoint)")
print(f"Running {benchmark_params['num_repetitions']} repetitions per test")
print("="*80)

benchmark_results_concurrent = []

num_repetitions = benchmark_params['num_repetitions']
# All (area, polygon count, vertex count) combinations, in the same order the
# equivalent nested loops would visit them.
configurations = list(product(
    benchmark_params['areas_ha'],
    benchmark_params['polygon_counts'],
    benchmark_params['vertex_complexity'],
))
total_combinations = len(configurations)
total_tests = total_combinations * num_repetitions
test_num = 0

for area_ha, num_polygons, num_vertices in configurations:
    # Run each configuration multiple times
    for rep in range(num_repetitions):
        test_num += 1

        # Generate test data (not included in the timed section)
        geojson_path, geojson_data = generate_test_geojson(
            num_polygons=num_polygons,
            area_ha=area_ha,
            num_vertices=num_vertices,
            bounds=geom
        )

        # Run concurrent processing with timing. perf_counter is monotonic,
        # so system clock adjustments cannot distort the measurement.
        start_time = time.perf_counter()
        try:
            df_result = whisp.whisp_stats_geojson_to_df_concurrent(
                input_geojson_filepath=geojson_path,
                national_codes=iso2_codes,
                add_metadata_server=False,
                whisp_image=whisp_image_bench if precompiled_image else None,
            )
            elapsed_time = time.perf_counter() - start_time
            status = "‚úÖ"
            error_msg = None
            rows_processed = df_result.shape[0]
        except Exception as e:
            # Failed runs are still recorded, with their elapsed time and error
            elapsed_time = time.perf_counter() - start_time
            status = "‚ùå"
            error_msg = str(e)
            rows_processed = 0

        # Store results
        benchmark_results_concurrent.append({
            'method': 'Concurrent',
            'area_ha': area_ha,
            'num_polygons': num_polygons,
            'num_vertices': num_vertices,
            'repetition': rep + 1,
            'total_features': num_polygons,
            'rows_processed': rows_processed,
            'time_seconds': elapsed_time,
            'time_per_polygon': elapsed_time / num_polygons if num_polygons > 0 else 0,
            'status': status,
            'error': error_msg
        })

        # Show progress: only the final repetition of each configuration is
        # printed, to reduce clutter
        if rep == num_repetitions - 1:
            print(f"[{test_num:3d}/{total_tests}] {status} Area: {area_ha:3d}ha | "
                  f"Polygons: {num_polygons:3d} | Vertices: {num_vertices:5d} | "
                  f"Rep {rep+1}/{num_repetitions} | Time: {elapsed_time:7.2f}s")

        # Cleanup - the file may still be locked, so retry a few times
        time.sleep(0.1)  # Give time for file to be released
        for attempt in range(3):
            try:
                os.remove(geojson_path)
            except FileNotFoundError:
                break  # Already gone; nothing to retry
            except PermissionError:
                if attempt < 2:
                    time.sleep(0.2)
                else:
                    print(f"Warning: could not remove temp file {geojson_path}")
            else:
                break

print(f"\n‚úÖ Concurrent benchmarking complete ({test_num} tests with {benchmark_params['num_repetitions']} repetitions)")

In [None]:
# ============================================================================
# BACKUP: Save Concurrent Results
# ============================================================================
# Write the raw concurrent timings to a timestamped CSV in the output folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
concurrent_backup_file = downloads_path / f"benchmark_concurrent_{timestamp}.csv"
df_concurrent_backup = pd.DataFrame(benchmark_results_concurrent)
df_concurrent_backup.to_csv(concurrent_backup_file, index=False)
print(
    f"\nüíæ Concurrent results backed up: {concurrent_backup_file}",
    f"   Rows: {len(df_concurrent_backup)}",
    sep="\n",
)


In [None]:
# ============================================================================
# SWITCH TO STANDARD ENDPOINT for non-concurrent and legacy tests
# ============================================================================
print("\n" + "="*80)
print("SWITCHING ENDPOINTS: Resetting to Standard for non-concurrent tests")
print("="*80)

standard_url = 'https://earthengine.googleapis.com'

ee.Reset()
print("‚úÖ Earth Engine reset")

# Reconnect to the standard endpoint; authenticate and retry if that fails
try:
    ee.Initialize(opt_url=standard_url)
    print("‚úÖ Initialized with standard endpoint")
except Exception:
    ee.Authenticate()
    ee.Initialize(opt_url=standard_url)
    print("‚úÖ Authenticated and initialized with standard endpoint")

# Verify endpoint. The match is case-insensitive so this cell classifies the
# URL the same way as the pre-benchmark endpoint check.
api_url = str(ee.data._cloud_api_base_url)
print(f"üìç Current endpoint: {'HIGH-VOLUME' if 'highvolume' in api_url.lower() else 'STANDARD'}")


In [None]:
# ============================================================================
# BENCHMARK 2: SEQUENTIAL METHOD (Standard Endpoint)
# ============================================================================
# Times whisp.whisp_formatted_stats_geojson_to_df_sequential for every
# combination in benchmark_params, repeated num_repetitions times, and appends
# one result dict per run to benchmark_results_sequential.
# NOTE(review): unlike the concurrent and legacy benchmarks, no precompiled
# whisp_image is passed here - confirm this is intended for the comparison.
from itertools import product

print("\n" + "="*80)
print("BENCHMARK 2: SEQUENTIAL METHOD (Standard Endpoint)")
print(f"Running {benchmark_params['num_repetitions']} repetitions per test")
print("="*80)

benchmark_results_sequential = []

num_repetitions = benchmark_params['num_repetitions']
# All (area, polygon count, vertex count) combinations, in the same order the
# equivalent nested loops would visit them.
configurations = list(product(
    benchmark_params['areas_ha'],
    benchmark_params['polygon_counts'],
    benchmark_params['vertex_complexity'],
))
total_combinations = len(configurations)
total_tests = total_combinations * num_repetitions
test_num = 0

for area_ha, num_polygons, num_vertices in configurations:
    # Run each configuration multiple times
    for rep in range(num_repetitions):
        test_num += 1

        # Generate test data (not included in the timed section)
        geojson_path, geojson_data = generate_test_geojson(
            num_polygons=num_polygons,
            area_ha=area_ha,
            num_vertices=num_vertices,
            bounds=geom
        )

        # Run sequential processing with timing. perf_counter is monotonic,
        # so system clock adjustments cannot distort the measurement.
        start_time = time.perf_counter()
        try:
            df_result = whisp.whisp_formatted_stats_geojson_to_df_sequential(
                input_geojson_filepath=geojson_path,
                national_codes=iso2_codes,
                add_metadata_client_side=True,
                logger=logger,
            )
            elapsed_time = time.perf_counter() - start_time
            status = "‚úÖ"
            error_msg = None
            rows_processed = df_result.shape[0]
        except Exception as e:
            # Failed runs are still recorded, with their elapsed time and error
            elapsed_time = time.perf_counter() - start_time
            status = "‚ùå"
            error_msg = str(e)
            rows_processed = 0

        # Store results
        benchmark_results_sequential.append({
            'method': 'Sequential',
            'area_ha': area_ha,
            'num_polygons': num_polygons,
            'num_vertices': num_vertices,
            'repetition': rep + 1,
            'total_features': num_polygons,
            'rows_processed': rows_processed,
            'time_seconds': elapsed_time,
            'time_per_polygon': elapsed_time / num_polygons if num_polygons > 0 else 0,
            'status': status,
            'error': error_msg
        })

        # Show progress: only the final repetition of each configuration is
        # printed, to reduce clutter
        if rep == num_repetitions - 1:
            print(f"[{test_num:3d}/{total_tests}] {status} Area: {area_ha:3d}ha | "
                  f"Polygons: {num_polygons:3d} | Vertices: {num_vertices:5d} | "
                  f"Rep {rep+1}/{num_repetitions} | Time: {elapsed_time:7.2f}s")

        # Cleanup - the file may still be locked, so retry a few times
        time.sleep(0.1)  # Give time for file to be released
        for attempt in range(3):
            try:
                os.remove(geojson_path)
            except FileNotFoundError:
                break  # Already gone; nothing to retry
            except PermissionError:
                if attempt < 2:
                    time.sleep(0.2)
                else:
                    print(f"Warning: could not remove temp file {geojson_path}")
            else:
                break

print(f"\n‚úÖ Sequential benchmarking complete ({test_num} tests with {benchmark_params['num_repetitions']} repetitions)")

In [None]:
# ============================================================================
# BACKUP: Save Sequential Results
# ============================================================================
# Write the raw sequential timings to a timestamped CSV in the output folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
sequential_backup_file = downloads_path / f"benchmark_sequential_{timestamp}.csv"
df_sequential_backup = pd.DataFrame(benchmark_results_sequential)
df_sequential_backup.to_csv(sequential_backup_file, index=False)
print(
    f"\nüíæ Sequential results backed up: {sequential_backup_file}",
    f"   Rows: {len(df_sequential_backup)}",
    sep="\n",
)


In [None]:
# ============================================================================
# BENCHMARK 3: LEGACY METHOD (Standard Endpoint)
# ============================================================================
# Times whisp.whisp_formatted_stats_geojson_to_df for every combination in
# benchmark_params, repeated num_repetitions times, and appends one result
# dict per run to benchmark_results_legacy.
from itertools import product

print("\n" + "="*80)
print("BENCHMARK 3: LEGACY METHOD - whisp_formatted_stats_geojson_to_df (Standard Endpoint)")
print(f"Running {benchmark_params['num_repetitions']} repetitions per test")
print("="*80)

benchmark_results_legacy = []

num_repetitions = benchmark_params['num_repetitions']
# All (area, polygon count, vertex count) combinations, in the same order the
# equivalent nested loops would visit them.
configurations = list(product(
    benchmark_params['areas_ha'],
    benchmark_params['polygon_counts'],
    benchmark_params['vertex_complexity'],
))
total_combinations = len(configurations)
total_tests = total_combinations * num_repetitions
test_num = 0

for area_ha, num_polygons, num_vertices in configurations:
    # Run each configuration multiple times
    for rep in range(num_repetitions):
        test_num += 1

        # Generate test data (not included in the timed section)
        geojson_path, geojson_data = generate_test_geojson(
            num_polygons=num_polygons,
            area_ha=area_ha,
            num_vertices=num_vertices,
            bounds=geom
        )

        # Run legacy processing with timing. perf_counter is monotonic,
        # so system clock adjustments cannot distort the measurement.
        start_time = time.perf_counter()
        try:
            df_result = whisp.whisp_formatted_stats_geojson_to_df(
                input_geojson_filepath=geojson_path,
                national_codes=iso2_codes,
                whisp_image=whisp_image_bench if precompiled_image else None,
            )
            elapsed_time = time.perf_counter() - start_time
            status = "‚úÖ"
            error_msg = None
            rows_processed = df_result.shape[0]
        except Exception as e:
            # Failed runs are still recorded, with their elapsed time and error
            elapsed_time = time.perf_counter() - start_time
            status = "‚ùå"
            error_msg = str(e)
            rows_processed = 0

        # Store results
        benchmark_results_legacy.append({
            'method': 'Legacy',
            'area_ha': area_ha,
            'num_polygons': num_polygons,
            'num_vertices': num_vertices,
            'repetition': rep + 1,
            'total_features': num_polygons,
            'rows_processed': rows_processed,
            'time_seconds': elapsed_time,
            'time_per_polygon': elapsed_time / num_polygons if num_polygons > 0 else 0,
            'status': status,
            'error': error_msg
        })

        # Show progress: only the final repetition of each configuration is
        # printed, to reduce clutter
        if rep == num_repetitions - 1:
            print(f"[{test_num:3d}/{total_tests}] {status} Area: {area_ha:3d}ha | "
                  f"Polygons: {num_polygons:3d} | Vertices: {num_vertices:5d} | "
                  f"Rep {rep+1}/{num_repetitions} | Time: {elapsed_time:7.2f}s")

        # Cleanup - the file may still be locked, so retry a few times
        time.sleep(0.1)  # Give time for file to be released
        for attempt in range(3):
            try:
                os.remove(geojson_path)
            except FileNotFoundError:
                break  # Already gone; nothing to retry
            except PermissionError:
                if attempt < 2:
                    time.sleep(0.2)
                else:
                    print(f"Warning: could not remove temp file {geojson_path}")
            else:
                break

print(f"\n‚úÖ Legacy benchmarking complete ({test_num} tests with {benchmark_params['num_repetitions']} repetitions)")

In [None]:
# ============================================================================
# BACKUP: Save Legacy Results
# ============================================================================
# Write the raw legacy timings to a timestamped CSV in the output folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
legacy_backup_file = downloads_path / f"benchmark_legacy_{timestamp}.csv"
df_legacy_backup = pd.DataFrame(benchmark_results_legacy)
df_legacy_backup.to_csv(legacy_backup_file, index=False)
print(
    f"\nüíæ Legacy results backed up: {legacy_backup_file}",
    f"   Rows: {len(df_legacy_backup)}",
    sep="\n",
)


In [30]:
# ============================================================================
# ANALYSIS 1: Combined Results Summary (with Statistics)
# ============================================================================
section_rule = "=" * 80
print("\n" + section_rule)
print("ANALYSIS 1: COMBINED RESULTS SUMMARY (with Statistics)")
print(section_rule)

# Stack the per-method result lists (concurrent, sequential, legacy) into one frame
all_results = [
    *benchmark_results_concurrent,
    *benchmark_results_sequential,
    *benchmark_results_legacy,
]
df_all_results = pd.DataFrame(all_results)

print("\nüìä Detailed Statistics by Method:")
print("(Mean ¬± Std Dev, with min/max and count)\n")

summary_spec = {
    'time_seconds': ['count', 'mean', 'std', 'min', 'max'],
    'time_per_polygon': ['mean', 'std'],
    'rows_processed': 'sum',
}
stats_by_method = df_all_results.groupby('method').agg(summary_spec).round(3)
print(stats_by_method)

# 95% confidence interval of the mean time per method (Student's t)
print("\n\nüìä 95% Confidence Intervals by Method:")
print("(Mean ¬± CI)\n")

from scipy import stats as scipy_stats

confidence = 0.95
# sort=False keeps methods in order of first appearance
for method, method_rows in df_all_results.groupby('method', sort=False):
    method_data = method_rows['time_seconds']
    n = len(method_data)
    mean = method_data.mean()
    sem = scipy_stats.sem(method_data)  # Standard error of mean
    ci = sem * scipy_stats.t.ppf((1 + confidence) / 2, n - 1)

    print(f"{method:18s}: {mean:7.2f}s ¬± {ci:6.2f}s (n={n}, 95% CI)")

print("\nüìä Success Rate by Method:")
status_counts = df_all_results.groupby('method')['status'].value_counts()
success_rates = status_counts.unstack(fill_value=0)
print(success_rates)
print("\nSuccess percentage:")
for method, method_df in df_all_results.groupby('method', sort=False):
    successes = method_df['status'].eq('‚úÖ').sum()
    success_pct = successes / len(method_df) * 100
    print(f"  {method:18s}: {success_pct:6.1f}%")


ANALYSIS 1: COMBINED RESULTS SUMMARY (with Statistics)

📊 Detailed Statistics by Method:
(Mean ± Std Dev, with min/max and count)

           time_seconds                                time_per_polygon  \
                  count    mean    std     min     max             mean   
method                                                                    
Concurrent           20  18.664  3.436  14.935  32.116            0.136   
Legacy               20  19.830  4.951  11.164  30.128            0.139   
Sequential           20  10.530  3.534   6.600  18.483            0.073   

                  rows_processed  
              std            sum  
method                            
Concurrent  0.031           2875  
Legacy      0.019           2875  
Sequential  0.013           2875  


📊 95% Confidence Intervals by Method:
(Mean ± CI)

Concurrent        :   18.66s ±   1.61s (n=20, 95% CI)
Sequential        :   10.53s ±   1.65s (n=20, 95% CI)
Legacy            :   19.83s ±   2.

In [31]:
# # ============================================================================
# # DEBUG: Print Concurrent Error Details & Endpoint Status
# # ============================================================================
# print("\n" + "="*80)
# print("CONCURRENT BENCHMARK - DIAGNOSTICS")
# print("="*80)

# # Check endpoint
# api_url = str(ee.data._cloud_api_base_url)
# print(f"\nüìç Current Endpoint: {api_url}")
# print(f"   Is HIGH-VOLUME: {'YES' if 'highvolume' in api_url.lower() else 'NO'}")

# # Print all errors
# errors_found = 0
# for i, result in enumerate(benchmark_results_concurrent):
#     if result['status'] == '‚ùå':
#         errors_found += 1
#         print(f"\n‚ùå Run {i+1}:")
#         print(f"   Error: {result['error']}")
#         print(f"   Config: {result['num_polygons']} polygons, {result['num_vertices']} vertices, {result['area_ha']}ha")

# if errors_found == 0:
#     print(f"\n‚úÖ No errors found in concurrent benchmark!")
# else:
#     print(f"\n‚ö†Ô∏è  Total errors: {errors_found}/{len(benchmark_results_concurrent)}")
#     print(f"\nüîç Root Cause Analysis:")
#     if not check_ee_endpoint("high-volume"):
#         print(f"   ‚Üí Using WRONG endpoint (STANDARD instead of HIGH-VOLUME)")
#         print(f"   ‚Üí This is likely causing all concurrent tests to fail")
#     else:
#         print(f"   ‚Üí Endpoint is correct, check function parameters")

In [32]:
# SETUP: reset the Earth Engine session and reconnect through the
# high-volume endpoint for the concurrent benchmark
high_volume_url = 'https://earthengine-highvolume.googleapis.com'
ee.Reset()
ee.Initialize(opt_url=high_volume_url)
print("‚úÖ High-volume endpoint activated for concurrent benchmark")


✅ High-volume endpoint activated for concurrent benchmark


In [33]:
# ============================================================================
# ANALYSIS 2: Performance by Input Size (Averaging Multiple Runs)
# ============================================================================
print("\n" + "="*80)
print("ANALYSIS 2: PERFORMANCE BY INPUT SIZE")
print(f"(Averages across {benchmark_params['num_repetitions']} repetitions)")
print("="*80)


def _report_time_by(column, heading, width, unit):
    """Print mean/std/count of time_seconds per method for each value of `column`.

    Returns the grouped statistics frame indexed by (method, column).
    """
    print(heading)
    grouped = (
        df_all_results
        .groupby(['method', column])['time_seconds']
        .agg(['mean', 'std', 'count'])
    )
    # Methods are listed in order of first appearance in df_all_results
    for method in df_all_results['method'].unique():
        print(f"\n{method}:")
        per_method = grouped.loc[method]
        for value in sorted(per_method.index):
            mean, std, count = per_method.loc[value]
            print(f"  {int(value):{width}d}{unit}: {mean:7.2f}s ¬± {std:6.2f}s (n={int(count)})")
    return grouped


# Analyze by number of polygons (complexity indicator)
polygon_stats = _report_time_by(
    'num_polygons', "\nüìà Average Time by Polygon Count (¬± Std Dev):", 3, " polygons")

area_stats = _report_time_by(
    'area_ha', "\nüìà Average Time by Area Size (¬± Std Dev):", 3, "ha")

vertex_stats = _report_time_by(
    'num_vertices', "\nüìà Average Time by Vertex Complexity (¬± Std Dev):", 5, " vertices")


ANALYSIS 2: PERFORMANCE BY INPUT SIZE
(Averages across 5 repetitions)

📈 Average Time by Polygon Count (± Std Dev):

Concurrent:
  100 polygons:   17.56s ±   0.75s (n=5)
  125 polygons:   17.42s ±   2.23s (n=5)
  150 polygons:   18.18s ±   0.73s (n=5)
  200 polygons:   21.49s ±   6.02s (n=5)

Sequential:
  100 polygons:    7.66s ±   1.00s (n=5)
  125 polygons:    8.61s ±   1.68s (n=5)
  150 polygons:   10.71s ±   1.91s (n=5)
  200 polygons:   15.14s ±   3.21s (n=5)

Legacy:
  100 polygons:   14.53s ±   3.19s (n=5)
  125 polygons:   18.07s ±   1.33s (n=5)
  150 polygons:   20.33s ±   1.97s (n=5)
  200 polygons:   26.38s ±   2.76s (n=5)

📈 Average Time by Area Size (± Std Dev):

Concurrent:
   10ha:   18.66s ±   3.44s (n=20)

Sequential:
   10ha:   10.53s ±   3.53s (n=20)

Legacy:
   10ha:   19.83s ±   4.95s (n=20)

📈 Average Time by Vertex Complexity (± Std Dev):

Concurrent:
     10 vertices:   18.66s ±   3.44s (n=20)

Sequential:
     10 vertices:   10.53s

In [None]:
# ============================================================================
# OUTLIER DETECTION & REMOVAL (EXTREMELY CONSERVATIVE - Only Extreme Outliers)
# ============================================================================
print("\n" + "="*80)
print("OUTLIER DETECTION & REMOVAL (EXTREMELY CONSERVATIVE - Only EXTREME Outliers)")
print("="*80)

# A run is flagged only when its time lies more than 5x IQR outside the
# quartiles of its own method (far wider than the usual 1.5x or 3x fences).
from scipy import stats as scipy_stats

iqr_multiplier = 5.0

print("\nBefore outlier removal:")
print(f"  Total rows: {len(df_all_results)}")

outliers = []
# sort=False keeps methods in order of first appearance
for method, method_rows in df_all_results.groupby('method', sort=False):
    times = method_rows['time_seconds']

    q1 = times.quantile(0.25)
    q3 = times.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - iqr_multiplier * iqr
    upper_bound = q3 + iqr_multiplier * iqr

    is_extreme = (times < lower_bound) | (times > upper_bound)
    method_outliers = method_rows[is_extreme]

    if method_outliers.empty:
        print(f"\n{method}: No extreme outliers detected")
        continue

    print(f"\n{method}:")
    print(f"  EXTREME bounds (5x IQR): [{lower_bound:.2f}s, {upper_bound:.2f}s]")
    print(f"  EXTREME outliers found: {len(method_outliers)}")
    for idx, row in method_outliers.iterrows():
        # Distance from the method mean, in units of the method's std dev
        deviation = abs(row['time_seconds'] - times.mean()) / times.std()
        print(f"    - Row {idx}: {row['time_seconds']:.2f}s ({deviation:.1f}œÉ deviation) | " +
              f"{row['num_polygons']} polygons √ó {row['num_vertices']} vertices | Rep {row['repetition']}")
        outliers.append(idx)

if not outliers:
    print(f"\n‚úÖ No extreme outliers detected - data is clean")
else:
    rule = '=' * 80
    print(f"\n{rule}")
    print(f"‚ö†Ô∏è  Removing {len(outliers)} EXTREME outlier rows (>5x IQR)...")

    # Drop the flagged rows and renumber the index
    df_all_results_clean = df_all_results.drop(outliers).reset_index(drop=True)

    print(f"After removal:")
    print(f"  Total rows: {len(df_all_results_clean)}")
    print(f"  Rows removed: {len(outliers)}")

    # Recompute the per-method summary on the cleaned data
    print(f"\n{rule}")
    print("RECALCULATED STATISTICS (WITHOUT EXTREME OUTLIERS)")
    print(f"{rule}")

    stats_by_method = (
        df_all_results_clean
        .groupby('method')
        .agg({
            'time_seconds': ['count', 'mean', 'std', 'min', 'max'],
            'rows_processed': 'sum',
        })
        .round(3)
    )
    print(stats_by_method)

    print(f"\n95% Confidence Intervals (Recalculated):")
    for method, clean_rows in df_all_results_clean.groupby('method', sort=False):
        clean_times = clean_rows['time_seconds']
        n = len(clean_times)
        mean = clean_times.mean()
        sem = scipy_stats.sem(clean_times)
        ci = sem * scipy_stats.t.ppf((1 + 0.95) / 2, n - 1)
        print(f"  {method:18s}: {mean:7.2f}s ¬± {ci:6.2f}s (n={n}, 95% CI)")

    # Later cells read df_all_results, so point it at the cleaned frame
    df_all_results = df_all_results_clean
    print(f"\n‚úÖ Extreme outliers removed and statistics recalculated")
    print(f"   Use df_all_results for updated analysis")


OUTLIER DETECTION & REMOVAL (EXTREMELY CONSERVATIVE - Only CRAZY Outliers)

Before outlier removal:
  Total rows: 60

Concurrent:
  CRAZY bounds (5x IQR): [10.36s, 25.82s]
  CRAZY outliers found: 1
    - Row 17: 32.12s (3.9σ deviation) | 200 polygons × 10 vertices | Rep 3

Sequential: No crazy outliers detected

Legacy: No crazy outliers detected

⚠️  Removing 1 CRAZY outlier rows (>5x IQR)...
After removal:
  Total rows: 59
  Rows removed: 1

RECALCULATED STATISTICS (WITHOUT CRAZY OUTLIERS)
           time_seconds                                rows_processed
                  count    mean    std     min     max            sum
method                                                               
Concurrent           19  17.956  1.371  14.935  20.499           2675
Legacy               20  19.830  4.951  11.164  30.128           2875
Sequential           20  10.530  3.534   6.600  18.483           2875

95% Confidence Intervals (Recalculated):
  Concurrent        :   17.96s ± 

In [None]:
# ============================================================================
# RECALCULATE ANALYSIS WITH CLEAN DATA (After Outlier Removal)
# ============================================================================
print("\n" + "="*80)
print("RECALCULATING ANALYSIS WITH OUTLIERS REMOVED")
print("="*80)

# One row per (method, configuration): mean/std/count of time_seconds and the
# mean time_per_polygon, rounded to 3 decimals
config_keys = ['area_ha', 'num_polygons', 'num_vertices']
df_averaged = (
    df_all_results
    .groupby(['method', *config_keys])
    .agg({
        'time_seconds': ['mean', 'std', 'count'],
        'time_per_polygon': 'mean',
    })
    .round(3)
)

# Flatten the two-level column names, e.g. ('time_seconds', 'mean') -> 'time_seconds_mean'
df_averaged.columns = [f"{name}_{stat}".strip() for name, stat in df_averaged.columns.values]
df_averaged = df_averaged.reset_index()

print(f"\n‚úÖ Recalculated df_averaged with {len(df_averaged)} configurations (cleaned data)")


def _averages_for(method_name):
    """Return df_averaged rows for one method, indexed by configuration."""
    subset = df_averaged[df_averaged['method'] == method_name].copy()
    return subset.set_index(config_keys)


def _speedup(baseline_time, concurrent_time):
    """Return baseline/concurrent, or 0 when the concurrent time is not positive."""
    return baseline_time / concurrent_time if concurrent_time > 0 else 0


df_concurrent_avg = _averages_for('Concurrent')
df_sequential_avg = _averages_for('Sequential')
df_legacy_avg = _averages_for('Legacy')

# Compare only configurations that all three methods have data for
speedup_data = []
for idx in df_concurrent_avg.index:
    if idx not in df_sequential_avg.index or idx not in df_legacy_avg.index:
        continue

    concurrent_time = df_concurrent_avg.loc[idx, 'time_seconds_mean']
    sequential_time = df_sequential_avg.loc[idx, 'time_seconds_mean']
    legacy_time = df_legacy_avg.loc[idx, 'time_seconds_mean']
    area, polygons, vertices = idx

    speedup_data.append({
        'area_ha': area,
        'num_polygons': polygons,
        'num_vertices': vertices,
        'concurrent_mean': concurrent_time,
        'sequential_mean': sequential_time,
        'legacy_mean': legacy_time,
        'speedup_vs_sequential': _speedup(sequential_time, concurrent_time),
        'speedup_vs_legacy': _speedup(legacy_time, concurrent_time),
    })

df_speedup = pd.DataFrame(speedup_data)
print(f"‚úÖ Recalculated df_speedup with {len(df_speedup)} speedup comparisons (cleaned data)")

print(f"\nüìä Speedup Summary (Cleaned Data):")
print(f"  vs Sequential: {df_speedup['speedup_vs_sequential'].mean():.2f}x average")
print(f"  vs Legacy:     {df_speedup['speedup_vs_legacy'].mean():.2f}x average")

In [None]:
# ============================================================================
# ANALYSIS 5: Visualizations (Including Error Bars from Repetitions)
# ============================================================================
section_rule = "=" * 80
print(f"\n{section_rule}")
print("ANALYSIS 5: CREATING VISUALIZATIONS (with Error Bars)")
print(section_rule)

import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting defaults: seaborn's white grid theme and a 16x12 inch default
# size for any figure created without an explicit figsize.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (16, 12)})

# Canvas for this analysis; the explicit size overrides the default set above.
fig = plt.figure(figsize=(20, 16))

# Panels 1-3: mean processing time against one workload dimension each, with one
# line per method. For every x value the plotted point is the mean of the
# per-configuration mean times, and the error bar is the mean of the
# per-configuration standard deviations.
time_panels = [
    # (subplot slot, x column, marker, x label, title, log-scaled x axis)
    (1, 'num_polygons', 'o', 'Number of Polygons',
     'Processing Time vs Polygon Count (with Std Dev)', True),
    (2, 'num_vertices', 's', 'Number of Vertices per Polygon',
     'Processing Time vs Vertex Complexity (with Std Dev)', True),
    (3, 'area_ha', '^', 'Area (hectares)',
     'Processing Time vs Area Size (with Std Dev)', False),
]
time_axes = []
for slot, x_column, marker, x_label, panel_title, log_x in time_panels:
    panel_ax = plt.subplot(3, 3, slot)
    for method in sorted(df_averaged['method'].unique()):
        method_rows = df_averaged[df_averaged['method'] == method]
        data = (
            method_rows.groupby(x_column)[['time_seconds_mean', 'time_seconds_std']]
            .mean()
            .reset_index()
        )
        panel_ax.errorbar(
            data[x_column],
            data['time_seconds_mean'],
            yerr=data['time_seconds_std'],
            marker=marker,
            label=method,
            linewidth=2,
            capsize=5,
        )
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Time (seconds)')
    panel_ax.set_title(panel_title)
    if log_x:
        panel_ax.set_xscale('log')
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)
    time_axes.append(panel_ax)

# Keep the per-panel axes names available to the rest of the notebook.
ax1, ax2, ax3 = time_axes

# 4. Variability comparison (coefficient of variation)
# CV (%) = std / mean * 100 of each method's per-configuration mean times.
# NOTE(review): because the CV is taken across configurations, it reflects how
# much timings differ between workloads rather than run-to-run repeatability;
# confirm this is the intended "consistency" metric.
ax4 = plt.subplot(3, 3, 4)
mean_times = df_averaged['time_seconds_mean']
method_labels = df_averaged['method']
variability_data = [
    {
        'Method': method,
        'CV (%)': (mean_times[method_labels == method].std()
                   / mean_times[method_labels == method].mean()) * 100,
    }
    for method in sorted(method_labels.unique())
]
variability_df = pd.DataFrame(variability_data)
bars = ax4.bar(
    variability_df['Method'],
    variability_df['CV (%)'],
    color=['steelblue', 'coral', 'green'],
)
ax4.set(ylabel='Coefficient of Variation (%)',
        title='Consistency: Lower CV = More Predictable')
ax4.grid(True, alpha=0.3, axis='y')
# Write each bar's value just above it.
for bar, cv_percent in zip(bars, variability_df['CV (%)']):
    label_x = bar.get_x() + bar.get_width() / 2.
    ax4.text(label_x, cv_percent, f'{cv_percent:.1f}%', ha='center', va='bottom')

# 5. Speedup vs Sequential
# Mean speedup_vs_sequential per polygon count, drawn as evenly spaced bars with
# a dashed reference line at 1 ("No speedup").
ax5 = plt.subplot(3, 3, 5)
speedup_by_polygons = df_speedup.groupby('num_polygons')['speedup_vs_sequential'].mean()
sequential_slots = range(len(speedup_by_polygons))
bars = ax5.bar(sequential_slots, speedup_by_polygons.values, color='steelblue')
ax5.set_xticks(sequential_slots)
ax5.set_xticklabels(speedup_by_polygons.index)
ax5.set(xlabel='Number of Polygons',
        ylabel='Speedup (x)',
        title='Concurrent Speedup vs Sequential')
ax5.axhline(y=1, color='r', linestyle='--', alpha=0.5, label='No speedup')
ax5.legend()
ax5.grid(True, alpha=0.3, axis='y')
# Write each bar's value just above it.
for bar, speedup in zip(bars, speedup_by_polygons.values):
    label_x = bar.get_x() + bar.get_width() / 2.
    ax5.text(label_x, speedup, f'{speedup:.2f}x', ha='center', va='bottom')

# 6. Speedup vs Legacy
# Mean speedup_vs_legacy per polygon count, drawn as evenly spaced bars with a
# dashed reference line at 1 ("No speedup").
ax6 = plt.subplot(3, 3, 6)
speedup_by_polygons_legacy = df_speedup.groupby('num_polygons')['speedup_vs_legacy'].mean()
legacy_slots = range(len(speedup_by_polygons_legacy))
bars = ax6.bar(legacy_slots, speedup_by_polygons_legacy.values, color='coral')
ax6.set_xticks(legacy_slots)
ax6.set_xticklabels(speedup_by_polygons_legacy.index)
ax6.set(xlabel='Number of Polygons',
        ylabel='Speedup (x)',
        title='Concurrent Speedup vs Legacy')
ax6.axhline(y=1, color='r', linestyle='--', alpha=0.5, label='No speedup')
ax6.legend()
ax6.grid(True, alpha=0.3, axis='y')
# Write each bar's value just above it.
for bar, speedup in zip(bars, speedup_by_polygons_legacy.values):
    label_x = bar.get_x() + bar.get_width() / 2.
    ax6.text(label_x, speedup, f'{speedup:.2f}x', ha='center', va='bottom')

# 7. Distribution of all runs (violin plot)
# One violin per method, built from every individual run in df_all_results
# (not from the per-configuration averages), in alphabetical method order.
ax7 = plt.subplot(3, 3, 7)
violin_methods = sorted(df_all_results['method'].unique())
violin_slots = range(len(violin_methods))
runs_per_method = [
    df_all_results.loc[df_all_results['method'] == m, 'time_seconds'].values
    for m in violin_methods
]
parts = ax7.violinplot(
    runs_per_method,
    positions=violin_slots,
    showmeans=True,
    showmedians=True,
)
ax7.set_xticks(violin_slots)
ax7.set_xticklabels(violin_methods)
ax7.set_ylabel('Time (seconds)')
ax7.set_title('Distribution of All Runs (Violin Plot)')
ax7.grid(True, alpha=0.3, axis='y')

# 8. Box plot by method
# One box per method, built from every individual run in df_all_results, in
# alphabetical method order.
ax8 = plt.subplot(3, 3, 8)
box_methods = sorted(df_all_results['method'].unique())
box_data = [df_all_results[df_all_results['method'] == m]['time_seconds'].values
            for m in box_methods]
# Label the boxes after drawing rather than through boxplot(labels=...): that
# keyword was renamed to tick_labels in Matplotlib 3.9 and the old spelling is
# deprecated, whereas set_xticklabels behaves the same on old and new versions.
bp = ax8.boxplot(box_data, patch_artist=True)
ax8.set_xticklabels(box_methods)
# patch_artist=True makes the boxes fillable patches so they can be coloured.
for patch, color in zip(bp['boxes'], ['steelblue', 'coral', 'green']):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
ax8.set_ylabel('Time (seconds)')
ax8.set_title('Time Distribution by Method (Box Plot)')
ax8.grid(True, alpha=0.3, axis='y')

# 9. Heatmap: Mean time by Polygons and Method
# Rows are polygon counts, columns are methods; each cell averages the
# per-configuration mean times that fall into it.
ax9 = plt.subplot(3, 3, 9)
pivot_by_method = pd.pivot_table(
    df_averaged,
    index='num_polygons',
    columns='method',
    values='time_seconds_mean',
    aggfunc='mean',
)
heatmap_options = {
    'annot': True,
    'fmt': '.1f',
    'cmap': 'RdYlGn_r',
    'ax': ax9,
    'cbar_kws': {'label': 'Time (s)'},
}
sns.heatmap(pivot_by_method, **heatmap_options)
ax9.set_title('Mean Time by Method and Polygon Count')

# Figure-level title stating how many repetitions each configuration was run.
suptitle_template = 'WHISP Stats Processing Benchmark - Statistical Analysis ({} repetitions each)'
plt.suptitle(
    suptitle_template.format(benchmark_params['num_repetitions']),
    fontsize=16,
    fontweight='bold',
    y=0.995,
)
# Lay the panels out inside a rectangle slightly smaller than the full canvas.
plt.tight_layout(rect=[0, 0.03, 1, 0.99])
plt.show()

print("‚úÖ Visualizations created")

In [None]:
# ============================================================================
# EXPORT RESULTS (Raw Data + Summary Statistics)
# ============================================================================
export_rule = "=" * 80
print(f"\n{export_rule}")
print("EXPORTING RESULTS TO DOWNLOADS")
print(export_rule)

# One timestamp shared by all three files so a single export run can be matched up.
export_started = datetime.now()
timestamp = export_started.strftime("%Y%m%d_%H%M%S")

# 1. Export raw results (all individual runs)
raw_df = df_all_results.copy()
raw_filename = downloads_path.joinpath(f"benchmark_results_raw_{timestamp}.csv")
raw_df.to_csv(raw_filename, index=False)
print(f"\nüíæ Raw results (all {len(raw_df)} runs): {raw_filename}")

# 2. Export averaged results (per configuration), ordered by method and then by
#    polygon count, vertex count and area.
avg_sort_order = ['method', 'num_polygons', 'num_vertices', 'area_ha']
avg_export_df = df_averaged.copy().sort_values(avg_sort_order)
avg_filename = downloads_path.joinpath(f"benchmark_results_averaged_{timestamp}.csv")
avg_export_df.to_csv(avg_filename, index=False)
print(f"üíæ Averaged results ({len(avg_export_df)} configurations): {avg_filename}")

# 3. Export speedup analysis
speedup_filename = downloads_path.joinpath(f"benchmark_speedup_analysis_{timestamp}.csv")
df_speedup.to_csv(speedup_filename, index=False)
print(f"üíæ Speedup analysis: {speedup_filename}")

# Display summary statistics: run counts per method, number of averaged rows and
# the repetition count taken from benchmark_params.
run_methods = raw_df['method']
print(f"\nüìä Results Summary:")
print(f"  Total raw test runs: {len(raw_df)}")
print(f"    - Concurrent:   {(run_methods == 'Concurrent').sum()}")
print(f"    - Sequential:   {(run_methods == 'Sequential').sum()}")
print(f"    - Legacy:       {(run_methods == 'Legacy').sum()}")
print(f"  Total configurations: {len(avg_export_df)}")
print(f"  Repetitions per config: {benchmark_params['num_repetitions']}")

print(f"\nüìä Output Directory:")
print(f"  üìÅ {downloads_path}")

# Previews show the first ten rows, restricted to the most relevant columns.
raw_preview_columns = [
    'method', 'area_ha', 'num_polygons', 'num_vertices',
    'repetition', 'time_seconds', 'status',
]
print(f"\nüìä Raw Data Preview (first 10 runs):")
print(raw_df.head(10)[raw_preview_columns].to_string(index=False))

avg_preview_columns = [
    'method', 'area_ha', 'num_polygons', 'num_vertices',
    'time_seconds_mean', 'time_seconds_std', 'time_seconds_count',
]
print(f"\nüìä Averaged Data Preview (first 10 configs):")
print(avg_export_df.head(10)[avg_preview_columns].to_string(index=False))