# Test: WHISP Concurrent & Sequential Processing

Testing new concurrent and sequential stats processing functions with proper logging, progress tracking, and endpoint validation.

## Test Structure

- Concurrent processing (high-volume endpoint)
- Sequential processing (standard endpoint)
- Results comparison and validation

## Part 1: Setup

Initialize Earth Engine and configure logging

In [64]:
import ee

# Reset Earth Engine completely
ee.Reset()
print("✅ Earth Engine reset")

✅ Earth Engine reset


## Part 2: CONCURRENT PROCESSING (High-Volume Endpoint)

Test concurrent processing with the high-volume endpoint

In [None]:
import ee

# Initialize and set high-volume endpoint
try:
    ee.Initialize()
    print("Earth Engine reset")
    ee.data.setDebuggingEnabled(False)
    print("Initialized with high-volume endpoint")
except Exception as e:
    try:
        ee.Authenticate()
        
        ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
        print("Earth Engine reset and initialized with HIGH-VOLUME endpoint for benchmarking")
        print("Authenticated and initialized with high-volume endpoint")
    except:
        print("Using HIGH-VOLUME endpoint")
        if "high-volume" not in str(e).lower():
            print("WARNING: Not using high-volume endpoint!")

Earth Engine reset
Earth Engine reset and initialized with HIGH-VOLUME endpoint for benchmarking
Authenticated and initialized with high-volume endpoint
Earth Engine reset and initialized with HIGH-VOLUME endpoint for benchmarking
Authenticated and initialized with high-volume endpoint


In [66]:
# Verify endpoint is high-volume
api_url = str(ee.data._cloud_api_base_url)
if 'highvolume' in api_url:
    print("✅ Using HIGH-VOLUME endpoint")
else:
    print("❌ WARNING: Not using high-volume endpoint!")

✅ Using HIGH-VOLUME endpoint


In [67]:
import openforis_whisp as whisp
import logging
from openforis_whisp.advanced_stats import (
    setup_concurrent_logger,
    validate_ee_endpoint,
    whisp_stats_geojson_to_df_concurrent,
    check_ee_endpoint,
)

print("✅ Imported advanced stats module")

✅ Imported advanced stats module


In [54]:
# Setup logging for concurrent processing
logger = setup_concurrent_logger(level=logging.INFO)
logger.info("Logging configured")

INFO: Logging configured


In [55]:
# Choose if want to include additional custom layers
USE_CUSTOM_BANDS = True # set to True if want to add extra ee data to whisp

In [56]:
# Choose if want to include additional custom layers
USE_CUSTOM_BANDS = True # set to True if want to add extra ee data to whisp
# =============================================================================
# CUSTOM BANDS SETUP (OPTIONAL) - runs only if USE_CUSTOM_BANDS = True above
# =============================================================================
if USE_CUSTOM_BANDS:

    # Step 1: Define custom Earth Engine images (binary values 0 or 1)
    custom_images = {
        'example_treecover': ee.Image(1),  # ee.Image("UMD/hansen/global_forest_change_2024_v1_12").select("treecover2000").gt(10).selfMask()
        'nXX_example_commodity': ee.Image.random(seed=1).gte(.5).reproject(crs='EPSG:4326', scale=10) # ee.ImageCollection("projects/forestdatapartnership/assets/cocoa/model_2025a").filter(ee.Filter.date('2020-01-01', '2021-01-01')).mosaic().gt(.8).selfMask()
        # add more images as needed (prefix 'nXX_' = iso2 code for national dataset)
    }

    # Step 2: Define metadata for each custom band (keys must match above)
    # Themes: 'treecover', 'commodities', 'disturbance_before', 'disturbance_after'
    # Timber themes: 'primary', 'naturally_reg_2020', 'planted_plantation_2020', etc.
    custom_bands_info = {
        'example_treecover': {
            'ISO2_code': "",          # Country code (empty = all countries)
            'theme': 'treecover',     # Risk theme
            'theme_timber': "",       # Timber theme (if applicable)
            'use_for_risk': 1,        # Include in risk calculations (1=yes, 0=no)
            'use_for_risk_timber': 0  # Include in timber risk (1=yes, 0=no)
        },
        'nXX_example_commodity': {
            'ISO2_code': "XX", 
            'theme': 'commodities', 
            'theme_timber': "",
            'use_for_risk': 1, 
            'use_for_risk_timber': 0
        }
        # add more band metadata as needed
    }

    # Step 3: Combine custom bands and extract names
    custom_ee_image = whisp.combine_custom_bands(custom_images, custom_bands_info)

    custom_bands = list(custom_bands_info.keys())


In [57]:
# Choose additional national datasets to include (currently three countries: 'co', 'ci', 'br').
base_iso2_codes = ['co', 'ci', 'br']

# automatically add any custom ISO2 codes from custom_bands_info if USE_CUSTOM_BANDS is True
iso2_codes_list = base_iso2_codes.copy()
if USE_CUSTOM_BANDS:
    iso2_codes_list += [code.lower() for code in {v.get('ISO2_code') for v in custom_bands_info.values()} if code and code.lower() not in iso2_codes_list]

In [58]:
import openforis_whisp as whisp

print("Imported concurrent stats module")

Imported concurrent stats module


In [59]:
num_polygons=1000  # Smaller dataset for testing
min_area_ha=10 
max_area_ha=10 
min_number_vert=10     
max_number_vert=10   

In [60]:
# Generate test data (or use your own GeoJSON)
import geopandas as gpd
import json
import tempfile
import os
import io
from contextlib import redirect_stdout

state_geom = (ee.FeatureCollection("projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1")
    .filter(ee.Filter.inList('gaul1_name', ['Amazonas', 'Mato Grosso', 'Rondônia', 'Pará'])))
bounds = state_geom.geometry().bounds()

# Suppress GeoJSON generation messages
with redirect_stdout(io.StringIO()):
    random_geojson = whisp.generate_test_polygons(
        bounds=bounds, 
        num_polygons=num_polygons,
        min_area_ha=min_area_ha, 
        max_area_ha=max_area_ha, 
        min_number_vert=min_number_vert,     
        max_number_vert=max_number_vert     
    )

# Save to temporary file
temp_fd, concurrent_geojson_path = tempfile.mkstemp(suffix='.geojson', text=True)
os.close(temp_fd)
with open(concurrent_geojson_path, 'w') as f:
    json.dump(random_geojson, f)

print(f"Generated test GeoJSON with {len(random_geojson['features'])} features")
print(f"   Saved to: {concurrent_geojson_path}")

[utils.py | generate_test_polygons() | l.378] INFO: Extracting bounds from Earth Engine Geometry...
[utils.py | generate_test_polygons() | l.391] INFO: Bounds: [-78.71, -18.04, -46.06, 6.20]
[utils.py | generate_test_polygons() | l.419] INFO: Generating 1000 test polygons with 10-10 vertices...
[utils.py | generate_test_polygons() | l.391] INFO: Bounds: [-78.71, -18.04, -46.06, 6.20]
[utils.py | generate_test_polygons() | l.419] INFO: Generating 1000 test polygons with 10-10 vertices...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 250/1000 polygons (25%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 250/1000 polygons (25%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 500/1000 polygons (50%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 500/1000 polygons (50%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 750/1000 polygons (75%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 7

In [61]:
# Helper function to validate GeoJSON file size
def validate_geojson_size(geojson_path, max_size_mb=10):
    """
    Check if GeoJSON file size is within acceptable limits.
    
    Args:
        geojson_path: Path to the GeoJSON file
        max_size_mb: Maximum allowed size in MB (default: 10)
    
    Returns:
        tuple: (is_valid, size_mb, message)
    """
    import os
    
    file_size_bytes = os.path.getsize(geojson_path)
    file_size_mb = file_size_bytes / (1024 * 1024)
    
    is_valid = file_size_mb <= max_size_mb
    
    if is_valid:
        message = f"GeoJSON size OK: {file_size_mb:.2f} MB (limit: {max_size_mb} MB)"
    else:
        message = f"GeoJSON TOO LARGE: {file_size_mb:.2f} MB (limit: {max_size_mb} MB)"
    
    return is_valid, file_size_mb, message

# Test the validation function
test_path = concurrent_geojson_path
msg = validate_geojson_size(test_path, max_size_mb=10)
print(msg[2])

GeoJSON size OK: 0.64 MB (limit: 10 MB)


In [62]:
# Create Whisp image with national codes
iso2_codes = ['br', 'co', 'ci']

# whisp_image = whisp.combine_datasets(national_codes=iso2_codes)
# band_names = whisp_image.bandNames().getInfo()
# print(f"Created Whisp image with {len(band_names)} bands")

In [63]:
# Test concurrent: GeoJSON → DataFrame with automatic formatting
print("\n" + "="*70)
print("TEST 1: Concurrent GeoJSON → DataFrame (Formatted)")
print("="*70 + "\n")

try:
    df_concurrent = whisp.whisp_formatted_stats_geojson_to_df_concurrent(
        input_geojson_filepath=concurrent_geojson_path,
        # whisp_image=whisp_image,
        # custom_bands=custom_bands if USE_CUSTOM_BANDS else None,
        national_codes=iso2_codes,
        batch_size=10,
        max_concurrent=20,
        validate_geometries=False,
        add_metadata_server=False,
        logger=logger,
        
    )
    
    print(f"\n✅ SUCCESS: Concurrent processing complete!")
    print(f"   Processed: {df_concurrent.shape[0]} features")
    print(f"   Output columns: {df_concurrent.shape[1]}")
    print(f"\n   First row sample:")
    print(df_concurrent.iloc[0, :8])
    
except Exception as e:
    print(f"❌ ERROR: {str(e)}")
    import traceback
    traceback.print_exc()


TEST 1: Concurrent GeoJSON → DataFrame (Formatted)

INFO: Loading GeoJSON: C:\Users\Arnell\AppData\Local\Temp\tmpylvfdwa1.geojson
INFO: Loaded 1,000 features
INFO: Loaded 1,000 features
INFO: Processing 1,000 features in 100 batches
INFO: Processing 1,000 features in 100 batches


2025-11-04 14:41:53,025 - INFO - Created 10 records
2025-11-04 14:41:53,048 - INFO - Created 10 records
2025-11-04 14:41:53,048 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp9g8tx7fv.geojson


2025-11-04 14:41:53,112 - INFO - Created 10 records
2025-11-04 14:41:53,159 - INFO - Created 10 records
2025-11-04 14:41:53,159 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpq89pn8ok.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpmdjjol34.geojson


2025-11-04 14:41:53,214 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmphioufh5d.geojson


2025-11-04 14:41:53,214 - INFO - Created 10 records
2025-11-04 14:41:53,233 - INFO - Created 10 records
2025-11-04 14:41:53,233 - INFO - Created 10 records
2025-11-04 14:41:53,267 - INFO - Created 10 records
2025-11-04 14:41:53,267 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpuvs1gdbn.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmphae70zj7.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmprt4vmgj0.geojson


2025-11-04 14:41:53,288 - INFO - Created 10 records
2025-11-04 14:41:53,333 - INFO - Created 10 records
2025-11-04 14:41:53,333 - INFO - Created 10 records
2025-11-04 14:41:53,379 - INFO - Created 10 records
2025-11-04 14:41:53,379 - INFO - Created 10 records
2025-11-04 14:41:53,432 - INFO - Created 10 records
2025-11-04 14:41:53,432 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpsziifqmu.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp8nmylt1u.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpcsl9cdsg.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpc8iav8qk.geojson


2025-11-04 14:41:53,482 - INFO - Created 10 records
2025-11-04 14:41:53,485 - INFO - Created 10 records
2025-11-04 14:41:53,485 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmph6q6uzp8.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpqjpq2nah.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp57g79hja.geojson


2025-11-04 14:41:53,526 - INFO - Created 10 records
2025-11-04 14:41:53,557 - INFO - Created 10 records
2025-11-04 14:41:53,557 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp2jfnyif0.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpeoop3a2x.geojson


2025-11-04 14:41:53,824 - INFO - Created 10 records
2025-11-04 14:41:53,855 - INFO - Created 10 records
2025-11-04 14:41:53,855 - INFO - Created 10 records
2025-11-04 14:41:54,021 - INFO - Created 10 records
2025-11-04 14:41:54,021 - INFO - Created 10 records
2025-11-04 14:41:54,112 - INFO - Created 10 records
2025-11-04 14:41:54,112 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpajcg23zx.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpr7b34w_t.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp_rkfxric.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp44cqfa84.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp44cqfa84.geojson


2025-11-04 14:42:00,234 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp35zppvfb.geojson


2025-11-04 14:42:00,843 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp2i1wprn8.geojson


2025-11-04 14:42:02,341 - INFO - Created 10 records
2025-11-04 14:42:02,395 - INFO - Created 10 records
2025-11-04 14:42:02,395 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpiv0a5r93.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmppue9zdm7.geojson


2025-11-04 14:42:02,826 - INFO - Created 10 records
2025-11-04 14:42:02,877 - INFO - Created 10 records
2025-11-04 14:42:02,877 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpomkidc1w.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp1gpxy9v2.geojson


2025-11-04 14:42:03,468 - INFO - Created 10 records
2025-11-04 14:42:03,502 - INFO - Created 10 records
2025-11-04 14:42:03,502 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpneak2jsz.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmps8xvfkoe.geojson


2025-11-04 14:42:03,923 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp52fmilvp.geojson


2025-11-04 14:42:04,181 - INFO - Created 10 records
2025-11-04 14:42:04,189 - INFO - Created 10 records
2025-11-04 14:42:04,189 - INFO - Created 10 records
2025-11-04 14:42:04,299 - INFO - Created 10 records
2025-11-04 14:42:04,299 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpqwii1gxk.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpfnmkwqd0.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpji6gxn48.geojson


2025-11-04 14:42:04,985 - INFO - Created 10 records
2025-11-04 14:42:05,026 - INFO - Created 10 records
2025-11-04 14:42:05,026 - INFO - Created 10 records
2025-11-04 14:42:05,100 - INFO - Created 10 records
2025-11-04 14:42:05,100 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpkx4o97z8.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpo39hj6ci.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpmijiqu9h.geojson


2025-11-04 14:42:05,494 - INFO - Created 10 records
2025-11-04 14:42:05,542 - INFO - Created 10 records
2025-11-04 14:42:05,542 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpwlhsopsn.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp5llaw51x.geojson


2025-11-04 14:42:06,294 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpbqya1rhr.geojson


2025-11-04 14:42:07,483 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpwgd33z5w.geojson


2025-11-04 14:42:08,874 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpcrm_6wal.geojson


2025-11-04 14:42:09,972 - INFO - Created 10 records
2025-11-04 14:42:10,079 - INFO - Created 10 records
2025-11-04 14:42:10,079 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpioygmvco.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpy4k3daxq.geojson


2025-11-04 14:42:11,354 - INFO - Created 10 records
2025-11-04 14:42:11,529 - INFO - Created 10 records
2025-11-04 14:42:11,529 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpvzk_c5gd.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp1id3rmsm.geojson
INFO: Progress: 25/100 (25% complete)
INFO: Progress: 25/100 (25% complete)


2025-11-04 14:42:11,864 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp6opz7ujq.geojson


2025-11-04 14:42:12,929 - INFO - Created 10 records
2025-11-04 14:42:12,982 - INFO - Created 10 records
2025-11-04 14:42:12,982 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmprvzsflpa.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpsz13njup.geojson


2025-11-04 14:42:14,269 - INFO - Created 10 records
2025-11-04 14:42:14,552 - INFO - Created 10 records
2025-11-04 14:42:14,552 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmppaemjwf0.geojson


2025-11-04 14:42:14,676 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpnvjxiokk.geojson


2025-11-04 14:42:14,729 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp2b0x5q84.geojson


2025-11-04 14:42:14,911 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpejjwaeps.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp_nkjpb65.geojson


2025-11-04 14:42:15,141 - INFO - Created 10 records
2025-11-04 14:42:15,455 - INFO - Created 10 records
2025-11-04 14:42:15,455 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmptcipv7vl.geojson


2025-11-04 14:42:15,579 - INFO - Created 10 records
2025-11-04 14:42:17,061 - INFO - Created 10 records
2025-11-04 14:42:17,082 - INFO - Created 10 records
2025-11-04 14:42:17,061 - INFO - Created 10 records
2025-11-04 14:42:17,082 - INFO - Created 10 records
2025-11-04 14:42:17,204 - INFO - Created 10 records
2025-11-04 14:42:17,204 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpqxr1acbg.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp213t_bdt.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmplmgj0u_h.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpx9n_j546.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpyirgazts.geojson


2025-11-04 14:42:18,238 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpafvuozx4.geojson


2025-11-04 14:42:19,846 - INFO - Created 10 records
2025-11-04 14:42:20,029 - INFO - Created 10 records
2025-11-04 14:42:20,029 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpcc_mqa7e.geojson


2025-11-04 14:42:20,137 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpv5ed1bm6.geojson


2025-11-04 14:42:20,327 - INFO - Created 10 records
2025-11-04 14:42:20,512 - INFO - Created 10 records
2025-11-04 14:42:20,512 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp8x37pyog.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp31jee_jy.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpp1tdzpsf.geojson


2025-11-04 14:42:20,802 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp458un3ii.geojson


2025-11-04 14:42:22,907 - INFO - Created 10 records
2025-11-04 14:42:22,982 - INFO - Created 10 records
2025-11-04 14:42:23,123 - INFO - Created 10 records
2025-11-04 14:42:22,982 - INFO - Created 10 records
2025-11-04 14:42:23,123 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpze1m_kxo.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpur0081os.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp77s8ro0p.geojson


2025-11-04 14:42:24,089 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp18h50za9.geojson
INFO: Progress: 50/100 (50% complete)


2025-11-04 14:42:24,276 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpsve9a3g7.geojson


2025-11-04 14:42:25,090 - INFO - Created 10 records
2025-11-04 14:42:25,146 - INFO - Created 10 records
2025-11-04 14:42:25,146 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpwmo17zv2.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpno27_out.geojson


2025-11-04 14:42:26,409 - INFO - Created 10 records
2025-11-04 14:42:26,432 - INFO - Created 10 records
2025-11-04 14:42:26,432 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpur_ibkwx.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpqet2znwl.geojson


2025-11-04 14:42:27,266 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpju9f9d4g.geojson


2025-11-04 14:42:27,681 - INFO - Created 10 records
2025-11-04 14:42:27,681 - INFO - Created 10 records
2025-11-04 14:42:27,681 - INFO - Created 10 records
2025-11-04 14:42:27,914 - INFO - Created 10 records
2025-11-04 14:42:27,914 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpoyq8s587.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp07o18rlu.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp4o_y4cri.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp4o_y4cri.geojson


2025-11-04 14:42:28,189 - INFO - Created 10 records
2025-11-04 14:42:28,596 - INFO - Created 10 records
2025-11-04 14:42:28,596 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpkee5l5cv.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpjluoiq2d.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpjluoiq2d.geojson


2025-11-04 14:42:29,974 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpl3xv10ya.geojson


2025-11-04 14:42:30,445 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpn0qbg_s3.geojson


2025-11-04 14:42:31,414 - INFO - Created 10 records
2025-11-04 14:42:31,594 - INFO - Created 10 records
2025-11-04 14:42:31,594 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpioblylak.geojson


2025-11-04 14:42:31,621 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpfmd9mi20.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpnfbg05ww.geojson


2025-11-04 14:42:32,069 - INFO - Created 10 records
2025-11-04 14:42:33,980 - INFO - Created 10 records
2025-11-04 14:42:33,980 - INFO - Created 10 records
2025-11-04 14:42:34,157 - INFO - Created 10 records
2025-11-04 14:42:34,157 - INFO - Created 10 records
2025-11-04 14:42:34,183 - INFO - Created 10 records
2025-11-04 14:42:34,183 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpekruz7ec.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpw5svkh7n.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp99sdln2_.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp34pvr3yv.geojson


2025-11-04 14:42:34,395 - INFO - Created 10 records
2025-11-04 14:42:34,470 - INFO - Created 10 records
2025-11-04 14:42:34,470 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpj2949glg.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpb47o0_el.geojson


2025-11-04 14:42:35,923 - INFO - Created 10 records
2025-11-04 14:42:35,954 - INFO - Created 10 records
2025-11-04 14:42:35,954 - INFO - Created 10 records
2025-11-04 14:42:36,564 - INFO - Created 10 records
2025-11-04 14:42:36,564 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpt16ekkdk.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp48cxrqq7.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp9e6y4ez0.geojson
INFO: Progress: 75/100 (75% complete)
INFO: Progress: 75/100 (75% complete)


2025-11-04 14:42:37,632 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp6tv8hm3l.geojson


2025-11-04 14:42:38,481 - INFO - Created 10 records
2025-11-04 14:42:38,495 - INFO - Created 10 records
2025-11-04 14:42:38,546 - INFO - Created 10 records
2025-11-04 14:42:38,495 - INFO - Created 10 records
2025-11-04 14:42:38,546 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp6j73suc8.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp2p5svv43.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmplxfkhws3.geojson


2025-11-04 14:42:39,100 - INFO - Created 10 records
2025-11-04 14:42:39,216 - INFO - Created 10 records
2025-11-04 14:42:39,216 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp6bz32m4j.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpstqhy39k.geojson




INFO: Progress: 100/100 (100% complete)
INFO: Processing complete: 100/100 batches
INFO: Processing complete: 100/100 batches


  combined = pd.concat(results, ignore_index=True)


INFO: Processed 1,000 features successfully
Using cached schema for national_codes: ['br', 'co', 'ci']
[logger.py | info() | l.23] INFO: Found 5 extra columns: ['actual_area_ha', 'actual_vertices', 'internal_id', 'requested_area_ha', 'requested_vertices']
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.
Using cached schema for national_codes: ['br', 'co', 'ci']
[logger.py | info() | l.23] INFO: Found 5 extra columns: ['actual_area_ha', 'actual_vertices', 'internal_id', 'requested_area_ha', 'requested_vertices']
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.
[logger.py | info() | l.23] INFO: custom_bands=None: Excluding all custom bands (strict mode)
INFO: Concurrent processing + formatting + validation complete
[logger.py | info() | l.23] INFO: custom_bands=None: Excluding all custom bands (strict mode)
INFO: Concurrent processing + formatting + validation complete

✅ SUCCESS: Concurrent processing complete!
   Processed: 1000 features
   O

In [41]:
df_concurrent

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,nBR_MapBiomas_col9_palmoil_2020,nBR_MapBiomas_col9_pc_2020,nBR_INPE_TCamz_cer_annual_2020,nBR_MapBiomas_col9_soy_2020,nBR_MapBiomas_col9_annual_crops_2020,nBR_INPE_TCamz_pasture_2020,nBR_INPE_TCcer_pasture_2020,nBR_MapBiomas_col9_pasture_2020,nCI_Cocoa_bnetd,geo
0,1,,8.902,Polygon,COL,CO,Meta,-72.609578,2.938842,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-72.6111..."
1,2,,9.497,Polygon,GUF,GF,Cayenne,-53.409679,5.485465,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-53.4112..."
2,3,,10.219,Polygon,BRA,BR,Pará,-53.385549,-1.670386,ha,...,0.0,0.0,0.0,0.0,0.0,0.011,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-53.3872..."
3,4,,8.489,Polygon,BRA,BR,Bahia,-46.280713,-11.612091,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-46.2823..."
4,5,,8.987,Polygon,BRA,BR,Acre,-71.806918,-9.445431,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-71.8085..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,,9.229,Polygon,BRA,BR,Pará,-55.518330,-4.549403,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-55.5199..."
996,997,,8.707,Polygon,,,,-77.712314,6.057306,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-77.7139..."
997,998,,9.342,Polygon,VEN,VE,Amazonas,-64.843860,4.540215,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-64.8454..."
998,999,,9.216,Polygon,,,,-78.330012,2.786583,ha,...,0.0,0.0,0.0,0.0,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-78.3316..."


## Part 3: SEQUENTIAL PROCESSING (For Comparison)

Test sequential (standard endpoint) processing as an alternative approach

### Part 3A: Switch to Standard Endpoint

Switch from high-volume to standard endpoint for sequential testing

In [42]:
import ee

# Reset Earth Engine completely
ee.Reset()
print("✅ Earth Engine reset")

✅ Earth Engine reset


In [43]:
# Earth Engine initialization with STANDARD endpoint
try:
    ee.Initialize(opt_url='https://earthengine.googleapis.com')
    print("✅ Initialized with standard endpoint")
except Exception:
    ee.Authenticate()
    ee.Initialize(opt_url='https://earthengine.googleapis.com')
    print("✅ Authenticated and initialized with standard endpoint")

✅ Initialized with standard endpoint


In [None]:
# Generate fresh test data for sequential testing (avoid caching)
print("\n" + "="*80)
print("GENERATING TEST DATA FOR SEQUENTIAL")
print("="*80)

# Suppress GeoJSON generation messages
with redirect_stdout(io.StringIO()):
    random_geojson_sequential = whisp.generate_test_polygons(
        bounds=bounds, 
        num_polygons=num_polygons,
        min_area_ha=min_area_ha, 
        max_area_ha=max_area_ha, 
        min_number_vert=min_number_vert,     
        max_number_vert=max_number_vert     
    )

# Save to temporary file
temp_fd_sequential, sequential_geojson_path = tempfile.mkstemp(suffix='.geojson', text=True)
os.close(temp_fd_sequential)
with open(sequential_geojson_path, 'w') as f:
    json.dump(random_geojson_sequential, f)

print(f"✅ Generated test GeoJSON with {len(random_geojson_sequential['features'])} features")
print(f"   Saved to: {sequential_geojson_path}")


GENERATING TEST DATA
[utils.py | generate_test_polygons() | l.378] INFO: Extracting bounds from Earth Engine Geometry...
[utils.py | generate_test_polygons() | l.391] INFO: Bounds: [-78.71, -18.04, -46.06, 6.20]
[utils.py | generate_test_polygons() | l.419] INFO: Generating 1000 test polygons with 10-10 vertices...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 250/1000 polygons (25%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 500/1000 polygons (50%)...
[utils.py | generate_test_polygons() | l.433] INFO: Generated 750/1000 polygons (75%)...
[utils.py | generate_test_polygons() | l.467] INFO: Generated 1000 polygons!
[utils.py | generate_test_polygons() | l.473] INFO: Vertex count - Requested: 10-10, Actual: 10-10
[utils.py | generate_test_polygons() | l.481] INFO: Area (ha) - Requested: 10.0-10.0, Actual: 7.8-10.9
Generated test GeoJSON with 1000 features
   Saved to: C:\Users\Arnell\AppData\Local\Temp\tmp8vutfu1y.geojson


TypeError: string indices must be integers, not 'str'

In [45]:
# Test sequential: GeoJSON → DataFrame (Sequential Processing)
print("\nTEST 2: Sequential GeoJSON → DataFrame (Sequential)")
print("-" * 80)

try:
    df_sequential = whisp.whisp_formatted_stats_geojson_to_df_sequential(
        input_geojson_filepath=sequential_geojson_path,
        national_codes=['BR'],
        add_metadata_client_side=True,
        logger=logger,
    )
    
    print(f"\n✅ SUCCESS: Sequential processing complete!")
    print(f"   Processed: {df_sequential.shape[0]} features")
    print(f"   Output columns: {df_sequential.shape[1]}")
    print("\n   First row preview:")
    print(df_sequential.iloc[0, :8])
    
except Exception as e:
    print(f"\n❌ FAILED: {e}")
    import traceback
    traceback.print_exc()
    df_sequential = None


TEST 2: Sequential GeoJSON → DataFrame (Sequential)
--------------------------------------------------------------------------------

❌ FAILED: name 'sequential_geojson_path' is not defined


Traceback (most recent call last):
  File "C:\Users\Arnell\AppData\Local\Temp\ipykernel_6300\925425433.py", line 7, in <module>
    input_geojson_filepath=sequential_geojson_path,
                           ^^^^^^^^^^^^^^^^^^^^^^^
NameError: name 'sequential_geojson_path' is not defined


In [None]:
# Compare concurrent vs sequential results
print("\nCOMPARISON: Concurrent vs Sequential")
print("=" * 80)

if df_concurrent is not None and df_sequential is not None:
    print(f"\nConcurrent shape:  {df_concurrent.shape}")
    print(f"Sequential shape:  {df_sequential.shape}")
    
    # Verify they return same columns
    if set(df_concurrent.columns) == set(df_sequential.columns):
        print("\n✅ Column names match!")
    else:
        print("\n⚠️  Column names differ")
        print(f"   Concurrent columns: {set(df_concurrent.columns) - set(df_sequential.columns)}")
        print(f"   Sequential columns: {set(df_sequential.columns) - set(df_concurrent.columns)}")
    
    print(f"\n✅ Sequential is simpler and better for debugging")

In [None]:
df_sequential