#### Main changes
- Using selfMask() to avoid empty pixels (a lot of bands are sparse)
- Skipping the validation step – panderas should be fast but there is some temp schema generation that takes time 
- Using high volume end point and concurrent processing
- Using reduceRegions instead of mapped reduceRegion - a chunk of code for choosing ha or percent etc is based on using reduceRegion and it also allowed to skip
- Skipping the use of points to get the admin details (country and level 1 info) and water_flag (should be based on image but was using vector admin still)


In [None]:
import ee

# Reset Earth Engine completely
ee.Reset()

# Initialize with standard (normal) endpoint
# ee.Initialize()

In [None]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Authenticate and initialize Earth Engine
try:
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')  # Try to use existing credentials first
except Exception:
    ee.Authenticate()
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

In [None]:
# !pip install --upgrade --pre openforis-whisp

In [None]:
combined_reducer = ee.Reducer.sum().combine(ee.Reducer.median(),sharedInputs=True)

In [None]:
# Check which endpoint is now active
print("EE Data Base URL:", ee.data._cloud_api_base_url)
print("EE API Base URL:", ee.data._api_base_url)

# Check if using standard endpoint
if 'highvolume' in str(ee.data._cloud_api_base_url):
    print("❌ Still using HIGH-VOLUME endpoint")
else:
    print("✅ Now using STANDARD endpoint")

In [None]:
import geopandas as gpd
import random
import math
import numpy as np
from shapely.geometry import Polygon, Point
from shapely.validation import make_valid
from shapely.geometry import mapping

def generate_random_polygon(
    min_lon, min_lat, max_lon, max_lat, min_area_ha=1, max_area_ha=10, vertex_count=20
):
    """
    Ultra-fast polygon generation using buffer method with EXACT vertex count control
    """
    target_area_ha = random.uniform(min_area_ha, max_area_ha)
    center_lon = random.uniform(min_lon, max_lon)
    center_lat = random.uniform(min_lat, max_lat)
    
    # Estimate radius for target area
    radius_degrees = math.sqrt(target_area_ha / math.pi) / 111.32 / 100
    
    # Create center point
    center_point = Point(center_lon, center_lat)
    
    # METHOD 1: Use buffer with EXACT resolution to control vertices
    # Buffer resolution directly controls vertex count
    poly = center_point.buffer(radius_degrees, resolution=vertex_count//4)
    
    # METHOD 2: Manual vertex creation for EXACT count (more control)
    if vertex_count > 50:  # For high vertex counts, create manually
        # Generate exactly the requested number of vertices
        angles = np.linspace(0, 2 * math.pi, vertex_count, endpoint=False)
        
        # Add controlled randomness to create natural variation
        base_radius = radius_degrees
        
        # Smooth sine wave variations for natural look
        freq1 = random.uniform(2, 5)
        amp1 = random.uniform(0.08, 0.15)  # Small amplitude for smooth shapes
        
        freq2 = random.uniform(8, 15)
        amp2 = random.uniform(0.03, 0.08)
        
        # Calculate radius variations
        radius_variation = (amp1 * np.sin(freq1 * angles + random.uniform(0, 2*math.pi)) + 
                           amp2 * np.sin(freq2 * angles + random.uniform(0, 2*math.pi)))
        
        radii = base_radius * (1.0 + radius_variation)
        radii = np.maximum(radii, base_radius * 0.6)  # Ensure reasonable minimum
        
        # Calculate coordinates
        xs = center_lon + radii * np.cos(angles)
        ys = center_lat + radii * np.sin(angles)
        
        # Clip to bounds
        xs = np.clip(xs, min_lon, max_lon)
        ys = np.clip(ys, min_lat, max_lat)
        
        # Create vertices list with EXACT count
        vertices = list(zip(xs, ys))
        vertices.append(vertices[0])  # Close polygon
        
        # Create polygon
        poly = Polygon(vertices)
        
        # Validate
        if not poly.is_valid:
            poly = make_valid(poly)
            if hasattr(poly, 'geoms'):
                poly = max(poly.geoms, key=lambda p: p.area)
    
    else:
        # For smaller vertex counts, add small perturbations to buffered circle
        coords = list(poly.exterior.coords)
        
        # Resample to get exact vertex count
        if len(coords) - 1 != vertex_count:  # -1 because last point = first point
            # Create new vertices with exact count
            angles = np.linspace(0, 2 * math.pi, vertex_count, endpoint=False)
            
            new_coords = []
            for angle in angles:
                # Base position on circle
                x = center_lon + radius_degrees * math.cos(angle)
                y = center_lat + radius_degrees * math.sin(angle)
                
                # Small random perturbation
                dx = random.uniform(-radius_degrees * 0.08, radius_degrees * 0.08)
                dy = random.uniform(-radius_degrees * 0.08, radius_degrees * 0.08)
                
                # Apply bounds
                new_x = np.clip(x + dx, min_lon, max_lon)
                new_y = np.clip(y + dy, min_lat, max_lat)
                new_coords.append((new_x, new_y))
            
            # Close polygon
            new_coords.append(new_coords[0])
            poly = Polygon(new_coords)
    
    # Calculate area
    actual_area_ha = poly.area * 111320 * 111320 * math.cos(math.radians(center_lat)) / 10000
    
    return poly, actual_area_ha


def generate_properties(area_ha, index):
    """Generate properties for features with sequential internal_id"""
    return {
        "internal_id": index + 1,
        "vertex_count": None,  # Will be set after polygon creation
    }


def create_geojson(
    bounds,
    num_polygons=25,
    min_area_ha=1,
    max_area_ha=10,
    min_number_vert=10,
    max_number_vert=20,
):
    """Create a GeoJSON file with EXACT vertex count control"""
    min_lon, min_lat, max_lon, max_lat = bounds

    print(f"🏗️  Generating {num_polygons} polygons with {min_number_vert}-{max_number_vert} vertices...")
    
    features = []
    
    # Pre-generate ALL random values at once
    vertex_counts = np.random.randint(min_number_vert, max_number_vert + 1, num_polygons)
    center_lons = np.random.uniform(min_lon, max_lon, num_polygons)
    center_lats = np.random.uniform(min_lat, max_lat, num_polygons)
    target_areas = np.random.uniform(min_area_ha, max_area_ha, num_polygons)
    
    for i in range(num_polygons):
        # Show progress for large batches
        if i > 0 and i % 250 == 0:
            print(f"   Generated {i}/{num_polygons} polygons ({i/num_polygons*100:.0f}%)...")
        
        # Use pre-calculated values
        requested_vertices = vertex_counts[i]
        
        # Generate polygon with EXACT vertex count
        polygon, actual_area = generate_random_polygon(
            min_lon, min_lat, max_lon, max_lat,
            min_area_ha=target_areas[i] * 0.9,
            max_area_ha=target_areas[i] * 1.1,
            vertex_count=requested_vertices
        )
        
        # Verify vertex count (excluding closing vertex)
        actual_vertex_count = len(list(polygon.exterior.coords)) - 1
        
        # Create GeoJSON feature with vertex count verification
        properties = generate_properties(actual_area, index=i)
        properties["requested_vertices"] = int(requested_vertices)
        properties["actual_vertices"] = int(actual_vertex_count)
        
        feature = {
            "type": "Feature", 
            "properties": properties,
            "geometry": mapping(polygon),
        }
        
        features.append(feature)

    print(f"✅ Generated {num_polygons} polygons!")
    
    # Print vertex count summary
    actual_counts = [f["properties"]["actual_vertices"] for f in features]
    requested_counts = [f["properties"]["requested_vertices"] for f in features]
    
    print(f"📊 Vertex count summary:")
    print(f"   Requested: {min(requested_counts)}-{max(requested_counts)} vertices")
    print(f"   Actual: {min(actual_counts)}-{max(actual_counts)} vertices")
    print(f"   Average match: {sum(1 for i, j in zip(requested_counts, actual_counts) if abs(i-j) <= 2) / len(features) * 100:.1f}%")
    
    geojson = {"type": "FeatureCollection", "features": features}
    return geojson

In [None]:
import openforis_whisp as whisp



In [None]:

whisp_image = whisp.combine_datasets()

In [21]:
import ee
import geopandas as gpd
import pandas as pd
import time
import threading
from queue import Queue
import logging
from typing import List, Optional, Dict, Any
from concurrent.futures import ThreadPoolExecutor, as_completed
import openforis_whisp as whisp
import tempfile
import os

# Simplified logging setup
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("whisp-batch")

# Optimized configuration for EE high-volume processing
EE_MAX_CONCURRENT = 10
EE_FEATURES_PER_BATCH = 25
MAX_RETRIES = 3


class OptimizedWhispProcessor:
    """Optimized processor using whisp.convert_geojson_to_ee() with proper file handling"""
    
    def __init__(self, max_concurrent=EE_MAX_CONCURRENT, features_per_batch=EE_FEATURES_PER_BATCH):
        self.max_concurrent = max_concurrent
        self.features_per_batch = features_per_batch
        self.semaphore = threading.Semaphore(max_concurrent)
        self.results = {}
        self.processing_stats = {'completed': 0, 'failed': 0, 'total': 0}
        self.failed_batches = []
        self.max_consecutive_failures = 3  # Stop if 3 batches fail in a row
        
    def process_file_optimized(self, geojson_path: str, national_codes: Optional[List[str]] = None) -> pd.DataFrame:
        """Process file using whisp.convert_geojson_to_ee() with validation"""
        
        print(f"🔍 Loading and validating GeoJSON file...")
        
        # Load and validate the GeoDataFrame first
        try:
            gdf = gpd.read_file(geojson_path)
            print(f"📁 Loaded {len(gdf):,} features from {geojson_path}")
            
            # Basic geometry validation
            invalid_geoms = gdf.geometry.isna().sum()
            if invalid_geoms > 0:
                print(f"⚠️  Found {invalid_geoms} null geometries - removing...")
                gdf = gdf[~gdf.geometry.isna()]
                
            # Check for valid geometries
            valid_geoms = gdf.geometry.is_valid.sum()
            invalid_geom_count = len(gdf) - valid_geoms
            if invalid_geom_count > 0:
                print(f"⚠️  Found {invalid_geom_count} invalid geometries - fixing...")
                from shapely.validation import make_valid
                gdf['geometry'] = gdf['geometry'].apply(lambda g: make_valid(g) if g and not g.is_valid else g)
                
            print(f"✅ Validated {len(gdf):,} geometries")
            
        except Exception as e:
            print(f"❌ Failed to load/validate GeoJSON: {e}")
            return pd.DataFrame()
        
        total_features = len(gdf)
        
        # Split into feature batches
        feature_batches = []
        for i in range(0, total_features, self.features_per_batch):
            batch = gdf.iloc[i:i+self.features_per_batch]
            feature_batches.append(batch)
        
        total_batches = len(feature_batches)
        print(f"📊 Processing {total_features:,} features in {total_batches} batches ({self.features_per_batch} features/batch)")
        print(f"🔄 Running {self.max_concurrent} concurrent requests...")
        print(f"🛑 Will stop if {self.max_consecutive_failures} consecutive batches fail")
        
        # Track progress and failures
        completed_batches = 0
        failed_batches = 0
        consecutive_failures = 0
        
        # Process batches with early stopping 
    
        results = []
        with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
            print(f"🚀 Submitting all {total_batches} batches concurrently...")
            
            # Submit ALL batches at once for maximum concurrency
            future_to_batch = {
                executor.submit(self._process_feature_batch, batch, national_codes, i): i 
                for i, batch in enumerate(feature_batches)
            }
            
            print(f"✅ All batches submitted - processing with {self.max_concurrent} concurrent workers...")
            
            # Collect results with early stopping on consecutive failures
            for future in as_completed(future_to_batch):
                batch_idx = future_to_batch[future]
                try:
                    batch_result = future.result()
                    results.append(batch_result)
                    completed_batches += 1
                    consecutive_failures = 0  # Reset failure counter on success
                    
                    # Show progress every 10 batches or at completion
                    if completed_batches % 10 == 0 or completed_batches == total_batches - failed_batches:
                        success_rate = completed_batches / (completed_batches + failed_batches) * 100 if (completed_batches + failed_batches) > 0 else 0
                        print(f"✅ Progress: {completed_batches}/{total_batches} batches completed ({success_rate:.1f}% success rate)")
                        
                except Exception as e:
                    failed_batches += 1
                    consecutive_failures += 1
                    error_msg = str(e)
                    
                    print(f"❌ Batch {batch_idx + 1} failed: {error_msg[:80]}...")
                    
                    # Early stopping only on excessive consecutive failures
                    if consecutive_failures >= self.max_consecutive_failures:
                        print(f"🛑 STOPPING: {consecutive_failures} consecutive failures detected")
                        print(f"💡 This suggests systematic issues - cancelling remaining batches")
                        
                        # Cancel remaining futures to free up resources
                        for remaining_future in future_to_batch:
                            if not remaining_future.done():
                                remaining_future.cancel()
                        break
                    
                    self.processing_stats['failed'] += 1
                # Final summary
                if results:
                    combined_df = pd.concat(results, ignore_index=True)
                    success_rate = completed_batches / (completed_batches + failed_batches) * 100 if (completed_batches + failed_batches) > 0 else 0
                    print(f"🎉 Successfully processed {len(combined_df):,} features!")
                    print(f"📈 Success rate: {success_rate:.1f}% ({completed_batches}/{completed_batches + failed_batches} batches)")
                    
                    if failed_batches > 0:
                        print(f"⚠️  {failed_batches} batches failed")
                        
                    return combined_df
                else:
                    print("❌ No results produced - all batches failed")
                    print("💡 Suggestions:")
                    print("   - Check if GeoJSON has valid geometries")
                    print("   - Try smaller batch sizes (FEATURES_PER_EE_REQUEST)")
                    print("   - Verify Earth Engine authentication")
                    print("   - Check if features are within valid coordinate ranges")
                    return pd.DataFrame()
    
    def _process_feature_batch(self, batch_gdf: gpd.GeoDataFrame, national_codes: Optional[List[str]], batch_idx: int) -> pd.DataFrame:
        """Process a single batch using whisp.convert_geojson_to_ee() with proper file handling"""
        
        with self.semaphore:
            temp_geojson_path = None
            try:
                # Create temporary file with delete=False to handle manually
                temp_fd, temp_geojson_path = tempfile.mkstemp(suffix='.geojson', text=True)
                
                try:
                    # Close the file descriptor so Windows can access it
                    os.close(temp_fd)
                    
                    # Save batch to temporary GeoJSON file
                    batch_gdf.to_file(temp_geojson_path, driver='GeoJSON')
                    
                    # Use whisp to convert GeoJSON to EE FeatureCollection
                    # This handles reprojection and validation automatically
                    feature_collection = whisp.convert_geojson_to_ee(temp_geojson_path)
                    
                    # Process the FeatureCollection
                    result_df = self._process_ee_feature_collection(feature_collection, national_codes, batch_idx)
                    
                    return result_df
                    
                except Exception as processing_error:
                    raise processing_error
                    
            except Exception as e:
                raise Exception(f"Batch {batch_idx + 1} processing failed: {str(e)}")
                
            finally:
                # Clean up temporary file with proper error handling
                if temp_geojson_path and os.path.exists(temp_geojson_path):
                    try:
                        # Small delay to ensure file is released
                        time.sleep(0.1)
                        os.unlink(temp_geojson_path)
                    except OSError as cleanup_error:
                        # If we can't delete, log it but don't fail
                        logger.warning(f"Could not delete temp file {temp_geojson_path}: {cleanup_error}")

    def _process_ee_feature_collection(self, feature_collection: ee.FeatureCollection, 
                                 national_codes: Optional[List[str]], batch_idx: int) -> pd.DataFrame:
        """Process FeatureCollection with enhanced retry logic"""
        
        for attempt in range(MAX_RETRIES):
            try:
                # Use whisp_image.reduceRegions for processing
                # results = whisp_image.reduceRegions(
                #     collection=feature_collection,
                #     reducer=combined_reducer,
                #     scale=10
                # )
                # df_result = whisp.convert_ee_to_df(results)

                results = whisp.whisp_stats_ee_to_df(
                    feature_collection=feature_collection,
                    # national_codes=national_codes,
                    whisp_image=whisp_image
                )
                df_result = results

           

                return df_result
                
            except ee.EEException as e:
                error_msg = str(e)
                
                # Check for specific geometry errors
                if "Unable to transform geometry" in error_msg:
                    raise Exception(f"Geometry transformation error in batch {batch_idx + 1}: {error_msg}")
                elif "Quota" in error_msg or "limit" in error_msg.lower():
                    if attempt < MAX_RETRIES - 1:
                        backoff = min(30, 2 ** attempt)
                        print(f"⏳ Quota/rate limit hit, waiting {backoff}s before retry...")
                        time.sleep(backoff)
                    else:
                        raise Exception(f"Quota/rate limit exhausted for batch {batch_idx + 1}")
                elif "timeout" in error_msg.lower():
                    if attempt < MAX_RETRIES - 1:
                        backoff = min(15, 2 ** attempt)
                        print(f"⏳ Timeout, retrying in {backoff}s...")
                        time.sleep(backoff)
                    else:
                        raise e
                else:
                    if attempt < MAX_RETRIES - 1:
                        backoff = min(10, 2 ** attempt)
                        time.sleep(backoff)
                    else:
                        raise e
                        
            except Exception as e:
                if attempt < MAX_RETRIES - 1:
                    backoff = min(5, 2 ** attempt)
                    time.sleep(backoff)
                else:
                    raise e
        
        raise RuntimeError(f"Failed to process batch {batch_idx + 1} after {MAX_RETRIES} attempts")

In [22]:
!pip show openforis-whisp

Name: openforis-whisp
Version: 2.0.0b1
Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
Home-page: 
Author: Andy Arnell
Author-email: andrew.arnell@fao.org
License: MIT
Location: c:\Users\Arnell\Documents\GitHub\whisp\.venv\Lib\site-packages
Editable project location: C:\Users\Arnell\Documents\GitHub\whisp
Requires: country_converter, earthengine-api, geojson, geopandas, ipykernel, numpy, pandas, pandera, pydantic-core, python-dotenv, rsa, shapely
Required-by: 


In [23]:
folder_path = (r"C:\Users\Arnell\Downloads\a_processing_tests")  # Replace with your folder path

In [24]:
GEOJSON_EXAMPLE_FILEPATH = folder_path+"/random_polygons.geojson"

# Define bounds from the provided Earth Engine geometry
# # area in Ghana 
# bounds = [ 
#     -3.04548260909834,  # min_lon
#     5.253961384163733,  # min_lat
#     -1.0179939534016594,  # max_lon
#     7.48307210714245    # max_lat
# ]

# # area in China
# bounds = [
#     90.44831497309737,  # min_lon
#     20.686366665187148,  # min_lat
#     114.57868606684737,  # max_lon
#     30.79200348254393    # max_lat
# ]

# Brazil etc
bounds = [-81.06002305884182,
        -19.332462745930076,
        -31.48971055884182,
         9.600139384904205
        ]

In [25]:
# random_geojson = whisp.create_geojson(
random_geojson = create_geojson(
    bounds, 
    num_polygons=1000, 
    min_area_ha=5, 
    max_area_ha=10, 
    min_number_vert=90, 
    max_number_vert=100)

GEOJSON_EXAMPLE_FILEPATH = folder_path + "/random_polygons.geojson"
print(GEOJSON_EXAMPLE_FILEPATH)
import json
# Save the GeoJSON to a file
with open(GEOJSON_EXAMPLE_FILEPATH, 'w') as f:
    json.dump(random_geojson, f)

# Use example Whisp inputs (optional)
# GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")


# Add IDs to your existing GeoJSON file

# #Save to a new file (instead of overwriting)
# # whisp.reformat_geojson_properties(
# whisp.reformat_geojson_properties(
    
#     geojson_path=GEOJSON_EXAMPLE_FILEPATH, 
#     id_field="internal_id",
#     output_path=folder_path + "/random_polygons_with_ids.geojson",
#     remove_properties=True
# )

🏗️  Generating 1000 polygons with 90-100 vertices...
   Generated 250/1000 polygons (25%)...
   Generated 500/1000 polygons (50%)...
   Generated 750/1000 polygons (75%)...
✅ Generated 1000 polygons!
📊 Vertex count summary:
   Requested: 90-100 vertices
   Actual: 90-100 vertices
   Average match: 100.0%
C:\Users\Arnell\Downloads\a_processing_tests/random_polygons.geojson


In [26]:
# GEOJSON_EXAMPLE_FILEPATH = folder_path+"/RSPO-Concessions-Version-10-May-2025.geojson"

In [27]:

# Example usage with controlled batch sizes
if __name__ == "__main__":
    
    # Configure batch size based on your data characteristics
    FEATURES_PER_EE_REQUEST = 50  # Small batches for complex geometries
    MAX_CONCURRENT_EE_REQUESTS = 20  # Conservative for quota management
    
    # Initialize processor
    processor = OptimizedWhispProcessor(
    
        max_concurrent=MAX_CONCURRENT_EE_REQUESTS,
        features_per_batch=FEATURES_PER_EE_REQUEST
    )
    
    # Process file with controlled batching
    try:
        # GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")
        
        logger.info(f"Processing with {FEATURES_PER_EE_REQUEST} features per Earth Engine request")
        logger.info(f"Maximum {MAX_CONCURRENT_EE_REQUESTS} concurrent requests")
        
        result_df = processor.process_file_optimized(
            GEOJSON_EXAMPLE_FILEPATH, 
            # national_codes=["br", "co"]
        )
        
        if not result_df.empty:
            print(f"Success! Processed {len(result_df)} features")
            print("\nFirst 5 rows:")
            print(result_df.head())
            
            # Save results
            result_df.to_csv(Path.home() / "downloads"/ "optimized_whisp_results.csv", index=False)
            logger.info("Results saved to optimized_whisp_results.csv")
        else:
            print("No results produced")
            
        print(f"Processing stats: {processor.processing_stats}")
        
    except Exception as e:
        logger.error(f"Processing failed: {e}")

2025-10-14 22:23:52,659 - INFO - Processing with 50 features per Earth Engine request
2025-10-14 22:23:52,660 - INFO - Maximum 20 concurrent requests
2025-10-14 22:23:53,317 - INFO - Created 50 records


🔍 Loading and validating GeoJSON file...
📁 Loaded 1,000 features from C:\Users\Arnell\Downloads\a_processing_tests/random_polygons.geojson
✅ Validated 1,000 geometries
📊 Processing 1,000 features in 20 batches (50 features/batch)
🔄 Running 20 concurrent requests...
🛑 Will stop if 3 consecutive batches fail
🚀 Submitting all 20 batches concurrently...


2025-10-14 22:23:53,393 - INFO - Created 50 records
2025-10-14 22:23:53,541 - INFO - Created 50 records
2025-10-14 22:23:53,592 - INFO - Created 50 records
2025-10-14 22:23:53,660 - INFO - Created 50 records
2025-10-14 22:23:53,708 - INFO - Created 50 records
2025-10-14 22:23:53,739 - INFO - Created 50 records
2025-10-14 22:23:53,885 - INFO - Created 50 records
2025-10-14 22:23:53,885 - INFO - Created 50 records
2025-10-14 22:23:54,036 - INFO - Created 50 records
2025-10-14 22:23:54,036 - INFO - Created 50 records
2025-10-14 22:23:54,143 - INFO - Created 50 records


✅ All batches submitted - processing with 20 concurrent workers...


2025-10-14 22:23:54,238 - INFO - Created 50 records
2025-10-14 22:23:54,287 - INFO - Created 50 records
2025-10-14 22:23:54,302 - INFO - Created 50 records
2025-10-14 22:23:54,403 - INFO - Created 50 records
2025-10-14 22:23:54,544 - INFO - Created 50 records
2025-10-14 22:23:54,568 - INFO - Created 50 records
2025-10-14 22:23:54,766 - INFO - Created 50 records
2025-10-14 22:23:54,975 - INFO - Created 50 records


🎉 Successfully processed 50 features!
📈 Success rate: 100.0% (1/1 batches)


2025-10-14 22:25:37,354 - INFO - Results saved to optimized_whisp_results.csv


Success! Processed 50 features

First 5 rows:
                                                 geo Admin_Level_1   Area  \
0  {'type': 'Polygon', 'coordinates': [[[-43.8103...         Piaui  0.058   
1  {'type': 'Polygon', 'coordinates': [[[-33.2148...       Unknown  0.074   
2  {'type': 'Polygon', 'coordinates': [[[-66.9193...       Bolívar  0.077   
3  {'type': 'Polygon', 'coordinates': [[[-79.9223...       Unknown  0.099   
4  {'type': 'Polygon', 'coordinates': [[[-53.3352...         Amapa  0.073   

   Centroid_lat  Centroid_lon  Cocoa_2023_FDaP  Cocoa_ETH  Cocoa_FDaP  \
0     -9.784622    -43.810237                0          0           0   
1     -2.236516    -33.214678                0          0           0   
2      6.993892    -66.919179                0          0           0   
3    -14.041860    -79.922174                0          0           0   
4      1.946154    -53.335117                0          0           0   

   Coffee_FDaP  Coffee_FDaP_2023  ... TMF_deg_2023  

In [None]:
result_df  # Display first few rows of combined results

In [None]:
# result_df_no_geo = result_df.drop(columns=['geo'], errors='ignore')

In [None]:
result_df_no_geo

In [None]:
results_from_ee = pd.read_csv(Path.home() / 'downloads' / "whisp_RSPO_Concessions_May_2025_output_table_w_risk.csv")

In [None]:
results_from_ee

In [None]:
# Define the output folder 
# e.g. in running in Sepal this might be: Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table.csv'

# Save the CSV file
result_df.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table with risk columns saved to: {csv_output_file}")

In [None]:
# Define the output file path for GeoJSON
geojson_output_file = out_directory / 'whisp_output_geo.geojson'

# Save the GeoJSON file
whisp.convert_df_to_geojson(result_df, geojson_output_file)  # builds a geojson file containing Whisp columns. Uses the geometry column "geo" to create the spatial features.
print(f"GeoJSON file saved to: {geojson_output_file}")

Classic Whisp

In [None]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Authenticate and initialize Earth Engine
try:
    ee.Initialize()  # Try to use existing credentials first
except Exception:
    ee.Authenticate()
    ee.Initialize()

In [None]:
import openforis_whisp as whisp


In [None]:
!pip show openforis-whisp

In [None]:
#### whisp = whisp.whisp_formatted_stats_geojson_to_df(GEOJSON_EXAMPLE_FILEPATH)
# whisp = whisp.whisp_stats_geojson_to_df(GEOJSON_EXAMPLE_FILEPATH,whisp_image=whisp_image)

In [None]:
import openforis_whisp as whisp
fc = whisp.convert_geojson_to_ee(GEOJSON_EXAMPLE_FILEPATH)
print(fc.size().getInfo())  # Print number of features in the collection


In [None]:
whisp_image = whisp.combine_datasets()

In [None]:
combined_reducer = ee.Reducer.sum().combine(ee.Reducer.median(),sharedInputs=True)

In [None]:
results = whisp_image.reduceRegions(fc.limit(10), reducer=combined_reducer, scale=10)

In [None]:
whisp.convert_ee_to_df(results)