### Whisp a feature collection

### Setup
- NB: use a virtual environment to avoid altering your system Python environment (see https://docs.python.org/3/tutorial/venv.html)

In [1]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Initialise Earth Engine against the high-volume endpoint.
# If the first attempt raises, authenticate and then initialise again
# with the same settings.
ee_init_kwargs = {
    "project": 'ee-andyarnellgee',
    "opt_url": 'https://earthengine-highvolume.googleapis.com',
}
try:
    ee.Initialize(**ee_init_kwargs)
except Exception:
    ee.Authenticate()
    ee.Initialize(**ee_init_kwargs)

In [2]:
# Install openforis-whisp (uncomment the line below if it is not already installed)
# %pip install --pre openforis-whisp

# NB: for this run the package was installed in editable mode
# (from a terminal in the package source directory: pip install -e ".[dev]")

Import the other requirements

In [3]:
import openforis_whisp as whisp
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import geemap


Make a folder

In [4]:
# Working folder for this demo. Later cells write the generated GeoJSON files,
# the downloaded GeoTIFFs, the VRT and the CSV outputs into it.
#
# The folder is derived from the current user's home directory instead of a
# hard-coded, machine-specific absolute path, so the notebook runs unchanged on
# other machines. It is kept as a forward-slash *string* (not a Path) because
# later cells build file paths with `folder_path + "/..."`.
folder_path = (Path.home() / "Downloads" / "whisp_example_demo_2").as_posix()

# parents=True creates missing parent folders; exist_ok=True makes re-runs safe.
Path(folder_path).mkdir(parents=True, exist_ok=True)
print(f"Folder ready: {folder_path}")

Folder ready: C:/Users/Arnell/Downloads/whisp_example_demo_2


Get a feature collection

In [5]:
# Path of the GeoJSON file that will hold the randomly generated polygons.
GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons.geojson"

# Bounds taken from the provided Earth Engine geometry (area in Ghana).
# The list is ordered [min_lon, min_lat, max_lon, max_lat].
south_west = (-3.04548260909834, 5.253961384163733)   # (min_lon, min_lat)
north_east = (-1.0179939534016594, 7.48307210714245)  # (max_lon, max_lat)
bounds = [*south_west, *north_east]

# area in China
# bounds = [
#     103.44831497309737,  # min_lon
#     25.686366665187148,  # min_lat
#     109.57868606684737,  # max_lon
#     28.79200348254393    # max_lat
# ]

In [6]:
# Generate random test polygons inside `bounds`: 200 polygons of 1-10 ha,
# each with between 100 and 2000 vertices.
polygon_settings = dict(
    num_polygons=200,
    min_area_ha=1,
    max_area_ha=10,
    min_number_vert=100,
    max_number_vert=2000,
)
random_geojson = whisp.create_geojson(bounds, **polygon_settings)

GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons.geojson"

# Write the generated GeoJSON to disk
with open(GEOJSON_EXAMPLE_FILEPATH, 'w') as geojson_file:
    json.dump(random_geojson, geojson_file)

# Optional: use the example input shipped with Whisp instead
# GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")

# Add an "internal_id" field to the features and save the result to a NEW file
# (the file written above is not overwritten).
# NOTE(review): remove_properties=True presumably drops the other feature
# properties - confirm against the whisp documentation.
geojson_with_ids_path = f"{folder_path}/random_polygons_with_ids.geojson"
whisp.reformat_geojson_properties(
    geojson_path=GEOJSON_EXAMPLE_FILEPATH,
    id_field="internal_id",
    output_path=geojson_with_ids_path,
    remove_properties=True,
)


2025-05-05 17:13:47,383 - INFO - Created 200 records


Added internal_id to GeoJSON and saved to C:/Users/Arnell/Downloads/whisp_example_demo_2/random_polygons_with_ids.geojson


### Local Whisp stats processing chain

Input example geojson


In [7]:
# From here on, work with the copy of the polygons that carries the ID field.
GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons_with_ids.geojson"

Obscure/hide the input polygon locations using bounding boxes

In [8]:
# # get feature collection from geojson
# ee_collection = whisp.convert_geojson_to_ee(
#  GEOJSON_EXAMPLE_FILEPATH
# )

# # slight obscuration - bounding box
# ee_bbox_collection = whisp.convert_geojson_to_ee_bbox(
# GEOJSON_EXAMPLE_FILEPATH
# )

# # Full obscuration - extend, shift, and add random features
# fully_obscured_collection = whisp.convert_geojson_to_ee_bbox_obscured(
#     GEOJSON_EXAMPLE_FILEPATH,
#     extension_range=[0.002, 0.005],
#     shift_geometries=True,
#     shift_proportion=0.9,
#     pixel_length=0.0001,  # ~10m at equator
#     add_random_features=True,
#     max_distance=0.05,  # xkm at equator
#     random_proportion= 0.3  # Add X more features as decoys
# )

# map = geemap.Map()

# map.addLayer(fully_obscured_collection, {}, "Fully Obscured Collection")
# map.addLayer (ee_bbox_collection, {}, "Original bbox Collection")
# map.addLayer (ee_collection, {}, "Original Collection")


# map.centerObject(ee_collection.first(), 10)  # Center the map on the first feature in the collection

# map

In [9]:
# Full obscuration: build an Earth Engine collection of bounding boxes that are
# extended and shifted, with extra random decoy features added, so the real
# polygon locations are hidden.
obscure_kwargs = dict(
    extension_range=[0.002, 0.005],
    shift_geometries=True,
    shift_proportion=0.9,
    pixel_length=0.0001,       # ~10m at equator
    add_random_features=True,
    max_distance=0.05,         # NOTE(review): presumably degrees (~5.5 km at the equator) - confirm units
    random_proportion=0.3,     # share of extra decoys; the recorded run added 60 decoys for 200 inputs
)
fully_obscured_collection = whisp.convert_geojson_to_ee_bbox_obscured(
    GEOJSON_EXAMPLE_FILEPATH,
    **obscure_kwargs,
)

# Download GeoTIFFs of the combined Whisp datasets for the obscured collection
# into `folder_path`, using parallel workers (faster for many features).
# Alternative input (bounding boxes only, no shifting/decoys):
#     feature_collection=ee_bbox_collection
download_kwargs = dict(
    feature_collection=fully_obscured_collection,
    output_dir=folder_path,
    image=whisp.combine_datasets(),
    max_features=1000,
    max_workers=40,  # number of features processed concurrently
)
geotiff_paths = whisp.download_geotiffs_for_feature_collection(**download_kwargs)

# Build a VRT from the TIF files found in the folder.
whisp.create_vrt_from_folder(folder_path)

# Extract per-polygon 'sum' statistics from the combined VRT, processing the
# original (un-obscured) polygons in chunks with 20 parallel workers.
#
# Tuning suggestion: pick a chunk_size that yields 2-4x as many chunks as
# worker threads. For 20 workers, aim for 40-80 chunks in total
# (chunk_size = total_features / 40-80). With the 200 features used here,
# chunk_size=25 gives only 8 chunks.
combined_vrt_path = f"{folder_path}/combined_rasters.vrt"
stats = whisp.exact_extract_in_chunks_parallel(
    # rasters=tif_files,
    rasters=combined_vrt_path,
    vector_file=GEOJSON_EXAMPLE_FILEPATH,
    chunk_size=25,
    ops=['sum'],
    max_workers=20,  # adjust based on your CPU cores
)

Reading GeoJSON file from: C:\Users\Arnell\Downloads\whisp_example_demo_2\random_polygons_with_ids.geojson
Added 60 random decoy features to obscure real locations
Created Earth Engine FeatureCollection with 260 bounding box features
['Area', 'European_Primary_Forest', 'GLC_FCS30D_TC_2022', 'GLC_FCS30D_crop_2022', 'IFL_2020', 'IIASA_planted_plantation', 'Cocoa_bnetd', 'Oil_palm_Descals', 'ESA_fire_before_2020', 'ESA_fire_2001', 'ESA_fire_2002', 'ESA_fire_2003', 'ESA_fire_2004', 'ESA_fire_2005', 'ESA_fire_2006', 'ESA_fire_2007', 'ESA_fire_2008', 'ESA_fire_2009', 'ESA_fire_2010', 'ESA_fire_2011', 'ESA_fire_2012', 'ESA_fire_2013', 'ESA_fire_2014', 'ESA_fire_2015', 'ESA_fire_2016', 'ESA_fire_2017', 'ESA_fire_2018', 'ESA_fire_2019', 'ESA_fire_2020', 'ESA_TC_2020', 'ESRI_2023_TC', 'ESRI_2023_crop', 'Cocoa_ETH', 'Cocoa_2023_FDaP', 'Cocoa_FDaP', 'Forest_FDaP', 'Oil_palm_2023_FDaP', 'Oil_palm_FDaP', 'Rubber_2023_FDaP', 'Rubber_FDaP', 'GFT_naturally_regenerating', 'GFT_planted_plantation', 'GFT_

2025-05-05 17:13:53,331 - INFO - Processing Earth Engine FeatureCollection with 260 features
2025-05-05 17:13:53,331 - INFO - Using parallel processing with 40 workers
2025-05-05 17:13:54,543 - INFO - Downloading GeoTIFF for feature 1
2025-05-05 17:13:54,781 - INFO - Downloading GeoTIFF for feature 4
2025-05-05 17:13:54,781 - INFO - Downloading GeoTIFF for feature 3
2025-05-05 17:13:54,781 - INFO - Downloading GeoTIFF for feature 11
2025-05-05 17:13:54,982 - INFO - Downloading GeoTIFF for feature 15
2025-05-05 17:13:55,014 - INFO - Downloading GeoTIFF for feature 2
2025-05-05 17:13:55,029 - INFO - Downloading GeoTIFF for feature 6
2025-05-05 17:13:55,029 - INFO - Downloading GeoTIFF for feature 8
2025-05-05 17:13:55,029 - INFO - Downloading GeoTIFF for feature 5
2025-05-05 17:13:55,029 - INFO - Downloading GeoTIFF for feature 7
2025-05-05 17:13:55,112 - INFO - Downloading GeoTIFF for feature 9
2025-05-05 17:13:55,364 - INFO - Downloading GeoTIFF for feature 13
2025-05-05 17:13:55,478 -

Found 200 TIF files to include in the VRT




VRT file created at: C:\Users\Arnell\Downloads\whisp_example_demo_2\combined_rasters.vrt
Reading vector file: C:/Users/Arnell/Downloads/whisp_example_demo_2/random_polygons_with_ids.geojson
Processing in 8 chunks of up to 25 features each
Using 20 parallel workers
Results from chunk 1 stored
Results from chunk 2 stored
Results from chunk 3 stored
Results from chunk 4 stored
Results from chunk 5 stored
Results from chunk 6 stored
Results from chunk 7 stored
Results from chunk 8 stored
Combining results from 8 chunks...
Processing complete. Processed 200/200 features in 51.44s


Save the results of the local processing

In [10]:
# Write the locally processed statistics to CSV, without the index column.
local_csv_path = f"{folder_path}/whisp_output_local_processing.csv"
stats.to_csv(local_csv_path, index=False)

## Regular Whisp 

In [11]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Re-initialise Earth Engine for the regular Whisp workflow. The high-volume
# endpoint is deliberately not passed here:
#     opt_url='https://earthengine-highvolume.googleapis.com'
# If the first attempt raises, authenticate and then initialise again.
ee_project = 'ee-andyarnellgee'
try:
    ee.Initialize(project=ee_project)
except Exception:
    ee.Authenticate()
    ee.Initialize(project=ee_project)

### Whisp it

In [15]:
# Compute the formatted Whisp statistics for the GeoJSON polygons as a
# DataFrame. No external ID column is supplied.
df_stats = whisp.whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    external_id_column=None,
)

Reading GeoJSON file from: C:\Users\Arnell\Downloads\whisp_example_demo_2\random_polygons_with_ids.geojson
['Area', 'European_Primary_Forest', 'GLC_FCS30D_TC_2022', 'GLC_FCS30D_crop_2022', 'IFL_2020', 'IIASA_planted_plantation', 'Cocoa_bnetd', 'Oil_palm_Descals', 'ESA_fire_before_2020', 'ESA_fire_2001', 'ESA_fire_2002', 'ESA_fire_2003', 'ESA_fire_2004', 'ESA_fire_2005', 'ESA_fire_2006', 'ESA_fire_2007', 'ESA_fire_2008', 'ESA_fire_2009', 'ESA_fire_2010', 'ESA_fire_2011', 'ESA_fire_2012', 'ESA_fire_2013', 'ESA_fire_2014', 'ESA_fire_2015', 'ESA_fire_2016', 'ESA_fire_2017', 'ESA_fire_2018', 'ESA_fire_2019', 'ESA_fire_2020', 'ESA_TC_2020', 'ESRI_2023_TC', 'ESRI_2023_crop', 'Cocoa_ETH', 'Cocoa_2023_FDaP', 'Cocoa_FDaP', 'Forest_FDaP', 'Oil_palm_2023_FDaP', 'Oil_palm_FDaP', 'Rubber_2023_FDaP', 'Rubber_FDaP', 'GFT_naturally_regenerating', 'GFT_planted_plantation', 'GFT_primary', 'GFC_TC_2020', 'GFC_loss_after_2020', 'GFC_loss_before_2020', 'GFC_loss_year_2001', 'GFC_loss_year_2002', 'GFC_loss_y

### Display results

In [13]:
# Display the results table as the cell output (long/wide frames are abbreviated with "...").
df_stats

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,TMF_regrowth_2023,ESRI_2023_TC,GLC_FCS30D_TC_2022,Oil_palm_2023_FDaP,Rubber_2023_FDaP,Cocoa_2023_FDaP,ESRI_2023_crop,GLC_FCS30D_crop_2022,GFW_logging,geo
0,1,,6.766,Polygon,GHA,GH,Western North Region,-2.738693,5.534424,ha,...,0.261,6.766,6.766,0.102,0.0,0.365,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.74036..."
1,2,,4.654,Polygon,GHA,GH,Western Region,-1.802213,5.444188,ha,...,0.000,4.654,4.654,0.074,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.80359..."
2,3,,6.700,Polygon,GHA,GH,Western North Region,-2.478063,6.162471,ha,...,2.656,6.700,6.700,0.000,0.0,1.294,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.47981..."
3,4,,6.035,Polygon,GHA,GH,Central Region,-1.332689,5.599102,ha,...,0.000,6.035,6.035,0.029,0.0,0.000,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.33430..."
4,5,,7.311,Polygon,GHA,GH,Ashanti Region,-1.776706,6.659190,ha,...,1.971,7.311,6.731,1.608,0.0,1.484,0.000,0.580,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.77825..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,,1.171,Polygon,GHA,GH,Ashanti Region,-1.385374,7.203355,ha,...,0.000,0.000,0.456,0.000,0.0,0.000,1.171,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.38606..."
196,197,,5.332,Polygon,GHA,GH,Ashanti Region,-1.119683,7.154390,ha,...,0.000,2.811,4.886,0.000,0.0,0.000,0.000,0.014,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.12114..."
197,198,,3.728,Polygon,GHA,GH,Ashanti Region,-2.106688,7.000327,ha,...,0.520,3.728,3.728,0.000,0.0,1.026,0.000,0.000,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.10798..."
198,199,,10.841,Polygon,GHA,GH,Ashanti Region,-2.092487,7.437024,ha,...,0.133,9.846,10.732,0.000,0.0,0.013,0.000,0.109,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.09476..."


### Export table to CSV

In [14]:
# Write the regular Whisp results to CSV, without the index column.
regular_csv_path = f"{folder_path}/whisp_output_regular.csv"
df_stats.to_csv(regular_csv_path, index=False)