### Whisp a feature collection

### Setup
- NB: use a virtual environment so that installing packages does not alter your system Python environment (https://docs.python.org/3/tutorial/venv.html)

In [1]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Connect to Earth Engine through the high-volume endpoint. If the first
# attempt raises for any reason, run the authentication flow and retry once.
ee_init_options = {
    'project': 'ee-andyarnellgee',
    'opt_url': 'https://earthengine-highvolume.googleapis.com',
}
try:
    ee.Initialize(**ee_init_options)
except Exception:
    ee.Authenticate()
    ee.Initialize(**ee_init_options)

In [2]:
# Install openforis-whisp (uncomment line if not already installed)
# !pip install --pre openforis-whisp 

# NB installed in editable mode (from a terminal in the repository root: pip install -e ".[dev]")

Import the other required libraries

In [3]:
import openforis_whisp as whisp
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import geemap


Make a folder

In [4]:
# Output folder for every file this notebook writes (GeoJSON inputs, GeoTIFFs,
# VRT and CSV results). It is derived from the current user's home directory
# instead of a hard-coded user profile so the notebook runs on any machine.
# as_posix() keeps forward slashes and a plain str, because later cells build
# file names with `folder_path + "/..."`.
folder_path = (Path.home() / "Downloads" / "whisp_example_demo").as_posix()  # COGS
Path(folder_path).mkdir(parents=True, exist_ok=True)  # no error if it already exists
print(f"Folder ready: {folder_path}")

Folder ready: C:/Users/Arnell/Downloads/whisp_example_demo


Get a feature collection

In [5]:
# Destination file for the randomly generated demo polygons
GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons.geojson"

# Bounding box taken from the provided Earth Engine geometry, ordered as
# [min_lon, min_lat, max_lon, max_lat]. Active extent: an area in Ghana.
bounds = [
    -3.04548260909834, 5.253961384163733,   # min_lon, min_lat
    -1.0179939534016594, 7.48307210714245,  # max_lon, max_lat
]

# Alternative extent: an area in China (swap in for the list above)
# bounds = [
#     103.44831497309737, 25.686366665187148,  # min_lon, min_lat
#     109.57868606684737, 28.79200348254393,   # max_lon, max_lat
# ]

In [6]:
# Generate 50 random polygons (1-10 ha, 100-2000 vertices each) inside `bounds`
random_geojson = whisp.create_geojson(
    bounds,
    num_polygons=50,
    min_area_ha=1,
    max_area_ha=10,
    min_number_vert=100,
    max_number_vert=2000,
)

GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons.geojson"

# Serialise the GeoJSON and write it to disk
with open(GEOJSON_EXAMPLE_FILEPATH, 'w') as geojson_file:
    geojson_file.write(json.dumps(random_geojson))

# Optional: use the example inputs shipped with Whisp instead
# GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")

# Add an "internal_id" field to the saved GeoJSON. The result goes to a new
# file (the input is not overwritten); remove_properties=True is passed through.
whisp.reformat_geojson_properties(
    geojson_path=GEOJSON_EXAMPLE_FILEPATH,
    output_path=f"{folder_path}/random_polygons_with_ids.geojson",
    id_field="internal_id",
    remove_properties=True,
)


2025-04-29 20:16:59,740 - INFO - Created 50 records


Added internal_id to GeoJSON and saved to C:/Users/Arnell/Downloads/whisp_example_demo/random_polygons_with_ids.geojson


### Local Whisp stats processing chain

Input example geojson


In [7]:
# Point the rest of the notebook at the ID-tagged copy of the polygons
GEOJSON_EXAMPLE_FILEPATH = f"{folder_path}/random_polygons_with_ids.geojson"

Obscure/hide the input polygon locations using bounding boxes

In [None]:
# # get feature collection from geojson
# ee_collection = whisp.convert_geojson_to_ee(
#  GEOJSON_EXAMPLE_FILEPATH
# )

# # slight obscuration - bounding box
# ee_bbox_collection = whisp.convert_geojson_to_ee_bbox(
# GEOJSON_EXAMPLE_FILEPATH
# )

# # Full obscuration - extend, shift, and add random features
# fully_obscured_collection = whisp.convert_geojson_to_ee_bbox_obscured(
#     GEOJSON_EXAMPLE_FILEPATH,
#     extension_range=[0.002, 0.005],
#     shift_geometries=True,
#     shift_proportion=0.9,
#     pixel_length=0.0001,  # ~10m at equator
#     add_random_features=True,
#     max_distance=0.05,  # xkm at equator
#     random_proportion= 0.3  # Add X more features as decoys
# )

# map = geemap.Map()

# map.addLayer(fully_obscured_collection, {}, "Fully Obscured Collection")
# map.addLayer (ee_bbox_collection, {}, "Original bbox Collection")
# map.addLayer (ee_collection, {}, "Original Collection")


# map.centerObject(ee_collection.first(), 10)  # Center the map on the first feature in the collection

# map

In [None]:
# Local processing chain: obscure the plot locations, download rasters for the
# obscured features, build a VRT over the downloaded files, then compute the
# statistics locally against the original (un-obscured) polygons.

# 1. Full obscuration - extend, shift, and add random features
fully_obscured_collection = whisp.convert_geojson_to_ee_bbox_obscured(
    GEOJSON_EXAMPLE_FILEPATH,
    extension_range=[0.002, 0.005],
    shift_geometries=True,
    shift_proportion=0.9,
    pixel_length=0.0001,  # ~10m at equator
    add_random_features=True,
    max_distance=0.05,  # NOTE(review): presumably degrees (~5.5 km at the equator) - confirm
    random_proportion=0.3,  # decoy features to add; the recorded run turned 50 inputs into 65 features
)

# 2. Download the GeoTIFFs in parallel (faster for many features).
#    Alternative input: the bounding-box-only `ee_bbox_collection` from the
#    commented-out cell above.
geotiff_paths = whisp.download_geotiffs_for_feature_collection(
    feature_collection=fully_obscured_collection,
    output_dir=folder_path,
    image=whisp.combine_datasets(),
    max_features=1000,
    max_workers=40,  # Process X features concurrently
)

# 3. Build a VRT from the TIF files in the output folder; the recorded run
#    reports it as combined_rasters.vrt inside that folder.
whisp.create_vrt_from_folder(folder_path)

# 4. Extract statistics in parallel chunks.
#    Suggestion: use a chunk_size that results in 2-4x the number of chunks as
#    you have worker threads. For 20 workers, aim for 40-80 total chunks
#    (chunk_size = total_features / 40-80). With only 50 features and
#    chunk_size=25 most of the 20 workers stay idle, which is fine for a demo.
stats = whisp.exact_extract_in_chunks_parallel(
    rasters=folder_path + '/combined_rasters.vrt',  # alternative: rasters=tif_files
    vector_file=GEOJSON_EXAMPLE_FILEPATH,
    chunk_size=25,
    ops=['sum'],
    max_workers=20,  # Adjust based on your CPU cores
)

['Area', 'European_Primary_Forest', 'GLC_FCS30D_TC_2022', 'GLC_FCS30D_crop_2022', 'IFL_2020', 'IIASA_planted_plantation', 'Cocoa_bnetd', 'Oil_palm_Descals', 'ESA_fire_before_2020', 'ESA_fire_2001', 'ESA_fire_2002', 'ESA_fire_2003', 'ESA_fire_2004', 'ESA_fire_2005', 'ESA_fire_2006', 'ESA_fire_2007', 'ESA_fire_2008', 'ESA_fire_2009', 'ESA_fire_2010', 'ESA_fire_2011', 'ESA_fire_2012', 'ESA_fire_2013', 'ESA_fire_2014', 'ESA_fire_2015', 'ESA_fire_2016', 'ESA_fire_2017', 'ESA_fire_2018', 'ESA_fire_2019', 'ESA_fire_2020', 'ESA_TC_2020', 'ESRI_2023_TC', 'ESRI_2023_crop', 'Cocoa_ETH', 'Cocoa_2023_FDaP', 'Cocoa_FDaP', 'Forest_FDaP', 'Oil_palm_2023_FDaP', 'Oil_palm_FDaP', 'Rubber_2023_FDaP', 'Rubber_FDaP', 'GFT_naturally_regenerating', 'GFT_planted_plantation', 'GFT_primary', 'GFC_TC_2020', 'GFC_loss_after_2020', 'GFC_loss_before_2020', 'GFC_loss_year_2001', 'GFC_loss_year_2002', 'GFC_loss_year_2003', 'GFC_loss_year_2004', 'GFC_loss_year_2005', 'GFC_loss_year_2006', 'GFC_loss_year_2007', 'GFC_los

2025-04-29 20:17:13,164 - INFO - Processing Earth Engine FeatureCollection with 65 features
2025-04-29 20:17:13,164 - INFO - Using parallel processing with 40 workers
2025-04-29 20:17:13,834 - INFO - Downloading GeoTIFF for feature 8
2025-04-29 20:17:13,933 - INFO - Downloading GeoTIFF for feature 15
2025-04-29 20:17:14,071 - INFO - Downloading GeoTIFF for feature 28
2025-04-29 20:17:14,133 - INFO - Downloading GeoTIFF for feature 1
2025-04-29 20:17:14,224 - INFO - Downloading GeoTIFF for feature 5
2025-04-29 20:17:14,233 - INFO - Downloading GeoTIFF for feature 2
2025-04-29 20:17:14,249 - INFO - Downloading GeoTIFF for feature 6
2025-04-29 20:17:14,270 - INFO - Downloading GeoTIFF for feature 7
2025-04-29 20:17:14,270 - INFO - Downloading GeoTIFF for feature 9
2025-04-29 20:17:14,270 - INFO - Downloading GeoTIFF for feature 3
2025-04-29 20:17:14,314 - INFO - Downloading GeoTIFF for feature 12
2025-04-29 20:17:14,348 - INFO - Downloading GeoTIFF for feature 10
2025-04-29 20:17:14,353 -

Found 50 TIF files to include in the VRT




VRT file created at: C:\Users\Arnell\Downloads\whisp_example_demo\combined_rasters.vrt
Reading vector file: C:/Users/Arnell/Downloads/whisp_example_demo/random_polygons_with_ids.geojson
Total features to process: 50
Processing in 4 chunks of up to 15 features each
Using 20 parallel workers
Starting chunk 1/4 (features 1-15)
Starting chunk 2/4 (features 16-30)
Starting chunk 3/4 (features 31-45)
Starting chunk 4/4 (features 46-50)




Completed chunk 4/4 in 4.65s
Chunk 4 integrated into results




Completed chunk 2/4 in 8.83s
Chunk 2 integrated into results




Completed chunk 3/4 in 9.42s
Chunk 3 integrated into results
Completed chunk 1/4 in 9.82s
Chunk 1 integrated into results
Processing complete. Processed 50/50 features in 9.85s


Save the results of the local processing

In [None]:
# Write the locally computed statistics to CSV, leaving out the index column
stats.to_csv(f"{folder_path}/whisp_output_local_processing.csv", index=False)

Parallel processing complete.
Error removing TIF files: [WinError 32] The process cannot access the file because it is being used by another process: 'C:/Users/Arnell/Downloads/whisp_example_demo\\feature_1.tif'


## Regular Whisp 

In [40]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Re-initialise Earth Engine for the regular Whisp workflow. The high-volume
# opt_url ('https://earthengine-highvolume.googleapis.com') is left commented
# out here, so no endpoint override is passed.
ee_project = 'ee-andyarnellgee'
try:
    ee.Initialize(project=ee_project)
except Exception:
    # First attempt failed: run the authentication flow and retry once
    ee.Authenticate()
    ee.Initialize(project=ee_project)

### Whisp it

In [41]:
# Run the regular Whisp workflow on the GeoJSON file and collect the formatted
# statistics as a DataFrame; no external ID column is supplied.
df_stats = whisp.whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    external_id_column=None,
)

Reading GeoJSON file from: C:\Users\Arnell\Downloads\whisp_example_demo\random_polygons_with_ids.geojson
['Area', 'European_Primary_Forest', 'GLC_FCS30D_TC_2022', 'GLC_FCS30D_crop_2022', 'IFL_2020', 'IIASA_planted_plantation', 'Cocoa_bnetd', 'Oil_palm_Descals', 'ESA_fire_before_2020', 'ESA_fire_2001', 'ESA_fire_2002', 'ESA_fire_2003', 'ESA_fire_2004', 'ESA_fire_2005', 'ESA_fire_2006', 'ESA_fire_2007', 'ESA_fire_2008', 'ESA_fire_2009', 'ESA_fire_2010', 'ESA_fire_2011', 'ESA_fire_2012', 'ESA_fire_2013', 'ESA_fire_2014', 'ESA_fire_2015', 'ESA_fire_2016', 'ESA_fire_2017', 'ESA_fire_2018', 'ESA_fire_2019', 'ESA_fire_2020', 'ESA_TC_2020', 'ESRI_2023_TC', 'ESRI_2023_crop', 'Cocoa_ETH', 'Cocoa_2023_FDaP', 'Cocoa_FDaP', 'Forest_FDaP', 'Oil_palm_2023_FDaP', 'Oil_palm_FDaP', 'Rubber_2023_FDaP', 'Rubber_FDaP', 'GFT_naturally_regenerating', 'GFT_planted_plantation', 'GFT_primary', 'GFC_TC_2020', 'GFC_loss_after_2020', 'GFC_loss_before_2020', 'GFC_loss_year_2001', 'GFC_loss_year_2002', 'GFC_loss_yea

### Display results

In [43]:
# Preview the first rows only instead of dumping the whole table; the complete
# results are exported to CSV in the next cell.
df_stats.head(10)

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,TMF_regrowth_2023,ESRI_2023_TC,GLC_FCS30D_TC_2022,Oil_palm_2023_FDaP,Rubber_2023_FDaP,Cocoa_2023_FDaP,ESRI_2023_crop,GLC_FCS30D_crop_2022,GFW_logging,geo
0,1,,1.939,Polygon,GHA,GH,Ashanti Region,-1.611942,6.15954,ha,...,0.803,1.939,1.939,1.834,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.61283..."
1,2,,4.152,Polygon,GHA,GH,Ashanti Region,-1.644732,6.104735,ha,...,0.017,4.152,4.063,0.042,0.0,0.003,0.0,0.089,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.64615..."
2,3,,16.6,Polygon,GHA,GH,Western Region,-2.157144,5.981149,ha,...,0.0,16.6,16.511,0.661,0.0,0.0,0.0,0.089,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.15951..."
3,4,,31.212999,Polygon,IDN,ID,South Sumatra,103.956096,-3.054668,ha,...,0.0,6.332,27.767,26.664,2.145,0.0,24.882,3.356,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9514..."
4,5,,1.964,Polygon,IDN,ID,South Sumatra,103.970371,-3.068831,ha,...,0.316,1.934,0.686,0.0,1.626,0.0,0.0,1.278,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9694..."
5,6,,12.725,Polygon,IDN,ID,South Sumatra,103.975182,-3.082922,ha,...,1.431,12.725,12.152,0.204,0.05,0.0,0.0,0.573,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9731..."
6,7,,20.882,Polygon,IDN,ID,South Sumatra,103.977512,-3.083808,ha,...,4.897,20.882,20.120001,0.118,0.238,0.0,0.0,0.762,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9749..."
7,8,,8.279,Polygon,CIV,CI,Lagunes,-4.101646,5.711935,ha,...,3.282,6.147,8.279,0.089,1.175,0.072,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-4.10288..."
8,9,,1.981,Polygon,CIV,CI,Lagunes,-4.086848,5.673811,ha,...,0.715,1.981,1.981,0.11,0.032,0.276,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-4.08767..."
9,10,,3.797,Polygon,CIV,CI,District Autonome D'Abidjan,-4.119589,5.572136,ha,...,2.857,3.797,3.717,0.68,3.134,0.0,0.0,0.08,0.0,"{'type': 'Polygon', 'coordinates': [[[-4.12062..."


### Export table to CSV

In [44]:
# Write the regular Whisp statistics to CSV, leaving out the index column
df_stats.to_csv(f"{folder_path}/whisp_output_regular.csv", index=False)