### Whisp a geojson

Setup
- Use a [virtual environment](https://docs.python.org/3/tutorial/venv.html) to avoid altering your Python environment

Usage:
- Use this notebook with smaller datasets (e.g., up to 10,000 features). 
- For larger datasets, consider the 'whisp_geojson_to_drive.ipynb' notebook, which is better suited to heavy processing.
- Please report issues with this notebook [here](https://github.com/forestdatapartnership/whisp/issues)

In [1]:
# Earth Engine and common libraries
import ee
from pathlib import Path

# Authenticate and initialize Earth Engine against the high-volume endpoint.
try:
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')  # Try to use existing credentials first
except Exception:
    # Initialization failed (presumably no usable stored credentials):
    # run the sign-in flow, then retry with the same endpoint.
    ee.Authenticate() # Authenticate may open a browser window
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

# NB: if this still does not work, add your cloud project: ee.Initialize(project="your_gee_cloud_project_name")

In [2]:
# Install openforis-whisp (uncomment line if not already installed)
# %pip install --pre openforis-whisp

# NB for an editable (development) install, run this in your terminal from the repository root: pip install -e ".[dev]"

In [3]:
import openforis_whisp as whisp

Get a geojson

In [4]:
# Resolve the path of the example GeoJSON through the whisp helper.
# This path is the input passed to the Whisp run further down.
GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")

Prepare inputs

In [5]:
# Choose whether to include additional custom layers.
# The custom-bands setup cell and the later Whisp/risk calls all branch on this flag.
USE_CUSTOM_BANDS = False # set to True to add extra Earth Engine data to Whisp

In [6]:
# =============================================================================
# CUSTOM BANDS SETUP (OPTIONAL) - runs only if USE_CUSTOM_BANDS = True above
#
# Defines three names that exist ONLY when the flag is True:
#   custom_images / custom_bands_info -> inputs describing each custom band
#   custom_ee_image                   -> combined image later added to the Whisp image
#   custom_bands                      -> list of custom band names
# Later cells guard their use of these names with USE_CUSTOM_BANDS.
# =============================================================================
if USE_CUSTOM_BANDS:

    # Step 1: Define custom Earth Engine images (binary values 0 or 1)
    # The values below are placeholders; a real-data alternative is shown above each entry.
    custom_images = {
        # Placeholder: constant image of 1.
        # Real-data alternative:
        #   ee.Image("UMD/hansen/global_forest_change_2024_v1_12").select("treecover2000").gt(10).selfMask()
        'example_treecover': ee.Image(1),
        # Placeholder: seeded random image thresholded at 0.5, reprojected to EPSG:4326 with scale=10.
        # Real-data alternative:
        #   ee.ImageCollection("projects/forestdatapartnership/assets/cocoa/model_2025a").filter(ee.Filter.date('2020-01-01', '2021-01-01')).mosaic().gt(.8).selfMask()
        'nXX_example_commodity': ee.Image.random(seed=1).gte(.5).reproject(crs='EPSG:4326', scale=10)
        # add more images as needed (prefix 'nXX_' = iso2 code for national dataset)
    }

    # Step 2: Define metadata for each custom band (keys must match above)
    # Themes: 'treecover', 'commodities', 'disturbance_before', 'disturbance_after'
    # Timber themes: 'primary', 'naturally_reg_2020', 'planted_plantation_2020', etc.
    custom_bands_info = {
        'example_treecover': {
            'ISO2_code': "",          # Country code (empty = all countries)
            'theme': 'treecover',     # Risk theme
            'theme_timber': "",       # Timber theme (if applicable)
            'use_for_risk': 1,        # Include in risk calculations (1=yes, 0=no)
            'use_for_risk_timber': 0  # Include in timber risk (1=yes, 0=no)
        },
        'nXX_example_commodity': {
            'ISO2_code': "XX",        # Placeholder code; it is lower-cased and added to the national codes list in a later cell
            'theme': 'commodities',   # Risk theme
            'theme_timber': "",       # Timber theme (if applicable)
            'use_for_risk': 1,        # Include in risk calculations (1=yes, 0=no)
            'use_for_risk_timber': 0  # Include in timber risk (1=yes, 0=no)
        }
        # add more band metadata as needed
    }

    # Step 3: Combine custom bands and extract names
    # custom_ee_image is appended to the Whisp image in the "Create final Whisp image" cell.
    custom_ee_image = whisp.combine_custom_bands(custom_images, custom_bands_info)

    # Band names, taken from the metadata keys; passed to the Whisp run as `custom_bands`.
    custom_bands = list(custom_bands_info.keys())


In [7]:
# Choose additional national datasets to include (currently three countries: 'co', 'ci', 'br').
base_iso2_codes = ['co', 'ci', 'br']

# Work on a copy so that re-running this cell never mutates base_iso2_codes.
iso2_codes_list = base_iso2_codes.copy()

# Automatically add any custom ISO2 codes from custom_bands_info if USE_CUSTOM_BANDS is True.
# Codes are lower-cased and appended in the order the bands are declared, skipping
# empty codes and anything already present. Checking membership on the list as it
# grows keeps the result deterministic and free of duplicates even when the same
# code appears in different cases (e.g. 'XX' and 'xx').
if USE_CUSTOM_BANDS:
    for band_info in custom_bands_info.values():
        iso2_code = (band_info.get('ISO2_code') or '').lower()
        if iso2_code and iso2_code not in iso2_codes_list:
            iso2_codes_list.append(iso2_code)

In [8]:
# Build the Whisp image for the selected national datasets and count its bands.
whisp_image = whisp.combine_datasets(national_codes=iso2_codes_list)
standard_bands = len(whisp_image.bandNames().getInfo())

# Custom bands are appended only when they were requested AND the custom-bands
# cell has actually been run (i.e. custom_ee_image exists in this namespace).
if not USE_CUSTOM_BANDS or 'custom_ee_image' not in locals():
    print(f"Final image has {standard_bands} bands")
else:
    whisp_image = whisp_image.addBands(custom_ee_image)
    print(f"Final image has {standard_bands + len(custom_bands)} bands ({standard_bands} + {len(custom_bands)} custom)")

Whisp multiband image compiled
Final image has 198 bands


Run Whisp 

In [9]:
# Reset the Earth Engine client library, then re-initialize it against the high-volume endpoint.
# NOTE(review): the first cell already initializes with this endpoint; presumably this reset
# gives a clean client before the concurrent run below. Confirm it is still needed.
ee.Reset()
# ee.Initialize()  # alternative without the high-volume URL
ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com")

In [None]:
# Reload modules to pick up latest changes (useful when working from an editable install;
# can be skipped otherwise).
import importlib

# Reload the submodule first, then the package itself.
# importlib.import_module returns the submodule directly, replacing the
# __import__(..., fromlist=['']) workaround.
importlib.reload(importlib.import_module('openforis_whisp.advanced_stats'))
import openforis_whisp
# The trailing semicolon stops the notebook from echoing the module repr,
# which would otherwise write the absolute local install path into the saved output.
importlib.reload(openforis_whisp);

<module 'openforis_whisp' from 'C:\\Users\\Arnell\\Documents\\GitHub\\whisp\\src\\openforis_whisp\\__init__.py'>

In [18]:
# Run Whisp on the input GeoJSON and collect the formatted statistics table.
df_stats = whisp.whisp_formatted_stats_geojson_to_df_fast(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    # optional - input column/property to map to the external ID
    external_id_column="user_id",
    # optional - national datasets are not included unless specified here
    national_codes=iso2_codes_list,
    # optional - changes the unit type; the default is 'ha'
    unit_type='percent',
    # optional - defaults to the standard whisp image if not provided
    whisp_image=whisp_image,
    # include custom bands in the formatted output (only when enabled)
    custom_bands=custom_bands if USE_CUSTOM_BANDS else None,
    mode="concurrent",
)

INFO: Mode explicitly set to: concurrent
INFO: Loading GeoJSON: ..\tests\fixtures\geojson_example.geojson
INFO: Loaded 50 features
INFO: Processing 50 features in 5 batches


2025-11-04 14:18:29,708 - INFO - Created 10 records
2025-11-04 14:18:29,762 - INFO - Created 10 records
2025-11-04 14:18:29,762 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpxfb6j33q.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpx6z7323z.geojson


2025-11-04 14:18:29,775 - INFO - Created 10 records
2025-11-04 14:18:29,812 - INFO - Created 10 records


Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmpbdsncqfy.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmppo2jc3lk.geojson
Reading GeoJSON file from: C:\Users\Arnell\AppData\Local\Temp\tmp3gtdxg6l.geojson
INFO: Progress: 2/5 (40% complete)
INFO: Progress: 3/5 (60% complete)
INFO: Progress: 4/5 (80% complete)
INFO: Progress: 5/5 (100% complete)
INFO: Processing complete: 5/5 batches
INFO: Processed 50 features successfully
Creating schema for national_codes: ['co', 'ci', 'br']
[logger.py | info() | l.23] INFO: All expected schema columns found in DataFrame.
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.


  combined = pd.concat(results, ignore_index=True)


INFO: Concurrent processing + formatting + validation complete


Display results

In [None]:
# Show the Whisp statistics table (bare last expression -> rich notebook display)
df_stats


Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,nBR_MapBiomas_col9_palmoil_2020,nBR_MapBiomas_col9_pc_2020,nBR_INPE_TCamz_cer_annual_2020,nBR_MapBiomas_col9_soy_2020,nBR_MapBiomas_col9_annual_crops_2020,nBR_INPE_TCamz_pasture_2020,nBR_INPE_TCcer_pasture_2020,nBR_MapBiomas_col9_pasture_2020,nCI_Cocoa_bnetd,geo
0,1,1,1409.708984,Polygon,CAF,CF,Equateur,16.334652,3.158787,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[16.31529..."
1,2,2,0.203,Polygon,KEN,KE,Kiambu,36.871113,-0.99516,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[36.87089..."
2,3,3,1.129,Polygon,BRA,BR,Minas Gerais,-47.007553,-20.478246,percent,...,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-47.0081..."
3,4,4,0.284,Polygon,KEN,KE,Kiambu,36.862896,-0.991854,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[36.86240..."
4,5,5,2.465,Polygon,CIV,CI,Montagnes,-7.879019,6.457085,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,81.329002,"{'type': 'Polygon', 'coordinates': [[[-7.88004..."
5,6,6,0.493,Polygon,CIV,CI,Gôh-Djiboua,-5.585572,5.598818,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-5.58617..."
6,7,7,1.624,Polygon,CIV,CI,Sassandra-Marahoué,-6.756305,7.36691,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-6.75684..."
7,8,8,1.442,Polygon,GHA,GH,Western,-2.289581,5.901817,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.29035..."
8,9,9,0.588,Polygon,GHA,GH,Western,-2.953024,6.506243,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.95349..."
9,10,10,9.119,Polygon,PER,PE,Loreto,-76.096543,-6.035658,percent,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-76.0984..."


In [None]:
# Define the output folder (if running in Sepal, change the path to your preferred folder)
# e.g. out_directory = Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Create the folder if it does not exist yet; DataFrame.to_csv does not create
# missing parent directories and would fail with an OSError otherwise.
out_directory.mkdir(parents=True, exist_ok=True)

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table.csv'

# Save the CSV file (index=False: do not write the DataFrame index as a column)
df_stats.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table saved to: {csv_output_file}")

Table saved to: C:\Users\Arnell\downloads\whisp_output_table.csv


Calculate risk category

In [None]:
# adds risk columns to end of dataframe
# Compute risk categories: returns the stats table with risk columns added at the end.
df_w_risk = whisp.whisp_risk(
    df=df_stats,
    national_codes=iso2_codes_list,
    custom_bands_info=custom_bands_info if USE_CUSTOM_BANDS else None  # pass custom band metadata only when custom bands are enabled
)

Using unit type: percent
Including additional national data for: ['co', 'ci', 'br']


Display table with risk columns

In [None]:
# Show the table including the risk columns (bare last expression -> rich notebook display)
df_w_risk

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,Ind_05_primary_2020,Ind_06_nat_reg_forest_2020,Ind_07_planted_plantations_2020,Ind_08_planted_plantations_after_2020,Ind_09_treecover_after_2020,Ind_10_agri_after_2020,Ind_11_logging_concession_before_2020,risk_pcrop,risk_acrop,risk_timber
0,1,1,1409.708984,Polygon,CAF,CF,Equateur,16.334652,3.158787,percent,...,yes,yes,no,no,yes,no,yes,more_info_needed,more_info_needed,low
1,2,2,0.203,Polygon,KEN,KE,Kiambu,36.871113,-0.99516,percent,...,no,yes,no,no,yes,no,no,more_info_needed,more_info_needed,low
2,3,3,1.129,Polygon,BRA,BR,Minas Gerais,-47.007553,-20.478246,percent,...,no,yes,no,no,no,yes,no,low,low,low
3,4,4,0.284,Polygon,KEN,KE,Kiambu,36.862896,-0.991854,percent,...,no,yes,no,no,yes,no,no,low,more_info_needed,low
4,5,5,2.465,Polygon,CIV,CI,Montagnes,-7.879019,6.457085,percent,...,no,yes,no,no,yes,yes,no,low,low,low
5,6,6,0.493,Polygon,CIV,CI,Gôh-Djiboua,-5.585572,5.598818,percent,...,no,yes,no,no,yes,yes,no,high,high,high
6,7,7,1.624,Polygon,CIV,CI,Sassandra-Marahoué,-6.756305,7.36691,percent,...,no,yes,no,no,yes,yes,no,more_info_needed,more_info_needed,high
7,8,8,1.442,Polygon,GHA,GH,Western,-2.289581,5.901817,percent,...,no,yes,no,no,yes,yes,no,low,low,low
8,9,9,0.588,Polygon,GHA,GH,Western,-2.953024,6.506243,percent,...,no,yes,no,no,yes,yes,no,low,low,low
9,10,10,9.119,Polygon,PER,PE,Loreto,-76.096543,-6.035658,percent,...,yes,yes,no,no,yes,no,no,high,high,low


Export table to CSV

In [None]:
# Define the output folder
# e.g. if running in Sepal this might be: Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Create the folder if it does not exist yet; DataFrame.to_csv does not create
# missing parent directories and would fail with an OSError otherwise.
out_directory.mkdir(parents=True, exist_ok=True)

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table_w_risk.csv'

# Save the CSV file (index=False: do not write the DataFrame index as a column)
df_w_risk.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table with risk columns saved to: {csv_output_file}")

Table with risk columns saved to: C:\Users\Arnell\downloads\whisp_output_table_w_risk.csv


Export to GeoJSON (optional)

In [None]:
# Target path for the GeoJSON export (reuses out_directory from the CSV export cell)
geojson_output_file = out_directory.joinpath('whisp_output_geo_w_risk.geojson')

# Write a GeoJSON file containing the Whisp columns.
# The spatial features are created from the geometry column "geo".
whisp.convert_df_to_geojson(df_w_risk, geojson_output_file)
print(f"GeoJSON file saved to: {geojson_output_file}")

GeoJSON saved to C:\Users\Arnell\downloads\whisp_output_geo_w_risk.geojson
GeoJSON file saved to: C:\Users\Arnell\downloads\whisp_output_geo_w_risk.geojson
