### Whisp a geojson

Setup
- Use a [virtual environment](https://docs.python.org/3/tutorial/venv.html) to avoid altering your python environment 

Usage:
- Use this notebook with smaller datasets (e.g., up to 10,000 features). 
- For larger datasets consider the 'whisp_geojson_to_drive.ipynb' notebook, which is more suited to heavy processing
- Please report issues with this notebook [here](https://github.com/forestdatapartnership/whisp/issues)

In [92]:
# Earth Engine and Common Libraries|
import ee
from pathlib import Path

# Authenticate and initialize Earth Engine. 
try:
    ee.Initialize()  # Try to use existing credentials first
except Exception:
    ee.Authenticate() # Authenticate may open a browser window
    ee.Initialize()

# NB if not working add your cloud project: ee.Initialize(project="your_gee_cloud_project_name")

In [93]:
# Install openforis-whisp (uncomment line if not already installed)
# !pip install --pre openforis-whisp

# NB for editable mode install via your terminal with: pip install -e .[dev]

In [94]:
import openforis_whisp as whisp

Get a geojson

In [None]:
# GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")

In [96]:
folder_path = r"C:\Users\Arnell\Downloads\a_processing_tests"

In [None]:
folder_path = r"C:\Users\Arnell\Downloads\a_processing_tests"

geom = (ee.FeatureCollection("projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1")
    .filter(ee.Filter.eq('gaul0_name', 'Brazil')).geometry()
)

geojson = whisp.generate_test_polygons(
    bounds = geom,
    num_polygons=100,
    min_area_ha=10,
    max_area_ha=50,
    min_number_vert=50,
    max_number_vert=100
    )



[utils.py | generate_test_polygons() | l.378] INFO: Extracting bounds from Earth Engine Geometry...
[utils.py | generate_test_polygons() | l.391] INFO: Bounds: [-73.98, -33.75, -28.85, 5.27]
[utils.py | generate_test_polygons() | l.419] INFO: Generating 100 test polygons with 50-100 vertices...
[utils.py | generate_test_polygons() | l.467] INFO: Generated 100 polygons!
[utils.py | generate_test_polygons() | l.473] INFO: Vertex count - Requested: 50-99, Actual: 50-99
[utils.py | generate_test_polygons() | l.481] INFO: Area (ha) - Requested: 10.6-50.0, Actual: 10.5-53.9


In [104]:
import json
with open (GEOJSON_EXAMPLE_FILEPATH, "w") as f:
    json.dump(geojson,f)



In [105]:
import json
import ee

def convert_geojson_to_ee_quick(geojson_filepath):
    with open(geojson_filepath, "r") as f:
        geojson_data = json.load(f)
    return ee.FeatureCollection(geojson_data)

In [106]:
fc = convert_geojson_to_ee_quick(GEOJSON_EXAMPLE_FILEPATH)
print(str(fc.size().getInfo()))


100


In [107]:
fc = whisp.convert_geojson_to_ee(GEOJSON_EXAMPLE_FILEPATH)
# print (str(fc.size().getInfo()))
# fc = fc.map(
#     lambda feature: feature.set('Area_Geometry', feature.geometry().area().divide(10000))
#              )
# print (str(fc.first().get("Area_Geometry").getInfo()))
# print(str(fc.size().getInfo()))


Reading GeoJSON file from: c:\Users\Arnell\Documents\GitHub\whisp\tests\fixtures\geojson_example.geojson


In [None]:
# --- Helper functions for extracting properties from FeatureCollection ---
import pandas as pd
from openforis_whisp.parameters.config_runtime import admin_1_column, iso3_country_column, iso2_country_column, geometry_type_column, centroid_x_coord_column, centroid_y_coord_column

# Function to join admin codes using lookup_dict (from admin_code_linking)
def join_admin_codes(df, lookup_dict, id_col):
    lookup_df = pd.DataFrame.from_dict(lookup_dict, orient='index')
    lookup_df.index.name = 'gaul1_code'
    lookup_df = lookup_df.reset_index()
    merged_df = df.merge(lookup_df, left_on=id_col, right_on='gaul1_code', how='left')
    merged_df = merged_df.rename(columns={
        'gaul1_name': admin_1_column,
        'iso3_code': iso3_country_column,
        'iso2_code': iso2_country_column
    })
    merged_df = merged_df.drop(columns=['gaul1_code','gaul0_name'])
    return merged_df

# Function to extract centroid, geometry type, and coordinates from an ee.Feature
# (Assumes you have a FeatureCollection 'fc')
def extract_centroid_and_geomtype(fc):
    def add_centroid_and_geomtype(feature):
        centroid = feature.geometry().centroid(1)
        coords = centroid.coordinates()
        return feature.set({
            centroid_x_coord_column: coords.get(0),
            centroid_y_coord_column: coords.get(1),
            geometry_type_column: feature.geometry().type()
        })
    return fc.map(add_centroid_and_geomtype)

# Example usage:
fc_with_centroids = extract_centroid_and_geomtype(fc)
df = whisp.convert_ee_to_df(fc_with_centroids, remove_geom=True)


# Now join admin codes using lookup_dict and the centroid-based admin code column
from openforis_whisp.parameters.lookup_gaul1_admin import lookup_dict
result_df = join_admin_codes(df, lookup_dict, id_col='first')  # 'first' is the admin code column from reduceRegions

result_df.head()

In [109]:
import geopandas as gpd

# Load your GeoJSON file
gdf = gpd.read_file(GEOJSON_EXAMPLE_FILEPATH)

# Calculate centroids (returns Point geometry)
gdf['centroid'] = gdf.geometry.centroid

# Extract centroid coordinates
gdf['centroid_x'] = gdf['centroid'].x
gdf['centroid_y'] = gdf['centroid'].y

# Get geometry type
gdf['geometry_type'] = gdf.geometry.geom_type

# Display results
print(gdf[['centroid_x', 'centroid_y', 'geometry_type']])

    centroid_x  centroid_y geometry_type
0   -44.999039  -28.908566       Polygon
1   -72.694297  -11.508242       Polygon
2   -40.567391  -16.634939       Polygon
3   -66.825503  -11.746275       Polygon
4   -29.217520    3.110134       Polygon
..         ...         ...           ...
95  -49.370589   -3.886532       Polygon
96  -28.930885  -14.047388       Polygon
97  -56.530265  -11.939831       Polygon
98  -53.609456  -26.461115       Polygon
99  -39.309528  -32.420601       Polygon

[100 rows x 3 columns]



  gdf['centroid'] = gdf.geometry.centroid


In [111]:
# --- Test the helper functions with fc ---
# 1. Add centroid and geometry type properties to each feature
fc_with_centroids = extract_centroid_and_geomtype(fc)
# print(fc_with_centroids.first().get(geometry_type_column).getInfo())
# 2. Convert to DataFrame (remove_geom=True to avoid geometry column)
df_centroids = whisp.convert_ee_to_df(fc_with_centroids, remove_geom=True)
df_centroids.head()

Unnamed: 0,geo,Centroid_lat,Centroid_lon,Geometry_type,actual_area_ha,actual_vertices,internal_id,requested_area_ha,requested_vertices
0,,-28.908566,-44.999039,Polygon,28.82,84,1,30.75,84
1,,-11.508242,-72.694297,Polygon,19.74,74,2,21.71,74
2,,-16.634939,-40.567391,Polygon,28.63,66,3,28.19,66
3,,-11.746275,-66.825503,Polygon,17.98,90,4,19.03,90
4,,3.110134,-29.21752,Polygon,38.15,70,5,40.14,70


In [112]:
GEOJSON_EXAMPLE_FILEPATH = folder_path + "/random_polygons.geojson"
import json

with open(GEOJSON_EXAMPLE_FILEPATH, "w") as f:
    json.dump(geojson, f)

Prepare inputs

In [113]:
# Choose if want to include additional custom layers
USE_CUSTOM_BANDS = False # set to True if want to add extra ee data to whisp

In [114]:
# =============================================================================
# CUSTOM BANDS SETUP (OPTIONAL) - runs only if USE_CUSTOM_BANDS = True above
# =============================================================================
if USE_CUSTOM_BANDS:

    # Step 1: Define custom Earth Engine images (binary values 0 or 1)
    custom_images = {
        'example_treecover': ee.Image(1),  # ee.Image("UMD/hansen/global_forest_change_2024_v1_12").select("treecover2000").gt(10).selfMask()
        'nXX_example_commodity': ee.Image.random(seed=1).gte(.5).reproject(crs='EPSG:4326', scale=10) # ee.ImageCollection("projects/forestdatapartnership/assets/cocoa/model_2025a").filter(ee.Filter.date('2020-01-01', '2021-01-01')).mosaic().gt(.8).selfMask()
        # add more images as needed (prefix 'nXX_' = iso2 code for national dataset)
    }

    # Step 2: Define metadata for each custom band (keys must match above)
    # Themes: 'treecover', 'commodities', 'disturbance_before', 'disturbance_after'
    # Timber themes: 'primary', 'naturally_reg_2020', 'planted_plantation_2020', etc.
    custom_bands_info = {
        'example_treecover': {
            'ISO2_code': "",          # Country code (empty = all countries)
            'theme': 'treecover',     # Risk theme
            'theme_timber': "",       # Timber theme (if applicable)
            'use_for_risk': 1,        # Include in risk calculations (1=yes, 0=no)
            'use_for_risk_timber': 0  # Include in timber risk (1=yes, 0=no)
        },
        'nXX_example_commodity': {
            'ISO2_code': "XX", 
            'theme': 'commodities', 
            'theme_timber': "",
            'use_for_risk': 1, 
            'use_for_risk_timber': 0
        }
        # add more band metadata as needed
    }

    # Step 3: Combine custom bands and extract names
    custom_ee_image = whisp.combine_custom_bands(custom_images, custom_bands_info)

    custom_bands = list(custom_bands_info.keys())


In [None]:
# Choose additional national datasets to include (currently three countries: 'co', 'ci', 'br').
base_iso2_codes = ['co', 'ci', 'br']

# automatically add any custom ISO2 codes from custom_bands_info if USE_CUSTOM_BANDS is True
iso2_codes_list = base_iso2_codes.copy()
if USE_CUSTOM_BANDS:
    iso2_codes_list += [code.lower() for code in {v.get('ISO2_code') for v in custom_bands_info.values()} if code and code.lower() not in iso2_codes_list]

In [None]:
# Create final Whisp image
whisp_image = whisp.combine_datasets(national_codes=iso2_codes_list)
standard_bands = len(whisp_image.bandNames().getInfo())

if USE_CUSTOM_BANDS and 'custom_ee_image' in locals():
    whisp_image = whisp_image.addBands(custom_ee_image)
    print(f"Final image has {standard_bands + len(custom_bands)} bands ({standard_bands} + {len(custom_bands)} custom)")
else:
    print(f"Final image has {standard_bands} bands")

Whisp multiband image compiled
Final image has 196 bands
Final image has 196 bands


Run Whisp 

In [None]:
df_stats = whisp.whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    # external_id_column="user_id", # optional -  specify which input column/property to map to the external ID.
    national_codes=iso2_codes_list,  # optional - By default national datasets are not included unless specified here.
    # unit_type='percent', # optional - to change unit type. Default is 'ha'. 
    whisp_image=whisp_image, # optional - defaults to standard whisp image if not provided
    custom_bands=custom_bands if USE_CUSTOM_BANDS else None  # include custom bands in formatted output 
) 

Using provided whisp_image
Processing feature collection




Using cached schema for national_codes: ['co', 'ci', 'br']
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.


Display results

In [None]:
df_stats

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,nBR_MapBiomas_col9_palmoil_2020,nBR_MapBiomas_col9_pc_2020,nBR_INPE_TCamz_cer_annual_2020,nBR_MapBiomas_col9_soy_2020,nBR_MapBiomas_col9_annual_crops_2020,nBR_INPE_TCamz_pasture_2020,nBR_INPE_TCcer_pasture_2020,nBR_MapBiomas_col9_pasture_2020,nCI_Cocoa_bnetd,geo
0,1,,21.667,Polygon,Unknown,not found,Unknown,-33.442508,-18.9276,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-33.4452..."
1,2,,45.429001,Polygon,ARG,AR,Corrientes,-56.47269,-27.884815,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-56.4765..."
2,3,,20.497,Polygon,BOL,BO,Pando,-66.204764,-10.496049,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-66.2072..."
3,4,,18.417,Polygon,BOL,BO,Tarija,-62.916901,-21.575376,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-62.9195..."
4,5,,24.228001,Polygon,BRA,BR,Amazonas,-72.792713,-7.244851,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-72.7951..."
5,6,,25.207001,Polygon,Unknown,not found,Unknown,-32.509406,-3.63363,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-32.5121..."
6,7,,33.207001,Polygon,ARG,AR,Salta,-66.183657,-23.791058,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-66.1870..."
7,8,,18.038,Polygon,BRA,BR,Mato Grosso,-55.376471,-9.986495,ha,...,0.0,0.0,0.0,0.0,0.0,12.253,0.0,13.238,0.0,"{'type': 'Polygon', 'coordinates': [[[-55.3785..."
8,9,,44.178001,Polygon,BRA,BR,Mato Grosso,-55.971113,-14.62594,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,3.267,7.323,0.0,"{'type': 'Polygon', 'coordinates': [[[-55.9745..."
9,10,,28.712,Polygon,Unknown,not found,Unknown,-42.767375,1.822941,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-42.7702..."


In [None]:
# Define the output folder (if running in Sepal change path to preferred folder) 
# e.g. out_directory = Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table.csv'

# Save the CSV file
df_stats.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table with risk columns saved to: {csv_output_file}")

Calculate risk category

In [None]:
# adds risk columns to end of dataframe
df_w_risk = whisp.whisp_risk(
    df=df_stats,
    national_codes=iso2_codes_list,
    custom_bands_info=custom_bands_info if USE_CUSTOM_BANDS else None  # Add: missing custom bands
)

Display table with risk columns

In [None]:
df_w_risk

Export table to CSV

In [None]:
# Define the output folder 
# e.g. in running in Sepal this might be: Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table_w_risk.csv'

# Save the CSV file
df_w_risk.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table with risk columns saved to: {csv_output_file}")

Export to GeoJSON (optional)

In [None]:
# Define the output file path for GeoJSON
geojson_output_file = out_directory / 'whisp_output_geo_w_risk.geojson'

# Save the GeoJSON file
whisp.convert_df_to_geojson(df_w_risk, geojson_output_file)  # builds a geojson file containing Whisp columns. Uses the geometry column "geo" to create the spatial features.
print(f"GeoJSON file saved to: {geojson_output_file}")