### Whisp a feature collection

Setup
- Use a [virtual environment](https://docs.python.org/3/tutorial/venv.html) to avoid altering your system Python environment

Usage:
- Use this notebook with smaller datasets (e.g., up to 10,000 features). 
- For larger datasets consider the 'whisp_geojson_to_drive.ipynb' notebook, which is more suited to heavy processing
- Please report issues with this notebook [here](https://github.com/forestdatapartnership/whisp/issues)

In [1]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path

# Authenticate and initialize Earth Engine.
# Existing credentials are tried first; only if initialization fails does the
# cell fall back to the authentication flow and then initialize again.
try:
    ee.Initialize()  # Try to use existing credentials first
except Exception:
    ee.Authenticate() # Authenticate may open a browser window
    ee.Initialize()

# NB: if initialization still fails, pass your Cloud project explicitly:
# ee.Initialize(project="your_gee_cloud_project_name")

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_7TDKVSyKvBdmMqW?ref=4i2o6


In [2]:
# Install openforis-whisp (uncomment line if not already installed)
# !pip install --pre openforis-whisp

# NB: for an editable (development) install, run this in your terminal instead: pip install -e .[dev]

In [3]:
import openforis_whisp as whisp

Get a feature collection

In [4]:
# Path to the example GeoJSON that is used as the input file further down.
# NOTE(review): presumably resolves to a file shipped with openforis_whisp — confirm.
# Point this at your own GeoJSON file to process other data.
GEOJSON_EXAMPLE_FILEPATH = whisp.get_example_data_path("geojson_example.geojson")

Whisp it

Choose countries to process (currently three countries: 'co', 'ci', 'br')


In [5]:
# Country codes whose national datasets should be included.
# The same list is passed to combine_datasets, the formatted-stats call and
# whisp_risk in later cells, so keep it defined once here.
iso2_codes_list = ['co', 'ci', 'br']  # Example ISO2 codes for including country specific data

Choose whether to include additional custom layers

In [12]:
# Switch for the optional custom-band setup; later cells check this flag
# before adding or reporting custom bands.
USE_CUSTOM_BANDS = True # set to True to add extra Earth Engine data to Whisp, False to skip

In [13]:
# =============================================================================
# CUSTOM BANDS SETUP (OPTIONAL)
# Set USE_CUSTOM_BANDS = True and modify the examples below
# =============================================================================

# Defaults so that downstream cells can always reference these names, even
# when custom bands are disabled (avoids NameError on a fresh kernel).
custom_ee_image = None    # multiband ee.Image of custom layers, or None
custom_bands = None       # list of custom band names, or None
custom_bands_info = None  # per-band metadata dictionary, or None

if USE_CUSTOM_BANDS:

    # Step 1: Create your Earth Engine images in a dictionary. NB binary images only i.e., values of 0 or 1
    custom_images = {
        'example_treecover': ee.Image(1),  # Example: uniform coverage
        'nBR_example_commodity': ee.Image.random(seed=1).gte(.5).reproject(crs='EPSG:4326', scale=10)  # Example: random pixel coverage
        # add more images as needed
    }

    # Step 2: Define band metadata for each image (NB keys must match above)
    # Each band name serves as the key, with configuration parameters as values
    custom_bands_info = {
        'example_treecover': {
            'ISO2_code': "",          # Country code based on ISO2 (empty = applies to all countries)
            'theme': 'treecover',     # Risk theme: 'treecover', 'commodities', 'disturbance_before', 'disturbance_after'
            'theme_timber': "",       # Timber-specific theme (if applicable): 'primary', 'naturally_reg_2020', 'planted_plantation_2020','treecover_after_2020', 'agri_after_2020','logging_concession'
            'use_for_risk': 1,        # Include in main risk calculations (1=yes, 0=no)
            'use_for_risk_timber': 0  # Include in timber risk calculation (1=yes, 0=no)
        },
        'nBR_example_commodity': {
            'ISO2_code': "BR",
            'theme': 'commodities',
            'theme_timber': "",
            'use_for_risk': 1,
            'use_for_risk_timber': 0
        }
        # add more band metadata as needed
    }

    # Step 3: Rename and combine images
    band_names = list(custom_bands_info.keys())

    # Fail early with a readable message instead of a bare IndexError/KeyError
    if not band_names:
        raise ValueError(
            "custom_bands_info is empty: define at least one band or set USE_CUSTOM_BANDS = False"
        )
    missing_images = [name for name in band_names if name not in custom_images]
    if missing_images:
        raise ValueError(
            f"custom_bands_info has band(s) with no matching entry in custom_images: {missing_images}"
        )

    # Start with the first image, named after its band
    custom_ee_image = custom_images[band_names[0]].rename(band_names[0])

    # Append the remaining images (if any) as additional bands
    for name in band_names[1:]:
        custom_ee_image = custom_ee_image.addBands(custom_images[name].rename(name))

    # Convert to area values: each band is multiplied by the per-pixel area image
    custom_ee_image = custom_ee_image.multiply(ee.Image.pixelArea())
    custom_bands = band_names

In [16]:
# Build the base Whisp image, including national datasets for the chosen countries
whisp_image = whisp.combine_datasets(national_codes=iso2_codes_list)

# Append the custom layers only when the option is on and an image was built
if USE_CUSTOM_BANDS and custom_ee_image is not None:
    whisp_image = whisp_image.addBands(custom_ee_image)
    added_band_names = custom_ee_image.bandNames().getInfo()  # fetched from Earth Engine for the message
    print(f'Added custom bands {added_band_names} to Whisp image')

# Optional check of the full band list:
# print(whisp_image.bandNames().getInfo())

Whisp multiband image compiled
Added custom bands ['example_treecover', 'nBR_example_commodity'] to Whisp image
Added custom bands ['example_treecover', 'nBR_example_commodity'] to Whisp image


In [None]:
# Custom band names are only forwarded when the custom-band option is enabled,
# so that they are included in the formatted output.
custom_bands_for_output = custom_bands if USE_CUSTOM_BANDS else None

# Run Whisp on the input GeoJSON using the image prepared above.
df_formatted_stats = whisp.whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    whisp_image=whisp_image,
    # optional - by default national datasets are not included unless specified here.
    national_codes=iso2_codes_list,
    # optional - specify which input column/property to map to the external ID.
    external_id_column="user_id",
    custom_bands=custom_bands_for_output,
    # optional - to change unit type. Default is 'ha'.
    # unit_type='percent',
)

Using provided whisp_image
Processing feature collection
Using cached schema for national_codes: ['co', 'ci', 'br']
[logger.py | info() | l.23] INFO: Found 2 extra columns: ['example_treecover', 'nBR_example_commodity']
[logger.py | info() | l.23] INFO: All expected schema columns found in DataFrame.
[logger.py | info() | l.23] INFO: No extra columns found in DataFrame.
[logger.py | info() | l.23] INFO: custom_bands=None: Excluding all custom bands (strict mode)


In [17]:
# Display the formatted statistics table (one row per input feature in the output shown below)
df_formatted_stats

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,nBR_MapBiomas_col9_palmoil_2020,nBR_MapBiomas_col9_pc_2020,nBR_INPE_TCamz_cer_annual_2020,nBR_MapBiomas_col9_soy_2020,nBR_MapBiomas_col9_annual_crops_2020,nBR_INPE_TCamz_pasture_2020,nBR_INPE_TCcer_pasture_2020,nBR_MapBiomas_col9_pasture_2020,nCI_Cocoa_bnetd,geo
0,1,1,5778.994141,Polygon,CAF,CF,Mambéré-Kadéï,16.27488,4.083041,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[16.22468..."
1,2,2,9043.37207,Polygon,GAB,GA,Ngounié,10.89905,-1.024023,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[10.86284..."
2,3,3,1432.906006,Polygon,BEN,BJ,Zou,2.181025,7.037783,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[2.148833..."
3,4,4,196.804993,Polygon,BRA,BR,Mato Grosso,-54.38651,-11.910565,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-54.3934..."
4,5,5,250.063995,Polygon,BRA,BR,Mato Grosso,-54.706451,-11.992036,ha,...,0.0,0.0,247.358002,242.339005,4.171,0.0,0.0,3.297,0.0,"{'type': 'Polygon', 'coordinates': [[[-54.7171..."
5,6,6,1.939,MultiPolygon,GHA,GH,Ashanti Region,-1.611942,6.15954,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.61283..."
6,7,7,4.152,MultiPolygon,GHA,GH,Ashanti Region,-1.644732,6.104735,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-1.64615..."
7,8,8,16.6,MultiPolygon,GHA,GH,Western Region,-2.157144,5.981149,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[-2.15951..."
8,9,9,31.212999,MultiPolygon,IDN,ID,South Sumatra,103.956096,-3.054668,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9514..."
9,10,10,1.964,MultiPolygon,IDN,ID,South Sumatra,103.970371,-3.068831,ha,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"{'type': 'Polygon', 'coordinates': [[[103.9694..."


In [None]:
# Define the output folder (if running in Sepal change path to preferred folder)
# e.g. out_directory = Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Make sure the folder exists; writing the CSV fails if the directory is missing
out_directory.mkdir(parents=True, exist_ok=True)

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table.csv'

# Save the CSV file (this table does not contain risk columns yet)
df_formatted_stats.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Formatted stats table saved to: {csv_output_file}")

Calculate risk category

In [None]:
# add risk columns to end of dataframe
# The custom band metadata is only forwarded when custom bands are enabled, so
# this cell does not depend on a name that is unused (and may be undefined)
# when USE_CUSTOM_BANDS is False.
risk_kwargs = {}
if USE_CUSTOM_BANDS:
    risk_kwargs["custom_bands_info"] = custom_bands_info

df_w_risk = whisp.whisp_risk(
    df=df_formatted_stats,
    # optional - By default national datasets are not included.
    # This should align with the national_codes used above.
    national_codes=iso2_codes_list,
    **risk_kwargs,
)

Display table with risk columns

In [None]:
# Display the table returned by whisp_risk
df_w_risk

Export table to CSV

In [None]:
# Define the output folder
# e.g. if running in Sepal this might be: Path.home() / 'module_results/whisp/'
out_directory = Path.home() / 'downloads'

# Make sure the folder exists; writing the CSV fails if the directory is missing
out_directory.mkdir(parents=True, exist_ok=True)

# Define the output file path for CSV
csv_output_file = out_directory / 'whisp_output_table_w_risk.csv'

# Save the CSV file
df_w_risk.to_csv(path_or_buf=csv_output_file, index=False)
print(f"Table with risk columns saved to: {csv_output_file}")

Export to GeoJSON (optional)

In [None]:
# Target file for the GeoJSON export, placed in the output folder chosen above
geojson_output_file = out_directory.joinpath('whisp_output_geo_w_risk.geojson')

# Write a GeoJSON file containing the Whisp columns; the geometry column "geo"
# is used to create the spatial features.
whisp.convert_df_to_geojson(df_w_risk, geojson_output_file)
print(f"GeoJSON file saved to: {geojson_output_file}")