### Whisp a GeoJSON via Google Drive
- Use this notebook with larger datasets (e.g., over 10,000 features). 
- For smaller datasets, consider the 'whisp_geojson_to_csv.ipynb' or 'Colab_whisp_geojson_to_csv.ipynb' notebooks, as these are simpler to run.
- Note: This workflow requires retrieving output files manually from your Google Drive.
- Please report issues with this notebook [here](https://github.com/forestdatapartnership/whisp/issues)

Setup
- NB: use a virtual environment to avoid altering your Python environment (https://docs.python.org/3/tutorial/venv.html)

In [None]:
# Earth Engine and Common Libraries
import ee
from pathlib import Path
import pandas as pd

# Authenticate and initialize Earth Engine.
# First attempt to initialize with whatever credentials are already available;
# if that raises for any reason, run ee.Authenticate() and then retry the
# initialization once.
try:
    ee.Initialize()  # Try to use existing credentials first
except Exception:
    # Broad catch: any initialization failure falls back to re-authenticating.
    ee.Authenticate()
    ee.Initialize()

In [None]:
# Install openforis-whisp (uncomment line if not already installed)
# !pip install --pre openforis-whisp

# NB: for an editable (development) install, run this in your terminal instead: pip install -e .[dev]

In [None]:
import openforis_whisp as whisp

Get a feature collection

In [None]:
# Example GeoJSON fixture, resolved relative to the parent of the current
# working directory: <parent of cwd>/tests/fixtures/geojson_example.geojson
GEOJSON_EXAMPLE_FILEPATH = Path.cwd().parents[0].joinpath(
    "tests", "fixtures", "geojson_example.geojson"
)
print(GEOJSON_EXAMPLE_FILEPATH)


Prepare inputs

In [None]:
# Country-specific datasets to include, given as lowercase ISO2 codes.
# Currently three countries are available: 'co', 'ci', 'br'.
iso2_codes_list = [
    "co",
    "ci",
    "br",
]

# Toggle for additional custom layers: set to True to add extra Earth Engine data to Whisp.
USE_CUSTOM_BANDS = False

In [None]:
# =============================================================================
# OPTIONAL: CUSTOM BANDS - this cell only does work when USE_CUSTOM_BANDS is True
# =============================================================================
if USE_CUSTOM_BANDS:

    # 1) Custom Earth Engine images keyed by band name (binary values 0 or 1).
    custom_images = {
        "example_treecover": ee.Image(1),
        "nBR_example_commodity": (
            ee.Image.random(seed=1)
            .gte(0.5)
            .reproject(crs="EPSG:4326", scale=10)
        ),
        # add more images as needed
    }

    # 2) Metadata for each custom band; the keys must match those of custom_images.
    #    Themes: 'treecover', 'commodities', 'disturbance_before', 'disturbance_after'
    #    Timber themes: 'primary', 'naturally_reg_2020', 'planted_plantation_2020', etc.
    custom_bands_info = {
        "example_treecover": {
            "ISO2_code": "",  # Country code (empty = all countries)
            "theme": "treecover",  # Risk theme
            "theme_timber": "",  # Timber theme (if applicable)
            "use_for_risk": 1,  # Include in risk calculations (1=yes, 0=no)
            "use_for_risk_timber": 0,  # Include in timber risk (1=yes, 0=no)
        },
        "nBR_example_commodity": {
            "ISO2_code": "BR",
            "theme": "commodities",
            "theme_timber": "",
            "use_for_risk": 1,
            "use_for_risk_timber": 0,
        },
        # add more band metadata as needed
    }

    # 3) Combine the custom images with their metadata, then collect the band names.
    custom_ee_image = whisp.combine_custom_bands(custom_images, custom_bands_info)
    custom_bands = list(custom_bands_info)


In [None]:
# Build the Whisp image from the standard datasets plus the selected national ones.
whisp_image = whisp.combine_datasets(national_codes=iso2_codes_list)

# Append the custom bands only when they are enabled AND the custom image
# was actually created (i.e. the custom bands cell has been run).
if USE_CUSTOM_BANDS:
    if "custom_ee_image" in locals():
        whisp_image = whisp_image.addBands(custom_ee_image)

# Report the band count; getInfo() fetches the band names from Earth Engine.
print(
    f"📊 Final image has {len(whisp_image.bandNames().getInfo())} bands"
)

In [None]:
# Send the Whisp statistics for the input GeoJSON to Google Drive.
# The stats table used later in this notebook comes from the CSV that is
# downloaded manually from Google Drive, not from this call, so the result is
# kept under a neutral name rather than `df_formatted_stats` (that name is
# assigned further down, once the downloaded stats have been validated).
# NOTE(review): presumably this call returns nothing useful (e.g. None) -
# confirm against the openforis-whisp documentation.
drive_export_result = whisp.whisp_stats_geojson_to_drive(
    input_geojson_filepath=GEOJSON_EXAMPLE_FILEPATH,
    # external_id_column="user_id",  # optional - specify which input column/property to map to the external ID.
    national_codes=iso2_codes_list,  # optional - by default national datasets are not included unless specified here.
    # unit_type='percent',  # optional - to change unit type. Default is 'ha'.
    whisp_image=whisp_image,  # optional - defaults to the standard Whisp image if not provided
)

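Manual step
- Download the exported CSV from Google Drive once the export has finished
- Place it in the folder read by the next cell (your Downloads folder by default), or edit the path in that cell to point to its location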



Import CSV of raw stats


In [None]:
# Folder holding the CSV downloaded from Google Drive (also used for the outputs below)
out_directory = Path.home().joinpath("Downloads")

# Path of the downloaded raw-stats CSV
stats_file_from_drive = out_directory.joinpath("whisp_output_table.csv")  # edit as required

# Load the raw stats and rename the ".geo" column to "geo" in a single chain
df_stats = pd.read_csv(stats_file_from_drive).rename(columns={".geo": "geo"})


Display table 

In [None]:
# Raw stats as loaded from the downloaded CSV
# (bare expression so the notebook renders the table as the cell output)
df_stats

Format stats based on Whisp schema

In [None]:

# Temporary conversion: add an ISO2 "ProducerCountry" column based on the ISO3 "Country" column
df_stats = whisp.convert_iso3_to_iso2(
    df=df_stats,
    iso3_column="Country",
    iso2_column="ProducerCountry",
)

# Validate the stats using the Whisp lookups; custom band names are passed
# only when custom bands are enabled.
df_formatted_stats = whisp.validate_dataframe_using_lookups_flexible(
    df_stats,
    national_codes=iso2_codes_list,
    custom_bands=custom_bands if USE_CUSTOM_BANDS else None,
)


Display table
- Note: If this doesn't look right, check the previous steps (including whether you are using the correct downloaded CSV results)

In [None]:
df_formatted_stats  # view the formatted output dataframe


Calculate risk category

In [None]:
# Append the risk columns to the end of the formatted dataframe.
df_w_risk = whisp.whisp_risk(
    df=df_formatted_stats,
    # optional - national datasets are not included by default; keep this
    # aligned with the national_codes used above
    national_codes=iso2_codes_list,
    # custom band metadata is supplied only when custom bands are enabled
    custom_bands_info=custom_bands_info if USE_CUSTOM_BANDS else None,
)


Display table with risk columns

In [None]:
# Stats table including the added risk columns (bare expression renders it as the cell output)
df_w_risk

Export table with risk columns to csv 

In [None]:
# Define output file path
output_risk_file = out_directory / "whisp_output_table_w_risk.csv" # edit as required

# Save statistics with added risk columns to CSV
df_w_risk.to_csv(path_or_buf=output_risk_file,index=False)

print(f"Table with risk columns saved to: {output_risk_file}")

Export to GeoJSON (optional)

In [None]:
# Define the output file path for GeoJSON
geojson_output_file = out_directory / 'whisp_output_table.geojson'

# Save the GeoJSON file
whisp.convert_df_to_geojson(df_w_risk, geojson_output_file)  # builds a geojson file containing Whisp columns. Uses the geometry column "geo" to create the spatial features.
print(f"GeoJSON file saved to: {geojson_output_file}")