In [1]:
# Imports and Setup
import os
import json
import sys
import ast
import openforis_whisp as whisp
import pandas as pd
import numpy as np

print("Libraries imported successfully")


Error in default EE initialization: Please authorize access to your Earth Engine account by running

earthengine authenticate

in your command line, or ee.Authenticate() in Python, and then retry.
Libraries imported successfully


top-level pandera module will be **removed in a future version of pandera**.
If you're using pandera to validate pandas objects, we highly recommend updating
your import:

```
# old import
import pandera as pa

# new import
import pandera.pandas as pa
```

If you're using pandera to validate objects from other compatible libraries
like pyspark or polars, see the supported libraries section of the documentation
for more information on how to import pandera:

https://pandera.readthedocs.io/en/stable/supported_libraries.html


```
```



In [None]:
# Locate credentials.json two directory levels above the current working
# directory, then hand that path to whisp for Earth Engine initialization.
CREDENTIAL_PATH = os.path.join(
    os.path.abspath(os.getcwd()),
    "../../credentials.json",
)
print(f"Using credential path: {CREDENTIAL_PATH}")

whisp.initialize_ee(CREDENTIAL_PATH)
print("WHISP initialized successfully")


Using credential path: d:\work\fao\src\whisp-app\src\python\../../credentials.json
d:\work\fao\src\whisp-app\src\python\../../credentials.json
EE initialized with credentials from: d:\work\fao\src\whisp-app\src\python\../../credentials.json
WHISP initialized successfully


In [None]:
class AnalysisOptions:
    def __init__(self, d: dict | None):
        d = d or {}
        self.external_id_column = d.get('externalIdColumn')
        self.unit_type = d.get('unitType')
        nc = d.get('nationalCodes')
        self.national_codes = [str(c).lower() for c in nc] if isinstance(nc, list) and nc else None


print("AnalysisOptions class defined")


AnalysisOptions class defined


In [None]:
# Input file to analyse; it is looked up in the temp directory two levels
# above the current working directory.
file_name = "f6549633-ee9c-4700-abb7-b4f6f41e6302.json"
file_path = os.path.join(
    os.path.abspath(os.getcwd()),
    "../../temp/",
    file_name,
)

# Output format switch: True selects the legacy JSON export format.
legacy_mode = False

print(f"File path: {file_path}")
print(f"Legacy mode: {legacy_mode}")


File path: d:\work\fao\src\whisp-app\src\python\../../temp/f6549633-ee9c-4700-abb7-b4f6f41e6302.json
Legacy mode: False


In [None]:
# Load and parse analysis options from file
# Start from the defaults so `opts` is always defined, even if reading fails.
opts = AnalysisOptions(None)

try:
    # Decode explicitly instead of relying on the platform's locale encoding
    # (e.g. cp1252 on Windows); 'utf-8-sig' reads plain UTF-8 and also strips
    # a leading BOM, which json.load would otherwise reject.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        payload = json.load(f)
    # Only a top-level JSON object can carry an 'analysis_options' entry.
    opts = AnalysisOptions(payload.get('analysis_options') if isinstance(payload, dict) else None)
    print("Analysis options loaded from file")
except Exception as e:
    # Deliberately broad: any read/parse problem falls back to the defaults
    # assigned above so the rest of the notebook can still run.
    print(f"Could not load analysis options from file: {e}")
    print("Using default options")

print(f"External ID column: {opts.external_id_column}")
print(f"Unit type: {opts.unit_type}")
print(f"National codes: {opts.national_codes}")


Analysis options loaded from file
External ID column: user_id
Unit type: percent
National codes: None


In [None]:
# Collect the optional arguments in call order, then keep only the ones that
# were actually set; unset (falsy) options are simply not passed along.
candidate_kwargs = {
    'national_codes': opts.national_codes,
    'external_id_column': opts.external_id_column,
    'unit_type': opts.unit_type,
}
df_kwargs = {key: value for key, value in candidate_kwargs.items() if value}

print(f"DataFrame kwargs: {df_kwargs}")

whisp_df = whisp.whisp_formatted_stats_geojson_to_df(file_path, **df_kwargs)

print(f"DataFrame columns: {list(whisp_df.columns)}")
whisp_df.head()


DataFrame kwargs: {'external_id_column': 'user_id', 'unit_type': 'percent'}
An error occurred when trying to set the external_id_column: user_id. Error: The column 'user_id' is missing from 68 out of 68 features in the collection. Available properties in first feature: ['system:index']
An error occurred during the conversion from EE to DataFrame: The column 'user_id' is missing from 68 out of 68 features in the collection. Available properties in first feature: ['system:index']
Creating schema for national_codes: None
[reformat.py | log_missing_columns() | l.267] INFO: All columns from dataframe found in the schema.
plotId, external_id, Area, Geometry_type, Country, ProducerCountry, Admin_Level_1, Centroid_lon, Centroid_lat, Unit, In_waterbody, EUFO_2020, GLAD_Primary, TMF_undist, GFC_TC_2020, Forest_FDaP, ESA_TC_2020, TMF_plant, Oil_palm_Descals, Oil_palm_FDaP, Coffee_FDaP, Cocoa_FDaP, Cocoa_ETH, Rubber_FDaP, Rubber_RBGE, Soy_Song_2020, TMF_def_2000, TMF_def_2001, TMF_def_2002, TMF_de

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,ESRI_2023_TC,GLC_FCS30D_TC_2022,Oil_palm_2023_FDaP,Rubber_2023_FDaP,Coffee_FDaP_2023,Cocoa_2023_FDaP,ESRI_2023_crop,GLC_FCS30D_crop_2022,GFW_logging_before_2020,geo


In [None]:
# Run the WHISP risk analysis on the formatted statistics.
# Falsy option values (None, empty string, empty list) are passed as None.
risk_unit_type = opts.unit_type or None
risk_national_codes = opts.national_codes or None

whisp_df_risk = whisp.whisp_risk(
    whisp_df,
    explicit_unit_type=risk_unit_type,
    national_codes=risk_national_codes,
)

print("Risk analysis completed")
print(f"Result DataFrame shape: {whisp_df_risk.shape}")
print(f"Result DataFrame columns: {list(whisp_df_risk.columns)}")
whisp_df_risk.head()


In [None]:
# Clean data - handle NaN and infinity values
print("Cleaning data...")

for column_name in whisp_df_risk.columns:
    column = whisp_df_risk[column_name]
    if pd.api.types.is_numeric_dtype(column):
        # Numeric columns: NaN and +/-infinity are replaced with None.
        # NOTE(review): Series.replace(list, None) has not behaved the same in
        # every pandas release - confirm the installed version yields None here.
        whisp_df_risk[column_name] = column.replace([np.nan, np.inf, -np.inf], None)
    elif column.dtype == 'object':
        # Object columns: missing values become empty strings.
        whisp_df_risk[column_name] = column.fillna('')

print("Data cleaning completed")

# Report numeric columns that still hold NaN or infinite values.
print("\nChecking for remaining NaN/inf values:")
for column_name in whisp_df_risk.columns:
    column = whisp_df_risk[column_name]
    if not pd.api.types.is_numeric_dtype(column):
        continue
    nan_count = column.isna().sum()
    # NaN is filled with 0 first so that only true infinities are counted.
    inf_count = np.isinf(column.fillna(0)).sum()
    if nan_count > 0 or inf_count > 0:
        print(f"  {column_name}: {nan_count} NaN, {inf_count} inf values")


In [None]:
# Export results
# Both outputs are written next to the input file and share its base name.
output_stem = os.path.splitext(file_path)[0]
csv_file_path = output_stem + '-result.csv'
json_file_path = output_stem + '-result.json'

print(f"Exporting to CSV: {csv_file_path}")
whisp_df_risk.to_csv(csv_file_path, index=False, encoding='utf-8-sig')
print("CSV export completed")

print(f"Exporting to JSON: {json_file_path}")

if not legacy_mode:
    # Modern format using whisp converter
    whisp.convert_df_to_geojson(whisp_df_risk, json_file_path)
    print("JSON export completed (modern format)")
else:
    # Legacy mode - original format output: a JSON array with one object per row.
    print("Using legacy mode for JSON export...")
    df_dict = whisp_df_risk.to_dict(orient='records')

    # Geometry columns may hold the text form of a Python literal; turn them
    # back into nested structures so they are exported as JSON objects.
    for record in df_dict:
        for geo_field in ['geojson', 'geometry']:
            if geo_field in record and isinstance(record[geo_field], str):
                try:
                    record[geo_field] = ast.literal_eval(record[geo_field])
                except (ValueError, SyntaxError):
                    # Best effort: text that is not a valid literal stays a string.
                    pass

    def clean_nan_values(item):
        """Return a copy of ``item`` with every non-finite float replaced by None.

        Dicts, lists and tuples are walked recursively (tuples come back as
        lists, which is also how JSON serializes them); other values are
        returned unchanged.
        """
        if isinstance(item, dict):
            return {k: clean_nan_values(v) for k, v in item.items()}
        elif isinstance(item, (list, tuple)):
            return [clean_nan_values(i) for i in item]
        elif isinstance(item, (float, np.floating)) and not np.isfinite(item):
            return None
        else:
            return item

    class CustomJSONEncoder(json.JSONEncoder):
        """JSON encoder that also accepts numpy scalars and arrays."""

        def default(self, obj):
            # default() is only called for objects the json module cannot
            # serialize itself, so plain Python floats never arrive here;
            # numpy values that are not built-in subclasses do.
            if isinstance(obj, np.floating):
                return float(obj) if np.isfinite(obj) else None
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.bool_):
                return bool(obj)
            if isinstance(obj, np.ndarray):
                return clean_nan_values(obj.tolist())
            return super().default(obj)

    clean_dict = clean_nan_values(df_dict)

    try:
        # Explicit UTF-8 so the file does not depend on the platform locale.
        with open(json_file_path, 'w', encoding='utf-8') as outfile:
            json.dump(clean_dict, outfile, indent=4, cls=CustomJSONEncoder)
        print(f"JSON data exported to {json_file_path}")
    except TypeError as e:
        # A value the encoder cannot handle: let pandas serialize the frame.
        # Reopening with 'w' truncates whatever json.dump had already written.
        print(f"Error in JSON conversion: {e}")
        json_data = whisp_df_risk.to_json(orient='records', date_format='iso', force_ascii=False)
        # force_ascii=False can emit non-ASCII characters, so UTF-8 is required.
        with open(json_file_path, 'w', encoding='utf-8') as outfile:
            outfile.write(json_data)
        print(f"Fallback JSON data exported to {json_file_path}")

print("Export completed!")


## Manual Testing Instructions

To use this notebook for testing:

1. **Update the input file** in the 4th code cell - set `file_name` to the name of your GeoJSON file; it is resolved relative to the `../../temp/` directory
2. **Run each cell sequentially** to step through the analysis process
3. **Inspect intermediate results** by examining the DataFrames (`whisp_df`, `whisp_df_risk`)
4. **Modify parameters** as needed:
   - Change `legacy_mode` to `True` if you need the legacy JSON format
   - Modify analysis options manually if needed
5. **Check outputs** - CSV and JSON files will be saved with `-result` suffix

The notebook breaks down the original script into logical steps, allowing you to:
- Test with different input files
- Examine intermediate data transformations
- Debug issues step by step
- Modify parameters without restarting the entire process
