### Whisp a GeoJSON via Google Drive
- Use this when the "whisp_geojson_to_csv" notebook fails due to timeout errors
- NB: requires collecting outputs from Google Drive

Import packages
- NB: to avoid complications from altering your main Python environment, run this notebook in a virtual environment (https://docs.python.org/3/tutorial/venv.html)

In [1]:
import ee

import pandas as pd

from pathlib import Path

import whisp

# Authenticate to Earth Engine (will open a browser window)
# NOTE(review): the recorded cell output reads "EE auto-initialized with default
# credentials." -- presumably emitted during one of the imports above; confirm
# whether these explicit calls are still required in that case.
ee.Authenticate()
# Initialize the Earth Engine library
ee.Initialize()


EE auto-initialized with default credentials.


Get a feature collection

In [2]:
# Example GeoJSON under "test/fixtures", resolved relative to the parent of the
# current working directory.
GEOJSON_EXAMPLE_FILEPATH = Path.cwd().parents[0].joinpath(
    "test", "fixtures", "geojson_example.geojson"
)
print(GEOJSON_EXAMPLE_FILEPATH)


c:\Users\Arnell\Documents\GitHub\whisp_sustaain\test\fixtures\geojson_example.geojson


Whisp it

In [3]:
# Run Whisp on the example GeoJSON; the output table is collected from
# Google Drive afterwards (see the manual step below).
whisp.whisp_stats_geojson_to_drive(GEOJSON_EXAMPLE_FILEPATH)

Reading GeoJSON file from: c:\Users\Arnell\Documents\GitHub\whisp_sustaain\test\fixtures\geojson_example.geojson
['Area', 'Cocoa_bnetd', 'Oil_palm_Descals', 'ESA_fire_before_2020', 'ESA_fire_2001', 'ESA_fire_2002', 'ESA_fire_2003', 'ESA_fire_2004', 'ESA_fire_2005', 'ESA_fire_2006', 'ESA_fire_2007', 'ESA_fire_2008', 'ESA_fire_2009', 'ESA_fire_2010', 'ESA_fire_2011', 'ESA_fire_2012', 'ESA_fire_2013', 'ESA_fire_2014', 'ESA_fire_2015', 'ESA_fire_2016', 'ESA_fire_2017', 'ESA_fire_2018', 'ESA_fire_2019', 'ESA_fire_2020', 'ESA_TC_2020', 'Cocoa_ETH', 'Cocoa_FDaP', 'Forest_FDaP', 'Oil_palm_FDaP', 'Rubber_FDaP', 'DIST_after_2020', 'DIST_year_2024', 'DIST_year_2025', 'GFC_TC_2020', 'GFC_loss_after_2020', 'GFC_loss_before_2020', 'GFC_loss_year_2001', 'GFC_loss_year_2002', 'GFC_loss_year_2003', 'GFC_loss_year_2004', 'GFC_loss_year_2005', 'GFC_loss_year_2006', 'GFC_loss_year_2007', 'GFC_loss_year_2008', 'GFC_loss_year_2009', 'GFC_loss_year_2010', 'GFC_loss_year_2011', 'GFC_loss_year_2012', 'GFC_loss

Manual step
- Download the output CSV from Google Drive when it has finished
- Place it in the output folder (or an alternative location) and make sure the file path in the next cell matches



Import CSV of raw stats


In [4]:
# Folder holding the table downloaded from Google Drive (directory untracked by git)
out_directory = Path.cwd().parents[0].joinpath("test", "output")

# Path of the downloaded raw-stats CSV -- edit as required
stats_file_from_drive = out_directory.joinpath("whisp_output_table.csv")

# Load the raw stats into a dataframe
df_stats = pd.read_csv(stats_file_from_drive)


Display table 

In [5]:
# Raw stats as loaded from the CSV in the previous cell (not yet formatted)
df_stats

Unnamed: 0,system:index,Admin_Level_1,Area,Centroid_lat,Centroid_lon,Cocoa_ETH,Cocoa_FDaP,Cocoa_bnetd,Country,DIST_after_2020,...,TMF_deg_2021,TMF_deg_2022,TMF_deg_2023,TMF_deg_after_2020,TMF_deg_before_2020,TMF_plant,TMF_undist,Unit,plotId,.geo
0,0,Ashanti Region,1.939,6.15954,-1.611942,0.0,0.0,0.0,GHA,0.0,...,0.0,0.0,0.0,0.0,0.637,0.0,0.0,ha,1.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
1,1,Ashanti Region,4.152,6.104735,-1.644732,0.01,0.01,0.0,GHA,0.0,...,0.0,0.0,0.0,0.0,1.971,0.0,0.0,ha,2.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
2,2,Western Region,16.6,5.981149,-2.157144,0.0,0.0,0.0,GHA,0.0,...,0.0,0.0,0.089,0.089,0.738,0.0,15.862,ha,3.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
3,3,South Sumatra,31.213,-3.054668,103.956096,0.0,0.0,0.0,IDN,0.0,...,0.0,0.0,0.0,0.0,0.0,31.213,0.0,ha,4.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
4,4,South Sumatra,1.964,-3.068831,103.970371,0.0,0.0,0.0,IDN,0.0,...,0.0,0.0,0.0,0.0,0.52,0.0,0.0,ha,5.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
5,5,South Sumatra,12.725,-3.082922,103.975182,0.0,0.0,0.0,IDN,0.0,...,0.0,0.0,0.0,0.0,2.49,0.0,7.558,ha,6.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
6,6,South Sumatra,20.882,-3.083808,103.977512,0.0,0.0,0.0,IDN,0.0,...,0.0,0.0,0.714,0.714,4.253,0.0,5.569,ha,7.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
7,7,Lagunes,8.279,5.711935,-4.101646,0.0,0.376,3.013,CIV,0.0,...,0.0,0.0,0.0,0.0,1.795,0.0,0.006,ha,8.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
8,8,Lagunes,1.981,5.673811,-4.086848,1.494,0.011,0.006,CIV,0.0,...,0.0,0.0,0.0,0.0,0.803,0.0,0.0,ha,9.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
9,9,District Autonome D'Abidjan,3.797,5.572136,-4.119589,0.0,0.004,0.05,CIV,0.0,...,0.0,0.0,0.0,0.0,0.132,0.0,0.0,ha,10.0,"{""type"":""MultiPoint"",""coordinates"":[]}"


Format stats based on Whisp schema


In [8]:

# Temporary conversion: add an ISO2 "ProducerCountry" column derived from the
# ISO3 "Country" column.
df_stats = whisp.convert_iso3_to_iso2(
    df=df_stats,
    iso3_column="Country",
    iso2_column="ProducerCountry",
)

# Validate / format the table using the Whisp lookups
df_formatted_stats = whisp.validate_dataframe_using_lookups(df_stats)


File C:\Users\Arnell\Documents\GitHub\whisp_sustaain\whisp\parameters\lookup_gee_datasets.csv changed, updating schema...
File C:\Users\Arnell\Documents\GitHub\whisp_sustaain\whisp\parameters\lookup_context_and_metadata.csv changed, updating schema...
Creating or updating schema based on changed files...
system:index, .geo
external_id


Display table

In [9]:
df_formatted_stats  # view the formatted output dataframe


Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,GFC_loss_before_2020,ESA_fire_before_2020,MODIS_fire_before_2020,RADD_before_2020,TMF_deg_after_2020,TMF_def_after_2020,GFC_loss_after_2020,MODIS_fire_after_2020,RADD_after_2020,DIST_after_2020
0,1.0,,1.939,Polygon,GHA,GH,Ashanti Region,-1.611942,6.15954,ha,...,1.552,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,,4.152,Polygon,GHA,GH,Ashanti Region,-1.644732,6.104735,ha,...,2.724,0.0,0.0,0.0,0.0,0.089,0.743,0.0,0.0,0.0
2,3.0,,16.6,Polygon,GHA,GH,Western Region,-2.157144,5.981149,ha,...,0.0,0.0,0.0,1.11,0.089,0.0,0.0,0.0,1.203,0.0
3,4.0,,31.212999,Polygon,IDN,ID,South Sumatra,103.956096,-3.054668,ha,...,31.021999,31.212999,6.403,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,,1.964,Polygon,IDN,ID,South Sumatra,103.970371,-3.068831,ha,...,1.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6.0,,12.725,Polygon,IDN,ID,South Sumatra,103.975182,-3.082922,ha,...,1.899,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7.0,,20.882,Polygon,IDN,ID,South Sumatra,103.977512,-3.083808,ha,...,9.043,0.0,0.0,0.0,0.714,0.0,0.0,0.0,0.0,0.0
7,8.0,,8.279,Polygon,CIV,CI,Lagunes,-4.101646,5.711935,ha,...,3.194,0.0,0.0,0.0,0.0,0.0,1.602,0.0,0.001,0.0
8,9.0,,1.981,Polygon,CIV,CI,Lagunes,-4.086848,5.673811,ha,...,0.435,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10.0,,3.797,Polygon,CIV,CI,District Autonome D'Abidjan,-4.119589,5.572136,ha,...,1.161,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Export csv

In [10]:
# Define output directory
out_directory = Path.cwd().parents[0] / "test" / "output" # directory untracked by git

# Make sure the directory exists before writing (no-op when it already does)
out_directory.mkdir(parents=True, exist_ok=True)

# Define output file path.
# NOTE: this is deliberately a different file from the raw Google Drive export
# read earlier in the notebook ("whisp_output_table.csv"). Writing the formatted
# table to that same path would overwrite the raw input, so re-running the
# notebook from the top would load already-formatted data as "raw" stats.
output_stats_file = out_directory / "whisp_output_table_formatted.csv" # edit as required

# Save formatted statistics to CSV (row index omitted)
df_formatted_stats.to_csv(path_or_buf=output_stats_file,index=False)

print(f"Formatted stats saved to: {output_stats_file}")


Formatted stats saved to: c:\Users\Arnell\Documents\GitHub\whisp_sustaain\test\output\whisp_output_table.csv


Calculate risk category

In [11]:
# Add risk columns to the end of the dataframe; the result is stored in df_w_risk.
# NOTE(review): the recorded output shows column-assignment warnings from inside
# this call -- confirm whether df_formatted_stats is also modified in place.
df_w_risk = whisp.whisp_risk(df=df_formatted_stats)

  df[new_column_name] = low_name
  df[new_column_name] = low_name
  df[new_column_name] = low_name
  df[new_column_name] = low_name
  df.at[index, "EUDR_risk"] = "low"


Display table with risk columns

In [12]:
# Table including the risk columns added in the previous cell
df_w_risk

Unnamed: 0,plotId,external_id,Area,Geometry_type,Country,ProducerCountry,Admin_Level_1,Centroid_lon,Centroid_lat,Unit,...,TMF_def_after_2020,GFC_loss_after_2020,MODIS_fire_after_2020,RADD_after_2020,DIST_after_2020,Indicator_1_treecover,Indicator_2_commodities,Indicator_3_disturbance_before_2020,Indicator_4_disturbance_after_2020,EUDR_risk
0,1.0,,1.939,Polygon,GHA,GH,Ashanti Region,-1.611942,6.15954,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,no,low
1,2.0,,4.152,Polygon,GHA,GH,Ashanti Region,-1.644732,6.104735,ha,...,0.089,0.743,0.0,0.0,0.0,yes,no,yes,yes,low
2,3.0,,16.6,Polygon,GHA,GH,Western Region,-2.157144,5.981149,ha,...,0.0,0.0,0.0,1.203,0.0,yes,no,yes,yes,low
3,4.0,,31.212999,Polygon,IDN,ID,South Sumatra,103.956096,-3.054668,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,no,low
4,5.0,,1.964,Polygon,IDN,ID,South Sumatra,103.970371,-3.068831,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,no,low
5,6.0,,12.725,Polygon,IDN,ID,South Sumatra,103.975182,-3.082922,ha,...,0.0,0.0,0.0,0.0,0.0,yes,no,yes,no,low
6,7.0,,20.882,Polygon,IDN,ID,South Sumatra,103.977512,-3.083808,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,yes,low
7,8.0,,8.279,Polygon,CIV,CI,Lagunes,-4.101646,5.711935,ha,...,0.0,1.602,0.0,0.001,0.0,yes,yes,yes,yes,low
8,9.0,,1.981,Polygon,CIV,CI,Lagunes,-4.086848,5.673811,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,no,low
9,10.0,,3.797,Polygon,CIV,CI,District Autonome D'Abidjan,-4.119589,5.572136,ha,...,0.0,0.0,0.0,0.0,0.0,yes,yes,yes,no,low


Export table with risk columns to csv 

In [None]:
# Destination CSV for the table including risk columns -- edit as required
output_risk_file = out_directory.joinpath("whisp_output_table_w_risk.csv")

# Write the dataframe with the added risk columns to disk, omitting the row index
df_w_risk.to_csv(output_risk_file, index=False)

print(f"Table with risk columns saved to: {output_risk_file}")