#### Imports/setup

In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the local `modules/` and `parameters/` files take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import functools
import os
import sys
import time
# from datetime import datetime

import ee
import geemap
import pandas as pd
import requests # may not be needed
from sidecar import Sidecar

# Initialise Earth Engine as part of the setup cell (prints a confirmation message).
# NOTE(review): presumably this must run before any other `ee.*` call below - confirm.
from modules.gee_initialize import initialize_ee 
initialize_ee()

from parameters.config_runtime import (
    geo_id_column, 
    out_file_wide, 
    out_shapefile
)

from modules.agstack_to_gee import (
    start_agstack_session,
    get_agstack_token,
    register_fc_and_append_to_csv,
    # add_geo_ids_to_csv_from_lookup_df,
    add_geo_ids_to_csv_from_lookup_csv,
    add_empty_column_to_csv,
    remove_column_from_csv
)

# from modules.stats import get_stats,reformat_whisp_fc,get_stats_formatted

from parameters.config_asr_url_info import asset_registry_base, user_registry_base

from parameters.config_asr_credentials import email, password

from modules.utils import (
    collection_properties_to_df,
    remove_geometry_from_feature_collection,
    get_centroid
)

from modules.tidy_tables import update_eudr_risk, add_risk_column, select_years_in_range, create_wildcard_column_list

def whisp_stats_as_df (roi): 
    %store roi
    %run process_whisp_stats.ipynb   
    return df_out
   

Earth Engine has been initialized with the specified credentials.


#### Set up asset registry

In [3]:
# Start an AgStack session against the user registry, using the credentials
# imported from parameters.config_asr_credentials.
session = start_agstack_session(email,password,user_registry_base)

# Get a token from the asset registry; `session` and `token` are both passed
# to the polygon registration step further down.
token = get_agstack_token(email, password, asset_registry_base)

#### Inputs : without GEOIDS

GEE asset (feature collection) 

In [4]:
# Earth Engine asset id of the plot feature collection to process.
example_plot_feature_col_asset = 'projects/ee-andyarnellgee/assets/p0004_commodity_mapper_support/seg_civ_PCBM'
# example_plot_feature_col_asset = 'projects/ee-cocoacmr/assets/feature_data/cicc_polys_2k'
# Define the fraction of features you want to sample (e.g., 0.5 for 50%)
# 0.0010 keeps about 0.1% of the features (7 features in the recorded run).
sampleFraction = 0.0010

In [5]:
# Load the input plots from the configured Earth Engine asset.
fc = ee.FeatureCollection(example_plot_feature_col_asset)

# Add a column of random numbers to every feature; the fixed seed (4) keeps
# the sample the same between runs.
randomColumn = 'random'
fc_r = fc.randomColumn(randomColumn,4)

# Filter the feature collection based on the random column to get the sample
fc_sample = fc_r.filter(ee.Filter.lt(randomColumn, sampleFraction));

# `roi` is the collection every later step (stats, geo id registration) works on.
roi = fc_sample
# .getInfo() fetches the feature count from Earth Engine so it can be printed.
print(roi.size().getInfo())
roi

7


### Whisp it
- If there are more than 500 polygons, the results are currently exported to Google Drive and have to be uploaded here manually
- Stats are computed in a separate notebook, 'process_whisp_stats.ipynb'

In [6]:
# Compute the Whisp statistics for the sampled plots; `df` is the result table
# that is displayed, exported and scored in the cells below.
df = whisp_stats_as_df(roi)

Stored 'roi' (FeatureCollection)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Datasets present in lookup:  True
['Geo_id', 'Area_ha', 'Country']
geo id not present ['Area_ha', 'Country']
...stats processing started
...ee to df processing time: 8.661749601364136 seconds


### Display it


In [7]:
# Show the stats table (7 rows in the recorded run).
df

Unnamed: 0,Area_ha,Country,GLAD_Primary,TMF_undist,JAXA_FNF_2020,GFC_TC_2020,ESA_TC_2020,TMF_disturbed,TMF_plant,Oil_palm_Descals,...,MODIS_fire_2019,MODIS_fire_2020,MODIS_fire_2021,MODIS_fire_2022,MODIS_fire_2023,MODIS_fire_2024,system:index,random,count,label
0,0.4,CIV,0,0.11877,0.385054,0.385054,0.385054,0.266284,0,0,...,0,0,0,0,0,0,000000000000000007b6,0.000733,39,267000120
1,0.7,CIV,0,0.256438,0.701942,0.346283,0.701942,0.40494,0,0,...,0,0,0,0,0,0,00000000000000000121,0.000447,71,-1815148569
2,3.1,CIV,0,2.548115,3.14204,2.844807,3.043298,0.593924,0,0,...,0,0,0,0,0,0,00000000000000000df1,0.000588,318,2016339340
3,4.0,CIV,0,0.87986,3.924154,3.431185,3.933956,2.905826,0,0,...,0,0,0,0,0,0,0000000000000000029d,0.000947,405,1912978759
4,4.1,CIV,0,2.156063,4.108904,3.515044,4.108904,1.952841,0,0,...,0,0,0,0,0,0,00000000000000000835,0.00044,416,-1068478278
5,5.4,CIV,0,0.0,5.392016,4.08973,5.006714,2.517342,0,0,...,0,0,0,0,0,0,000000000000000009d2,0.000561,546,1374089199
6,11.2,CIV,0,2.064316,10.654769,7.086431,10.901288,8.154909,0,0,...,0,0,0,0,0,0,000000000000000014d6,0.000397,1131,1362793446


#### Export it to CSV file
NB skip if exporting to Google Drive

In [8]:
#export output csv
print  (out_file_wide)
df.to_csv(path_or_buf=out_file_wide,header=True)

print ("output csv: ", out_file_wide)


/home/sepal-user/whisp/whisp_output_table.csv
output csv:  /home/sepal-user/whisp/whisp_output_table.csv


#### Add Geo ids
##### Step 1. Register polygons:  compiles geo_ids in a lookup csv
- NB registration takes a long time when there are many polygons (a few seconds per polygon). AgStack plans to improve this in future.
- In the meantime, to avoid timeouts, extend the session length: click the dollar sign and edit the value under 'Sessions'.
- If processing doesn't complete, rerun the cell and it should pick up from where it stopped.

In [9]:
# Register every polygon in `roi` with the AgStack asset registry and record
# the resulting geo ids in a lookup csv, keyed on "system:index".
# The geo id column name is taken from the runtime config (`geo_id_column`)
# rather than a literal, so this step and the join step that reads the lookup
# csv always agree on the column name.
register_fc_and_append_to_csv(
    feature_col=roi,
    geo_id_column=geo_id_column,
    output_lookup_csv="temp_geo_id_lookup.csv",
    join_id_column="system:index",
    token=token,
    session=session,
    asset_registry_base=asset_registry_base,
    override_checks=False,
    # Keep the lookup csv: the join step below reads it.
    remove_temp_csv=False,
    debug=True)


Check passed: feature collection and table same size: 7 rows
Number without geo ids:0. 
 Processing started...
Backup file copied successfully to: backup_csvs/temp_geo_id_lookup_20240419_161523.csv
Done


##### Step 2. Join geo ids from lookup csv to Whisp stats csv

In [10]:
# Join the geo ids from the lookup csv onto the Whisp stats csv, matching rows
# on "system:index". With overwrite=False the recorded run wrote the result to
# a new file, 'copy_whisp_output_table.csv', which the risk cell reads.
# NOTE(review): input_csv is a relative file name, while the stats table was
# exported to the `out_file_wide` path; this only finds the file when the
# notebook's working directory is the export directory - confirm.
# NOTE(review): geo_id_lookup_csv must match the output_lookup_csv used in the
# registration step; both are currently literals.
add_geo_ids_to_csv_from_lookup_csv(
        input_csv="whisp_output_table.csv",
        geo_id_lookup_csv="temp_geo_id_lookup.csv",
        join_id_column="system:index",
        geo_id_column=geo_id_column,
        # override_checks=False, # needs implementing
        overwrite=False,
        drop_geo=False,
        debug=True)

new csv:  copy_whisp_output_table.csv


##### Optional: remove "system:index" column 
NB the "system:index" column is needed for joining geo_ids to the csv (from the lookup table). Check that you have all your geo_ids before removing it and, if in doubt, run this on a copy of the results.


In [11]:
# remove_column_from_csv(
#     csv_file="copy_whisp_output_table.csv", # this may change depending on if overwrite is on
#     column_name="system:index"
# )

In [12]:
# remove_column_from_csv(
#     csv_file="whisp_output_table.csv", # this may change depending on if overwrite is on
#     column_name="Geo_id"
# )

##### Calculate EUDR risk category

In [22]:
# Input: the Whisp stats table with geo ids joined (the file reported by the
# join step above). Change `csv_file` if that step was run with overwrite on
# or against a different file; the path is only set here.
csv_file = 'copy_whisp_output_table.csv'
output_file = 'copy_whisp_output_table_w_risk.csv'

# Load the table to score. Requires `import pandas as pd` in the imports cell.
df_in = pd.read_csv(csv_file)

# List of columns to check (TO DO: replace using lookup)
cols_treecover = ['GLAD_Primary', 'TMF_undist', 'JAXA_FNF_2020', 'GFC_TC_2020', 'ESA_TC_2020'] ###"EUFO_2020"
cols_commodities = ['TMF_plant', 'Oil_palm_Descals', 'Oil_palm_FDaP', 'Cocoa_ETH','Cocoa_bnetd']

# Disturbance columns are selected from the table that is actually scored
# (df_in), so this cell does not depend on the in-memory `df` of an earlier cell.
cols_dist_all = create_wildcard_column_list(df_in,["RADD_year","loss_year", "fire", "TMF_deg", "TMF_def"])

# Split the disturbance columns by year: up to and including 2020, and after 2020.
cols_dist_pre_2020 = select_years_in_range(string_list=cols_dist_all, min_year=1970, max_year=2020)
cols_dist_post_2020 = select_years_in_range(string_list=cols_dist_all, min_year=2021, max_year=2070)

# One entry per risk indicator: (new column name, columns to check, threshold).
# Thresholds are testing values only, as the stats are in ha.
risk_checks = [
    ("Treecover_risk", cols_treecover, 1),
    ("Commodities_risk", cols_commodities, 1),
    ("Disturbance_pre_2020_risk", cols_dist_pre_2020, 1),
    ("Disturbance_post_2020_risk", cols_dist_post_2020, 0),
]

# Add the indicator columns one after another, in the order listed above.
df_w_risk = df_in
for risk_column_name, risk_columns, risk_threshold in risk_checks:
    df_w_risk = add_risk_column(df=df_w_risk,
                                columns_to_check=risk_columns,
                                threshold=risk_threshold,
                                new_column_name=risk_column_name,
                                sum_comparison=False)

#run criteria:
df_w_risk = update_eudr_risk(df_w_risk)

# index=False: the row index is just 0..n-1; writing it would add yet another
# unnamed column next to the "Unnamed: 0" one already present in the input csv.
df_w_risk.to_csv(output_file, index=False)

df_w_risk

Unnamed: 0.1,Unnamed: 0,Geo_id,Area_ha,Country,GLAD_Primary,TMF_undist,JAXA_FNF_2020,GFC_TC_2020,ESA_TC_2020,TMF_disturbed,...,MODIS_fire_2024,system:index,random,count,label,Treecover_risk,Commodities_risk,Disturbance_pre_2020_risk,Disturbance_post_2020_risk,EUDR_risk
0,0,eb7183ae1c62b147e099de79265607bf1a53cb4ff717d5...,0.4,CIV,0,0.11877,0.385054,0.385054,0.385054,0.266284,...,0,000000000000000007b6,0.000733,39,267000120,low,low,low,low,low
1,1,37485ae8d1dd2d047caa0fbc57a72e508e83b50bd3c4c1...,0.7,CIV,0,0.256438,0.701942,0.346283,0.701942,0.40494,...,0,00000000000000000121,0.000447,71,-1815148569,low,low,low,low,low
2,2,375ba910a586a125ff451e960451990123b31ba0e54fcd...,3.1,CIV,0,2.548115,3.14204,2.844807,3.043298,0.593924,...,0,00000000000000000df1,0.000588,318,2016339340,high,low,low,low,more_info_needed
3,3,146d51ab4e6a931f392efde0b270a3865e6d59a8eea559...,4.0,CIV,0,0.87986,3.924154,3.431185,3.933956,2.905826,...,0,0000000000000000029d,0.000947,405,1912978759,high,high,low,low,low
4,4,b98c8bc02d5057ad4f2a1ceea4fa235af0d1333703ffae...,4.1,CIV,0,2.156063,4.108904,3.515044,4.108904,1.952841,...,0,00000000000000000835,0.00044,416,-1068478278,high,high,low,low,low
5,5,3f5d8a8b34a6e500008bc7d32783bd7bdaa673bd64349b...,5.4,CIV,0,0.0,5.392016,4.08973,5.006714,2.517342,...,0,000000000000000009d2,0.000561,546,1374089199,high,low,high,low,low
6,6,326a2033a35144fcef36123bed53e12b3576dda199883f...,11.2,CIV,0,2.064316,10.654769,7.086431,10.901288,8.154909,...,0,000000000000000014d6,0.000397,1131,1362793446,high,high,high,high,low
