#### Imports/setup

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
import ee
import os
# import pandas as pd
import geemap
import time
import functools
import os
import sys
# from datetime import datetime

import requests # may not be needed
from sidecar import Sidecar

from modules.gee_initialize import initialize_ee 
initialize_ee()

from parameters.config_runtime import (
    geo_id_column, 
    out_file_wide, 
    out_shapefile
)

from modules.agstack_to_gee import (
    start_agstack_session,
    get_agstack_token,
    register_fc_and_append_to_csv,
    # add_geo_ids_to_csv_from_lookup_df,
    add_geo_ids_to_csv_from_lookup_csv,
    add_empty_column_to_csv,
    remove_column_from_csv
)

# from modules.multiband_stats import get_stats,reformat_whisp_fc,get_stats_formatted

from parameters.config_asr_url_info import asset_registry_base, user_registry_base

from parameters.config_asr_credentials import email, password

from modules.utils import (
    collection_properties_to_df,
    remove_geometry_from_feature_collection,
    get_centroid
)

def whisp_stats_as_df (roi): 
    %store roi
    %run process_whisp_stats.ipynb   
    return df_out
   

#### Set up asset registry

In [9]:
# Presumably opens an authenticated session against the user registry, using the
# credentials imported from parameters.config_asr_credentials -- confirm in
# modules.agstack_to_gee.
session = start_agstack_session(email,password,user_registry_base)

# Token obtained from the asset registry; it is passed, together with `session`,
# to register_fc_and_append_to_csv in the registration step further down.
token = get_agstack_token(email, password, asset_registry_base)

#### Inputs : without GEOIDS

GEE asset (feature collection) 

In [10]:
# GEE asset id of the feature collection holding the input plots.
# Alternative example asset, kept for reference:
# example_plot_feature_col_asset = 'projects/ee-andyarnellgee/assets/p0004_commodity_mapper_support/seg_civ_PCBM'
example_plot_feature_col_asset = 'projects/ee-cocoacmr/assets/feature_data/cicc_polys_2k'
# Define the fraction of features you want to sample (e.g., 0.5 for 50%).
# 0.0010 keeps features whose random value is below 0.001, i.e. roughly 0.1% of
# the collection; the sample size is approximate, not exact.
sampleFraction = 0.0010

In [11]:
# Name of the property that will hold each feature's random number.
randomColumn = 'random'

# Load the input plots and attach a random value to every feature (fixed seed 4).
fc = ee.FeatureCollection(example_plot_feature_col_asset)
fc_r = fc.randomColumn(randomColumn, 4)

# Keep only the features whose random value falls below the sample fraction;
# that subset is the region of interest used by the rest of the notebook.
roi = fc_sample = fc_r.filter(ee.Filter.lt(randomColumn, sampleFraction))

# Report how many features were sampled, then display the collection itself.
print(roi.size().getInfo())
roi

4


### Whisp it
- If there are more than 500 polygons, the results currently go to Google Drive and must then be uploaded here manually
- Stats are computed in a separate notebook, 'process_whisp_stats.ipynb'

In [22]:
# Run the Whisp stats notebook on the sampled features and keep the returned table.
df = whisp_stats_as_df(roi)

Stored 'roi' (FeatureCollection)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
...stats processing started
...ee to df processing time: 3.7743704319000244 seconds


### Display it


In [23]:
# Show the Whisp statistics table (one row per sampled feature in the output below).
df

Unnamed: 0,Area_ha,Cocoa_ETH,Country,Date_debut,Date_fin,ESA_TC_2020,EUFO_2020,GFC_Loss_Year_2001,GFC_Loss_Year_2002,GFC_Loss_Year_2003,...,tmf_deg_2013,tmf_deg_2014,tmf_deg_2015,tmf_deg_2016,tmf_deg_2017,tmf_deg_2018,tmf_deg_2019,tmf_deg_2020,tmf_deg_2021,tmf_deg_2022
0,6.3,0,CMR,1633330800000,1659078000000,6.307695,6.079183,0,0,0,...,0,0,0.0,0.0,0.698073,0.194431,0.089084,0.213878,0.261195,0.445417
1,3.5,0,CMR,1633330800000,1659078000000,3.499814,3.499814,0,0,0,...,0,0,0.14672,0.000777,0.0,0.0,0.0,0.0,0.23726,0.0
2,3.3,0,CMR,1633330800000,1659078000000,3.328881,3.328881,0,0,0,...,0,0,0.010097,0.0,0.0,0.0,0.0,0.318756,0.0,0.0
3,0.8,0,CMR,1633330800000,1659078000000,0.765615,0.242137,0,0,0,...,0,0,0.0,0.0,0.038234,0.0,0.0,0.0,0.0,0.0


#### Export it to CSV file
NB skip if exporting to Google Drive

In [24]:
# Write the Whisp statistics table to the CSV path configured as `out_file_wide`,
# echoing the destination before and after the write.
print(out_file_wide)
df.to_csv(out_file_wide, header=True)

print("output csv: ", out_file_wide)


/home/sepal-user/whisp/whisp_output_table.csv
output csv:  /home/sepal-user/whisp/whisp_output_table.csv


#### Add Geo ids
##### Step 1. Register polygons:  compiles geo_ids in a lookup csv
- NB registration takes a long time if there are many polygons (a few seconds each). AgStack will update this in future. 
- In the meantime, to avoid timeouts, extend the session length: click the dollar sign and edit it under 'Sessions'.
- If processing doesn't complete, rerun and it should pick up from where it got to.

In [25]:
# Register the features in `roi` and collect their geo ids in the lookup CSV.
# What the helper does internally is not visible here; see modules.agstack_to_gee.
register_fc_and_append_to_csv(
    feature_col=roi,
    # Use the configured column name rather than a hard-coded "Geo_id", so this
    # step writes the same column that the join step reads from the lookup CSV.
    geo_id_column=geo_id_column,
    output_lookup_csv="temp_geo_id_lookup.csv",
    # Column used to match lookup rows to features; the join step uses it too.
    join_id_column="system:index",
    token=token,
    session=session,
    asset_registry_base=asset_registry_base,
    override_checks=False,
    remove_temp_csv=False,
    debug=True)


Check passed: feature collection and table same size: 4 rows
Number without geo ids:0. 
 Processing started...
Backup file copied successfully to: csvs/temp_geo_id_lookup_20240416_103755.csv
Done


##### Step 2. Join geo ids from lookup csv to Whisp stats csv

In [26]:
# Join the geo ids from the lookup CSV onto the Whisp stats CSV, matching rows on
# "system:index". With overwrite=False the run shown below reported a new file,
# copy_whisp_output_table.csv, rather than modifying the input.
# NOTE(review): input_csv is a bare filename while the export step wrote to
# `out_file_wide`; presumably both refer to the same file when the notebook runs
# from that directory -- confirm.
add_geo_ids_to_csv_from_lookup_csv(
        input_csv="whisp_output_table.csv",
        geo_id_lookup_csv="temp_geo_id_lookup.csv",
        join_id_column="system:index",
        geo_id_column=geo_id_column,
        # override_checks=False, # not implemented in the helper yet
        overwrite=False,
        drop_geo=False,
        debug=True)

new csv:  copy_whisp_output_table.csv


##### Optional: remove "system:index" column 
NB the "system:index" column is needed for joining geo_ids to the csv (from the lookup table). Check you have all your geo_ids first and, if in doubt, run on a copy of the results.


In [27]:
# Drop the "system:index" join column from the joined CSV once it is no longer needed.
remove_column_from_csv(
    csv_file="copy_whisp_output_table.csv", # file reported by the join step; the name may differ if overwrite is on
    column_name="system:index"
)

Column 'system:index' removed successfully from copy_whisp_output_table.csv


#### TO DO: 
    - column order not kept if batch task sent to drive; 
    - foolproof the join to csv (as with the join to fc version, so you know how many were joined, etc.); 
    - possibly: better checks for lookup creation - check system ids match and prompt to remove old lookup if so
    - possibly: make lookup creation output stay on one line; get system_index printed so it is clear where errors happen
    - error log for the lookup creation (currently have to check for empty rows)
    