## Fetch GEOIDs for Plots of Land

In [None]:
# Geo ids of the plots to analyse, kept in one list per group.
# (GHA / IDN / CIV are presumably ISO3 country codes - confirm.)
GHA_ids = [
    '88bec54ad04804f5b1fafbc131266640a129be2840fa6797cda358d7e831b907',
    'ef2f7c46fbe4fc892fdb81f9a31c9c507b9f1e4548504247dcbbab28cf8e436c',
    '97408ef7bdac487e4a42e4abf20492b786310889fd4b0478603e2d0004c40bfb',
]

IDN_ids = [
    'c288d6c94efa9011c0e3452af9f7fa0941661377030e10d29c68764617f9816d',
    '1a41a309ae2387f36a604c9a6c81887e64357a7f61d228758e23ef766286fcd7',
    '1a4472dc40700ef33f931863f58d444f243d64418616678fcf85c57e1f4bbf45',
    '8e2accea7ddbb84b7f6001e00bcb60f57f563c80633b53859993522a6f05727a',
]

CIV_ids = [
    '0520cfac98fbc1bd7952b1c07a9f6983b83625722b6f665ea83ac9aad3512918',
    'b84f55de2b7f3c77d1cbeb8b026a1b29be42d8b08d92058c9143e0556456820f',
    'b7c15efb6e3c63fcfe649a2d994973a6f5caa844f720f0edb7cf24f6a6c3c1b3',
    'fa2aff0d60cf1bc0e1f1dd4b91daf932940c31c021ca1b84f5b9445855eef02f',
]

# Ids actually processed below; CIV_ids is defined but currently left out of the run.
all_geo_ids = [*GHA_ids, *IDN_ids]

## Process public and relevant datasets

### 1. Imports

In [None]:
import json
import ee
import geemap
import folium ###
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import time
import math ###
import geojson 
from datetime import datetime ###
from sidecar import Sidecar ###

from modules.gee_initialize import initialize_ee 

# Earth Engine is initialised here, before the remaining project modules are imported.
# NOTE(review): presumably some of those modules use `ee` at import time - confirm before reordering.
initialize_ee()

from modules.json_to_ee import json_to_feature_with_id ###
from modules.agstack_setup import start_agstack_session ###
import modules.agstack_to_gee as agstack_to_gee ###
import modules.area_stats as area_stats
from modules.country_allocation import find_country_from_modal_stats
import modules.tidy_tables as tidy_tables
from parameters import * # NB for run-specific parameters edit "parameters/config_runtime"
from modules.file_to_ceo import get_ceo_url ###
from modules.image_prep import export_image_collection_to_asset, add_multi_lookup_properties_to_image_collection
from modules.image_prep import add_multi_lookup_properties_to_image_collection
from parameters.config_lookups import lookup_gee_datasets

print ("imports complete")

In [None]:
if os.path.exists("parameters/lookup_gadm_country_codes_to_iso3.csv"):
    if debug: print ("file exists")
else:
    %run misc/_create_lookups_gadm.py # only need run only once when 

### 2. Call and process datasets

In [None]:
# Choose the source of the image collection: an existing asset, or one compiled now.
if not use_existing_image_collection:
    print ("compiling image collection on the fly...")
    # kept inside this branch so the module is only imported when no existing asset is used
    from datasets.combining_datasets import images_iCol
else:
    images_iCol = ee.ImageCollection(target_image_col_id)
    print ("using existing image collection asset")



In [None]:
## lists taken from the lookup, one per processing stream
from dataset_properties.make_processing_lists_from_lookup import (
    buffer_stats_list,
    presence_only_flag_list,
    country_allocation_stats_only_list,
    plot_stats_list,
    decimal_place_column_list,
)

if debug:
    print ("processed")

In [None]:
# Leave out images whose dataset_id is in the exclusion list (from parameters.config_runtime);
# useful when certain datasets are not wanted in a run.
not_in_exclusion_list = ee.Filter.inList("dataset_id", exclusion_list_dataset_ids).Not()
images_iCol_filt = images_iCol.filter(not_in_exclusion_list)

if debug:
    print ("excluding dataset ids: ", exclusion_list_dataset_ids)

In [None]:
# Add lookup-table properties to the images when an update is requested, or whenever
# the collection was not loaded from an existing asset.
# NOTE(review): the string arguments look like (lookup column, image property) pairs - confirm against the helper.
if update_iCol_properties == True or use_existing_image_collection == False:
    images_iCol_w_properties = add_multi_lookup_properties_to_image_collection(
        images_iCol_filt, "dataset_id",
        lookup_gee_datasets, "dataset_id",
        "dataset_name", "system:index",
        "dataset_order", "dataset_order",
        "country_allocation_stats_only", "country_allocation_stats_only",
    )

    # downstream cells keep using the images_iCol_filt name
    images_iCol_filt = images_iCol_w_properties

    if debug:
        print ("properties added/updated")

#### Start session

In [None]:
# Start the AgStack session used below to fetch plot geometries.
# email, password and user_registry_base are not defined in this notebook
# (presumably they come from `from parameters import *` - confirm).
session = start_agstack_session(email,password,user_registry_base,debug)

#### Fetch and prepare features

In [None]:
# Fetch the geometries for the geo ids and turn them into a feature collection
roi = agstack_to_gee.geo_id_or_ids_to_feature_collection(
    all_geo_ids=all_geo_ids,
    geo_id_column=geo_id_column,
    session=session,
    asset_registry_base=asset_registry_base,
    required_area=4,
    area_unit="ha",
    debug=debug,
)

# Add the plot area (hectares) as a property, then keep only the columns of interest
roi = area_stats.add_area_hectares_property_to_feature_collection(roi, geometry_area_column)
roi = roi.select([geometry_area_column, geo_id_column])

# Buffer zones for deforestation risk are only built when buffer statistics are requested;
# otherwise the placeholder string "empty" is passed downstream instead.
if len(buffer_stats_list) < 1:
    roi_buffer = "empty"
else:
    roi_buffer = roi.map(
        lambda feature: feature.buffer(local_alerts_buffer_radius, max_error_alert_buff)
    )

### 3. Compute statistics

Calculating zonal statistics for continuous data (e.g. tree cover) within polygon(s)

##### i) Mapping over image collection with reduce regions (creates long format temporary table)

In [None]:
# start of the timed section
st = time.time()

if debug:
    print ("processing stats...")

# Zonal statistics for the plots (and their buffers) across the image collection
zonal_stats_out = area_stats.zonal_stats_plot_w_buffer(
    roi,
    roi_buffer,
    images_iCol_filt,
    plot_stats_list,
    buffer_stats_list,
    reducer_choice,
    debug,
)

# Bring the result into a Pandas DataFrame
# (limit of 5000 - unlikely to need more but i have code for it if needed)
df = geemap.ee_to_pandas(zonal_stats_out)

if debug:
    # elapsed wall-clock time for the section above
    elapsed_seconds = time.time() - st
    print ('Total execution time:', elapsed_seconds, 'seconds')

The lookup table linking country codes to country names (from the administrative feature collection) is stored here: scripts: create_country_lookup.py

##### ii) Create lookup tables for country allocation
The approach is based on raster statistics: each geometry is assigned the country with which it has the most overlap.



Make an on-the-fly lookup table that links each geo id to a country name, based on raster stats
- uses rasterised admin layer with admin codes as pixel values
- for each geo id finds most common value in that geometry (i.e. "mode" statistic)

In [None]:
# Lookup table from geo id to country name, later merged into the results on geo_id_column
lookup_geo_id_to_country_names = find_country_from_modal_stats(
    roi=roi,
    geo_id_column=geo_id_column,
    image_collection=images_iCol_filt,
    country_dataset_id=country_dataset_id,
    reducer_choice=reducer_choice,
    admin_code_col_name=admin_code_col_name,
    lookup_country_codes_to_names=lookup_country_codes_to_names,
)


##### iii) Reformat results table
- convert areas to percentage cover of plot
- long to wide format; 
- set presence only flags
- add in country names (using lookup tables) to the final results

In [None]:
# Percentage column: the "sum" statistic relative to the plot area
df["percentage"] = df["sum"].div(df[geometry_area_column]).mul(100)

# Long to wide format: one row per geo_id, one percentage column per dataset
df_wide = df.pivot_table(
    values=['percentage'],
    index=[geo_id_column, geometry_area_column],
    columns=['dataset_name'],
)

# Tidy the unwanted headers left by the pivot (runs in place, so nothing is assigned)
tidy_tables.tidy_dataframe_after_pivot(df_wide)

# Presence-only columns: positive values become "True", everything else "-"
is_present = df_wide[presence_only_flag_list] > 0
df_wide[presence_only_flag_list] = np.where(is_present, "True", "-")

# Columns in decimal_place_column_list are rounded and stored as whole numbers
df_wide[decimal_place_column_list] = (
    df_wide[decimal_place_column_list].round(decimals=0).astype(int)
)

# Turn the index levels (geo id, area) back into ordinary columns
df_wide = df_wide.reset_index()

# Area column is kept to one decimal place
df_wide[geometry_area_column] = df_wide[geometry_area_column].round(decimals=1)

# Join the country name (based on majority overlap with a country) onto each geo id
df_wide_w_country = df_wide.merge(lookup_geo_id_to_country_names, on=geo_id_column)

##### iv) Further reformatting
- reorder columns
- reorder rows to match Geo_ID / ROI order
- add plot id column for easy reference
- remove underscores in column headers

In [None]:
# Columns placed at the start of the table
prefix_columns_list = [geo_id_column, geometry_area_column, "Country"]

# Use the lookup to reorder the dataset columns, then drop the country dataset column if present
df_wide_w_country_reordered_cols = tidy_tables.reorder_columns_by_lookup(
    df_wide_w_country,
    lookup_gee_datasets,
    'dataset_order',
    "dataset_name",
    prefix_columns_list).drop(country_dataset_name, axis=1, errors='ignore')

# Reorder rows using the geo_id order from the feature collection (if more than one feature).
# NB Some repetition here with below. Avoiding Geopandas for speed of csv production.
if len(df_wide_w_country_reordered_cols) > 1:
    df_wide_w_country_reordered_cols_n_rows = pd.merge(
        geemap.ee_to_pandas(roi)[geo_id_column],
        df_wide_w_country_reordered_cols,
        left_on=geo_id_column,
        right_on=geo_id_column,
        how='inner')
else:
    # With 0 or 1 rows there is nothing to reorder. Without this branch the name below was
    # never assigned (NameError on a fresh kernel, or a stale frame from an earlier run).
    # reset_index(drop=True) returns a new, 0-based frame, so the in-place edits further
    # down do not touch df_wide_w_country_reordered_cols.
    df_wide_w_country_reordered_cols_n_rows = (
        df_wide_w_country_reordered_cols.reset_index(drop=True)
    )

# Renaming for clarity
df_out = df_wide_w_country_reordered_cols_n_rows

# Add a 1-based plot id as the index, for visual cross referencing on the map etc
df_out[plot_id_column] = df_out.index + 1

df_out.set_index(plot_id_column, inplace=True)

# Column labels captured with underscores, for use in the ceo export
ceo_cols_for_exporting = df_out.columns

# Remove underscores in column names (assigns new labels; the labels captured above are unchanged)
df_out.columns = df_out.columns.str.replace('_', ' ')

In [None]:
#export output csv
df_out.to_csv(path_or_buf=out_file_wide,header=True)

if debug: print ("output csv: ", out_file_wide)
# df_wide

### 4. Convert to CEO input

In [None]:
# Join the plot geometries (from roi) onto the results table.
# The keys are derived from geo_id_column instead of the literals 'Geo_id' / 'Geo id':
# roi carries the id under geo_id_column, and df_out carries it under the same name
# with underscores replaced by spaces (done when df_out's headers were tidied).
# NB this reuses the name `df`, replacing the zonal-statistics table built earlier.
df = pd.merge(geemap.ee_to_geopandas(roi),
              df_out,
              left_on=geo_id_column,
              right_on=geo_id_column.replace('_', ' '),
              how='inner')

In [None]:
# Number the plots 1..n by row position and use that as the index.
# Counting rows (rather than adding 1 to the current index) keeps this cell idempotent:
# re-running it no longer shifts the plot ids to 2..n+1.
df[plot_id_column] = range(1, len(df) + 1)
df.set_index(plot_id_column, inplace = True)

In [None]:
# Column labels of the results table (underscores already replaced by spaces);
# used below to pick the same columns out of the merged frame.
ceo_cols = df_out.columns

In [None]:
# Display the labels of the selected columns (inspection only; nothing is assigned)
df[ceo_cols].columns

In [None]:
# Keep only the results-table columns of the merged frame
gdf= df[ceo_cols]

In [None]:
# Shorten the underscore-style column names with a limit of 10
# (presumably because shapefile field names cannot exceed 10 characters - confirm).
ceo_cols_for_exporting = tidy_tables.truncate_strings_in_list(ceo_cols_for_exporting,10) # for shapefile export

In [None]:
# Relabel the selected columns with the truncated names; this relies on both lists
# being in the same column order.
gdf.columns = ceo_cols_for_exporting # dynamic list (albeit col names still need fixing CEO end)

In [None]:
# Rebuild a GeoDataFrame from the relabelled table, taking the geometry from the
# merged frame and declaring the CRS as EPSG:4326.
gdf = gpd.GeoDataFrame(gdf,
                       geometry=df.geometry,
                       crs="EPSG:4326")

In [None]:
from modules.file_to_ceo import get_ceo_url

In [None]:
# Write the table to 'test_ceo_all.shp.zip' with the ESRI Shapefile driver
gdf.to_file(filename='test_ceo_all.shp.zip', driver='ESRI Shapefile')

# Pass the file just written to get_ceo_url and keep the result for printing later.
# NOTE(review): the trailing comment says this call is temporarily not run, yet the line is active - confirm which is intended.
ceo_url = get_ceo_url("./test_ceo_all.shp.zip") # getting login errors for my account only (Andy), so temp not running this

### 5. Create Map 

In [None]:
# Images shown on the map: sorted by dataset_order, leaving out those whose
# country_allocation_stats_only property equals 1.
images_sorted_for_viz = images_iCol_filt.sort("dataset_order")
images_iCol_for_viz = images_sorted_for_viz.filter(
    ee.Filter.neq("country_allocation_stats_only", 1)
)

In [None]:
Map = geemap.Map()

# One layer per image; the names are read client-side from each image's system:index
images_names_list = images_iCol_for_viz.aggregate_array("system:index").getInfo()

for dataset_name in images_names_list:

    # the image whose system:index matches this name
    name_filter = ee.Filter.eq("system:index", dataset_name)
    image_new = images_iCol_for_viz.filter(name_filter).first()

    # colour for this dataset: first matching row of the lookup table
    lookup_row_matches = lookup_gee_datasets["dataset_name"] == dataset_name
    viz_hex_code = list(lookup_gee_datasets["viz_hex_code"][lookup_row_matches])[0]

    if debug:
        print ("adding image","-",dataset_name)

    visParams_sel = {'min': 0, 'max': 1, 'palette': ['White', viz_hex_code]}

    # layer is the unmasked >0 mask of the image
    # (the trailing 0, 1 are presumably geemap's shown / opacity arguments - confirm)
    Map.addLayer(image_new.gt(0).unmask(), visParams_sel, dataset_name, 0, 1)


# plot outlines, added after the dataset layers
Map.addLayer(roi, {}, 'roi ', 1, 1)

# if len(buffer_stats_list)>=1:
#     Map.addLayer(roi_buffer,{},'roi buffer')

if debug:
    print ("All layers added")

In [None]:
# Display the map inside a Sidecar titled 'Check Plots'
sc = Sidecar(title='Check Plots')
with sc:
    display(Map)

## Display outputs

### 6. Zoom on polygon

In [None]:
# 1-based number of the plot in the list of ROI features - the selected feature is shown on the map. e.g., choose 1 for the first in the list (the zoom cell looks up position plotid-1)
plotid = 2

In [None]:
# How close to zoom to the chosen polygon (1-24, where 24 is fully zoomed in)
zoom_level = 16

# Geo id of the chosen plot: plotid is shifted by one to index the list of ids
roi_geo_ids = roi.aggregate_array(geo_id_column)
single_feature_id = roi_geo_ids.get(plotid - 1).getInfo()
if debug:
    print (geo_id_column,single_feature_id)

# First feature carrying that geo id, then centre the map on it
features_with_id = roi.filter(ee.Filter.eq(geo_id_column, single_feature_id))
single_feature = ee.Feature(features_with_id.first())

Map.centerObject(single_feature, zoom_level)

### 7. Display table

In [None]:
# Show the results-table columns of the merged frame (cell output only)
df[ceo_cols]

### 8. Generate CEO URL

In [None]:
# ceo_url is assigned in the "Convert to CEO input" section.
# NOTE(review): the trailing comment says this is temporarily not run, yet the line is active - confirm which is intended.
print(ceo_url) # getting login errors for my account only (Andy), so temp not running this

### 9. Store image collection as an asset (optional)

Faster calculations if stored as a new asset. Can be run to update periodically. For parameters see: parameters/config_runtime_parameters.py

In [None]:
# Optional export of the filtered image collection to an asset; skipped unless export_icol is set
if export_icol:
    export_image_collection_to_asset(
        image_col_to_export=images_iCol_filt,
        target_image_col_id=target_image_col_id,
        make_empty_image_coll=make_empty_image_coll,
        exportRegion=exportRegion,
        asset_exists_property="system:index",
        skip_export_if_asset_exists=skip_export_if_asset_exists,
        debug=debug,
    )