# Imports

In [1]:
import requests
import json
import ee
import geemap
import folium
import os
import pandas as pd
import glob
import time
import math
import numpy as np
from datetime import datetime

from modules.json_to_ee import json_to_feature_with_id
import modules.agstack_to_gee as agstack_to_gee
import modules.area_stats as area_stats
import modules.tidy_tables as tidy_tables

from parameters.config_runtime import * 

print ("imports complete")

imports complete


In [2]:
ee.Initialize()

### Parameters

In [3]:
from parameters import *

### 1. Grab datasets


In [10]:
if use_existing_image_collection:
    images_IC = ee.ImageCollection("users/andyarnell10/fdap_dpi/imageCol_trial_2")
else:
    from dataset_properties.set_image_properties import images_IC


### 2. Fetch some fields (public)

#### Transform geometries into a feature collection

In [11]:
CIV_ids = ['0520cfac98fbc1bd7952b1c07a9f6983b83625722b6f665ea83ac9aad3512918',
           'b84f55de2b7f3c77d1cbeb8b026a1b29be42d8b08d92058c9143e0556456820f',
           'b7c15efb6e3c63fcfe649a2d994973a6f5caa844f720f0edb7cf24f6a6c3c1b3',
            'fa2aff0d60cf1bc0e1f1dd4b91daf932940c31c021ca1b84f5b9445855eef02f']

GHA_ids = ['88bec54ad04804f5b1fafbc131266640a129be2840fa6797cda358d7e831b907', 
'ef2f7c46fbe4fc892fdb81f9a31c9c507b9f1e4548504247dcbbab28cf8e436c',
'97408ef7bdac487e4a42e4abf20492b786310889fd4b0478603e2d0004c40bfb']

IDN_ids = ['c288d6c94efa9011c0e3452af9f7fa0941661377030e10d29c68764617f9816d', 
       '1a41a309ae2387f36a604c9a6c81887e64357a7f61d228758e23ef766286fcd7',
       '1a4472dc40700ef33f931863f58d444f243d64418616678fcf85c57e1f4bbf45',
       '8e2accea7ddbb84b7f6001e00bcb60f57f563c80633b53859993522a6f05727a']

all_geo_ids= CIV_ids+GHA_ids+IDN_ids

if debug: print (all_geo_ids)

['0520cfac98fbc1bd7952b1c07a9f6983b83625722b6f665ea83ac9aad3512918', 'b84f55de2b7f3c77d1cbeb8b026a1b29be42d8b08d92058c9143e0556456820f', 'b7c15efb6e3c63fcfe649a2d994973a6f5caa844f720f0edb7cf24f6a6c3c1b3', 'fa2aff0d60cf1bc0e1f1dd4b91daf932940c31c021ca1b84f5b9445855eef02f', '88bec54ad04804f5b1fafbc131266640a129be2840fa6797cda358d7e831b907', 'ef2f7c46fbe4fc892fdb81f9a31c9c507b9f1e4548504247dcbbab28cf8e436c', '97408ef7bdac487e4a42e4abf20492b786310889fd4b0478603e2d0004c40bfb', 'c288d6c94efa9011c0e3452af9f7fa0941661377030e10d29c68764617f9816d', '1a41a309ae2387f36a604c9a6c81887e64357a7f61d228758e23ef766286fcd7', '1a4472dc40700ef33f931863f58d444f243d64418616678fcf85c57e1f4bbf45', '8e2accea7ddbb84b7f6001e00bcb60f57f563c80633b53859993522a6f05727a']


#### Asset registry: start session

NB this is timing out so skipping section - seems to work without somehow. Maybe an open connection already...

In [12]:
## using session to store cookies that are persistent
session = requests.session()
session.headers = headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json'
}
req_body = {'email': email, 'password': password}
res = session.post(user_registry_base, json=req_body)
if debug: print (session.cookies)
if debug: print (res.status_code)


<RequestsCookieJar[]>
500


#### Fetch features

In [13]:
#is there a list of geo_ids
if isinstance(all_geo_ids, list):
    multiple_inputs=True
elif isinstance(all_geo_ids, str):
    multiple_inputs=False
else:
    print ("Input must be a single string or list of strings")

#if list of geo ids use loop over them and make a feature collection

if multiple_inputs==True:
    roi = agstack_to_gee.geo_id_list_to_feature_collection(all_geo_ids,geo_id_column, session,asset_registry_base)    
    if debug: print ("Count of geo ids in list: ", len(all_geo_ids))
    # if debug: print ("Count of features in FeatureCollection: ", roi.size().getInfo())
elif multiple_inputs == False: 
    roi = ee.FeatureCollection(agstack_to_gee.geo_id_to_feature(all_geo_ids,geo_id_column, session,asset_registry_base))
    if debug: print ("Geo id input: ", all_geo_ids)
    # if debug: print ("Geo id associated with new feature: ", roi.get(geo_id_column).getInfo())    
else: 
    print("no ee.Object created: check input format")

# if debug: print ("Count of geo ids in list: ", len(all_geo_ids))
if debug: print ("Count of features in FeatureCollection: ", roi.size().getInfo())
    
#true
#checks 

Count of geo ids in list:  11
Count of features in FeatureCollection:  11


#### Feature prep
- Create additional buffer zones for deforestation risk 
- Add area property to feature(s) 
- Select only columns of interest

In [14]:
roi = area_stats.add_area_hectares_property_to_feature_collection(roi,geometry_area_column)

roi  = roi.select([geometry_area_column,geo_id_column]) ##select only fields of interest

roi_alerts_buffer = roi.map(lambda feature: 
        feature.buffer(local_alerts_buffer_radius,max_error_alert_buff))

# if debug: geemap.ee_to_pandas(roi_alerts_buffer)
# if debug: geemap.ee_to_pandas(roi)

#### 3) Compute statistics

Calculating zonal statistics for continuous data (e.g tree cover) within polygon(s)

##### i) Mapping over image collection with reduce regions: creates long format raw stats

In [15]:
# get the start time
st = time.time()
if debug: print ("processing stats...")

## reducer choice for zonal statistics
reducer_choice = ee.Reducer.sum().combine(  #main stats based on area of pixel
  reducer2=ee.Reducer.count(),sharedInputs=True).combine(reducer2=ee.Reducer.mode(), sharedInputs=True) ##mode used for country allocation (majority pixel count on country code raster)

#get stats for roi (except alerts)
fc_stats_combined = area_stats.reduceStatsIC(roi,
                                  images_IC.filter(ee.Filter.And(ee.Filter.neq("alerts_buffer",1),
                                                                 (ee.Filter.neq("dataset_id",16)))), # country code calculation seperated, ideally use flag in the data 
                                                                  reducer_choice)# all but alerts
#get stat for buffer (alerts only)
fc_stats_combined_buffer = area_stats.reduceStatsIC(roi_alerts_buffer,
                                                    images_IC.filter(ee.Filter.And(ee.Filter.eq("alerts_buffer",1))),
                                                    reducer_choice) #alerts only

#combine stats from roi and buffer
fc_stats_combined_all = fc_stats_combined.merge(fc_stats_combined_buffer) # combining alerts with others into one feature collection

# convert to Pandas Dataframe
df_combined = geemap.ee_to_pandas(fc_stats_combined_all) # limit of 5000 (unlikely to need more but i have code for it if needed)

# export dataframe to csv
df_combined.to_csv(path_or_buf=out_file_long,header=True,index=False)

# get the execution time
elapsed_time = time.time() - st

if debug: print ('Total execution time:', elapsed_time, 'seconds')


processing stats...
Total execution time: 2.182441234588623 seconds


##### ii): Create lookup tables for country allocation
Approach is based on raster stats and listing the country for a specific geometry based on which has most overlap



Look up table linking country codes to country names (from GAUL feature collection) is stored here: scripts: create_country_lookup.py

Make on-the-fly look up table to link country name to geo id based on raster stats
- uses rasterised GAUL layer with admin codes as pixel values
- for each geo id finds most common value in that geometry (i.e. "mode" statistic)

In [16]:
fc_stats_country_codes = area_stats.reduceStatsIC(roi,
                                  images_IC.filter(ee.Filter.eq("dataset_id",16)),
                                  reducer_choice)# all but alerts

df_stats_country_codes = geemap.ee_to_pandas(fc_stats_country_codes) # limit of 5000 (unlikely to need more fpr demo but i have code for it if this happens)

lookup_geo_id_to_GAUL_codes = df_stats_country_codes[df_stats_country_codes["dataset_name"]=="GAUL_boundaries_adm0_code_reproj"]  #get mode stats for GAUL dataset

lookup_geo_id_to_GAUL_codes = lookup_geo_id_to_GAUL_codes[[geo_id_column, 'mode']] # choose only columns needed

# lookup_geo_id_to_GAUL["mode"] = lookup_geo_id_to_GAUL["mode"].astype(int) # make sure mode stats are integer (to allow joining)

lookup_geo_id_to_GAUL_codes = lookup_geo_id_to_GAUL_codes.rename(columns={"mode":"ADM0_CODE"}) # change names for a clean join 

lookup_geo_id_to_GAUL_country_names = lookup_geo_id_to_GAUL_codes.merge(lookup_country_codes_to_names,on="ADM0_CODE",how="inner").drop("ADM0_CODE",axis=1) # join geo id to the GAUL_lookup_table countaining "Country_names"


##### iii) Reformat results table
- long to wide
- convert to proportions 
- set presence only flags
- add in country names (using lookup tables) to the final results

In [17]:
#add proprtion column
df_combined["percentage"] = (df_combined["sum"]/df_combined[geometry_area_column])*100

# geometry_area_lookup = df_combined[geometry_area_column,geo_id_column]

def tidy_dataframe_after_pivot (df):
    """Tidying dataframe after long-to-wide reformatting, incl. removes unwanted levels, column names"""
    # df.columns = df.columns.droplevel(0) #remove sum
    df.columns = df.columns.get_level_values(1)
    df.columns.name = None               #remove "dataset_name" label
    df = df.reset_index()    #index to columns
    return df

#convert to wide format (one row per geo_id)
df_wide_format = df_combined.pivot_table(index=[geo_id_column,geometry_area_column],columns=['dataset_name'],values=['percentage'])

# # #tidy unwanted headers etc
tidy_tables.tidy_dataframe_after_pivot(df_wide_format) #runs in place so no need to assign

df_wide_format.columns = df_wide_format.columns.get_level_values(0)

# #list images with with presence_only_flag property 
flag_list = images_IC.filter(ee.Filter.eq("presence_only_flag",1)).aggregate_array("system:index").getInfo()

# convert positive results values to "True" for specific columns
for column in flag_list: df_wide_format[column]=np.where(df_wide_format[column]>0,"True","-")

# # tidy output - decimal places
columns_list = df_wide_format.columns.values.tolist()
if debug: print (columns_list)
non_flag_columns_list = [x for x in columns_list if x not in flag_list]

for column in flag_list: df_wide_format[non_flag_columns_list]=df_wide_format[non_flag_columns_list].round(decimals=0, out=None).astype(int)

df_wide_format=df_wide_format.reset_index()

# #joins country name based on majority overlap with country 
df_wide_format = df_wide_format.merge(lookup_geo_id_to_GAUL_country_names,on=geo_id_column)


['Cocoa_plantations_Kalischek', 'ESRI_Trees_2020', 'FDaP_palm_plantations', 'GFC_Tree_Cover_2020', 'GLAD_LULC_Stable_Tree_2020', 'JAXA_Forest_non_forest_2020', 'Key_Biodiversity_Area', 'Local_RADD_alerts', 'Oil_palm_Descals', 'Other_Effective_area_based_Conservation_Measure', 'Primary_Humid_Tropical_Forest_2020', 'Protected_area', 'TMF_disturbed_forest_2020', 'TMF_plantation', 'TMF_undisturbed_forest_2020']


##### iv) Further reformatting and exporting
- reorder columns
- remove underscores in column titles
- export to csv

In [18]:
##tidy 

# reorder columns using list 
df_wide_format[geometry_area_column]=df_wide_format[geometry_area_column].round(decimals=2, out=None)

ordered_dataset_df= lookup_gee_datasets.sort_values(by=['datasets_order'])

column_order_list = list(ordered_dataset_df["dataset_name"])
                         
column_order_list.insert(0,geo_id_column) # add in the "geo_id" column into to datasets list

column_order_list.insert(1,geometry_area_column)# add in to list the geometry area column

column_order_list.remove("GAUL_boundaries_adm0_code_reproj") # remove old column with "mode" values (not now relevant as have country names)

column_order_list.insert(2,"Country")# add in to list the new column with country names

df_wide_format= df_wide_format.reindex(columns=column_order_list) # reorder by list

df_wide_format["Country"]=np.where(df_wide_format["Country"]=="C�te d'Ivoire","Côte d'Ivoire",df_wide_format["Country"])# TEMP fix on characters (encoding issues)
df_wide_format["Country"]=np.where(df_wide_format["Country"]=="R�union","Réunion",df_wide_format["Country"])# TEMP fix on characters (encoding issues)

# remove underscores in columns
df_wide_format.columns = df_wide_format.columns.str.replace('_', ' ')

# #export wide format csv
df_wide_format.to_csv(path_or_buf=out_file_wide,header=True)

# if debug: print ("output csv: ", out_file_wide)

#checks
if debug: flag_list
# if debug: print (columns_list)

df_wide_format

Unnamed: 0,Geo id,Shape area hectares,Country,GFC Tree Cover 2020,ESRI Trees 2020,JAXA Forest non forest 2020,GLAD LULC Stable Tree 2020,TMF undisturbed forest 2020,Primary Humid Tropical Forest 2020,TMF disturbed forest 2020,Local RADD alerts,TMF plantation,Oil palm Descals,FDaP palm plantations,Cocoa plantations Kalischek,Protected area,Other Effective area based Conservation Measure,Key Biodiversity Area
0,0520cfac98fbc1bd7952b1c07a9f6983b83625722b6f66...,8.32,Côte d'Ivoire,62.031492,99.550667,95.17824,63.789679,0.272691,0.0,10.236428,0.0,0.0,0.0,0.0,0.0,99.513226,0.0,99.513226
1,1a41a309ae2387f36a604c9a6c81887e64357a7f61d228...,1.97,Indonesia,0.183185,99.527189,84.876936,99.362264,0.0,0.0,13.250976,0.0,0.0,0.0,3.900593,0.0,0.0,0.0,0.0
2,1a4472dc40700ef33f931863f58d444f243d6441861667...,12.78,Indonesia,82.789901,99.540867,99.631658,98.760523,60.021967,0.0,39.417561,0.0,0.0,0.0,68.386416,0.0,0.0,0.0,0.0
3,88bec54ad04804f5b1fafbc131266640a129be2840fa67...,1.95,Ghana,20.368075,99.543633,99.505759,87.681352,0.0,0.0,34.124126,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,8e2accea7ddbb84b7f6001e00bcb60f57f563c80633b53...,20.98,Indonesia,53.48318,99.545613,95.874315,98.063017,30.846134,0.0,66.815134,0.0,0.0,0.0,50.941279,0.0,0.0,0.0,0.0
5,97408ef7bdac487e4a42e4abf20492b786310889fd4b04...,16.68,Ghana,99.538465,99.558175,99.539157,89.009291,94.668147,86.803055,4.859822,1.021872,0.0,0.0,0.0,0.0,96.554817,0.0,0.0
6,b7c15efb6e3c63fcfe649a2d994973a6f5caa844f720f0...,3.82,Côte d'Ivoire,69.67858,99.537431,99.47529,89.961167,0.0,0.0,74.054764,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,b84f55de2b7f3c77d1cbeb8b026a1b29be42d8b08d9205...,1.99,Côte d'Ivoire,78.314306,99.520395,72.582346,55.443102,0.0,0.0,23.456662,0.0,0.0,0.0,0.0,73.285679,99.435492,0.0,0.0
8,c288d6c94efa9011c0e3452af9f7fa0941661377030e10...,31.35,Indonesia,0.437212,5.569134,5.085344,83.099963,0.0,0.0,0.0,0.0,99.522028,98.448346,81.982111,0.0,0.0,0.0,0.0
9,ef2f7c46fbe4fc892fdb81f9a31c9c507b9f1e45485042...,4.17,Ghana,35.411125,99.535517,99.040828,63.026645,4.679829,0.0,82.816275,0.0,0.0,0.0,0.0,0.005661,0.0,0.0,0.0


#### Display layers

##### Loop through image collection and loads layers to be added to the map


In [None]:
Map = geemap.Map()
visParams =  {'min': 0,'max': 1,'palette':['White','Green']}

for i in range(images_IC.size().getInfo()):
    
    image_new = ee.Image(images_IC.toList(100,0).get(i))
    
    dataset_name = image_new.get("system:index").getInfo()
    
    if debug: print ("adding image",i,"-",dataset_name)
    Map.addLayer(image_new.gt(0).unmask(),visParams,dataset_name,0,1)
    
    
Map.addLayer(roi,{},'roi ',1,1)
# Map.addLayer(roi_alerts_buffer,{},'roi buffer zone')

if debug: print ("All layers added")    

adding image 0 - Cocoa_plantations_Kalischek


##### Show on map and zoom to a specific feature based on index number in feature collection
 Layers visibility off by default - toggle on in top right corner

In [None]:
# number/index from list of ROI features - the selected feature is shown on the map. e.g., choose 0 for first in the list 
feature_to_centre_on = 1

#choose how close to zoom to chosen polygon (1-24, where 24 is fully zoomed in) 
zoom_level = 16 

single_feature_id = roi.aggregate_array(geo_id_column).get(feature_to_centre_on).getInfo()
if debug: print (geo_id_column,single_feature_id)
single_feature = ee.Feature(roi.filter(ee.Filter.eq(geo_id_column,single_feature_id)).first())

Map.centerObject(single_feature,zoom_level)
    
Map

### Logout (protected)

In [None]:
# res = session.get(asset_registry_base + "/logout")
# if debug: print (res.json())
# res = session.get(user_registry_base + "/logout", cookies=session.cookies)
# session.headers.clear()

### Checking if Logged out correctly

In [None]:
# # Confirming the logout from Asset Registry by requesting a Protected route
# req_body = {
#     "latitude": 31.47704430446457,
#     "longitude": 74.37510786779589
# }
# res = session.post(asset_registry_base + "/fetch-fields-for-a-point", json=req_body)
# if debug: print (res.json())

### Get all Domains (public)

In [None]:
# # Fetching all the domains from the User Registry
# res = session.get(asset_registry_base + "/domains")
# if debug: print (res.json())