### Process stats for multiband image

In [1]:
import ee
import pandas as pd
import geemap
import geopandas as gpd
import modules.tidy_tables as tidy_tables
import json
import numpy as np
import time

from modules.gee_initialize import initialize_ee 

initialize_ee()

from parameters.config_runtime import country_dataset_name
from parameters.config_output_naming import geo_id_column, geometry_area_column, plot_id_column

from parameters.config_lookups import lookup_gee_datasets

## get lists from lookup for different streams of processing
from  dataset_properties.make_processing_lists_from_lookup import buffer_stats_list,\
                                                                  presence_only_flag_list,\
                                                                  country_allocation_stats_only_list, \
                                                                  plot_stats_list, \
                                                                  decimal_place_column_list

Earth Engine has been initialized with the specified credentials.


In [2]:
%run alt_whisp_prep_multiband_image.ipynb # get stats and image

In [3]:

def _existing_columns(frame, wanted_columns, label):
    """Return the entries of `wanted_columns` that are columns of `frame`, in order.

    Prints a one-line notice naming any requested columns that are absent so
    that silently skipped columns are still visible to the user.
    """
    present = [col for col in wanted_columns if col in frame.columns]
    missing = [col for col in wanted_columns if col not in frame.columns]
    if missing:
        print(label, "columns not found, so skipping:", missing)
    return present


def process_dataframe(df, geo_id_column, geometry_area_column, country_dataset_name, lookup_gee_datasets,
                      presence_only_flag_list, decimal_place_column_list, plot_id_column, keep_system_index=False, debug=False):
    """Tidy a stats data frame using lists of column names that need special handling.

    Steps, in order:
      1. Build the list of leading columns (geo id if present, area column,
         "Country", optionally "system:index") and hand it, with the lookup
         table, to `tidy_tables.reorder_columns_by_lookup`.
      2. Drop the `country_dataset_name` column if it exists.
      3. In the presence-only columns, replace values > 0 with "True" and
         everything else with "-".
      4. Round the decimal-place columns to 0 decimals and cast them to int.
      5. Add `plot_id_column` (current index + 1) and make it the index.
      6. Replace underscores in column names with spaces.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; it is not modified.
    geo_id_column : str
        Name of the geo id column; skipped (with a notice) when absent.
    geometry_area_column : str
        Name of the area column placed among the leading columns.
    country_dataset_name : str
        Column dropped from the output when present.
    lookup_gee_datasets
        Lookup passed through to `tidy_tables.reorder_columns_by_lookup`
        together with the 'dataset_order' and 'dataset_name' keys.
    presence_only_flag_list : list of str
        Columns converted to "True"/"-" flags. Names missing from the table
        are skipped with a notice instead of raising KeyError.
    decimal_place_column_list : list of str
        Columns rounded to whole numbers. Names missing from the table are
        skipped with a notice instead of raising KeyError.
    plot_id_column : str
        Name of the id column that becomes the index of the result.
    keep_system_index : bool, default False
        When True, "system:index" is added to the leading columns.
    debug : bool, default False
        When True, the elapsed tidying time is printed.

    Returns
    -------
    pandas.DataFrame
        The tidied table, indexed by `plot_id_column`.
    """
    if debug:
        st = time.time()

    if geo_id_column in df.columns:
        prefix_columns_list = [geo_id_column, geometry_area_column, "Country"]
    else:
        prefix_columns_list = [geometry_area_column, "Country"]
        print(geo_id_column, "column not found, so skipping inclusion")

    # for adding geo_ids from asset registry later
    if keep_system_index:
        prefix_columns_list.append("system:index")

    # .drop() returns a new frame, so the edits below never touch the caller's df
    df_out = tidy_tables.reorder_columns_by_lookup(
        df, lookup_gee_datasets, 'dataset_order', "dataset_name", prefix_columns_list
    ).drop(country_dataset_name, axis=1, errors='ignore')

    # convert positive results values to "True" for specific columns
    presence_columns = _existing_columns(df_out, presence_only_flag_list, "presence flag")
    if presence_columns:
        df_out[presence_columns] = np.where(df_out[presence_columns] > 0, "True", "-")

    # fix decimals (NaN values would still make the int cast raise)
    decimal_columns = _existing_columns(df_out, decimal_place_column_list, "decimal place")
    if decimal_columns:
        df_out[decimal_columns] = df_out[decimal_columns].round(decimals=0).astype(int)

    # add in plot id as a simple id for quick cross referencing e.g. if shown on a map
    df_out[plot_id_column] = df_out.index + 1

    # set as an index
    df_out = df_out.set_index(plot_id_column)

    # remove underscores in column names
    df_out.columns = df_out.columns.str.replace('_', ' ')

    if debug:
        print('...table tidying time:', time.time() - st, 'seconds.\n Processing finished')

    return df_out

In [None]:
st = time.time()

%store -r roi

print (ee.FeatureCollection(roi).size().getInfo())

stats= get_stats(roi)

if ee.FeatureCollection(roi).size().getInfo()>500:#update this code to depend on a variable? or if memory error?
    
    task = ee.batch.Export.table.toDrive(collection=stats, 
                     description="Whisp_output_table_export", 
                     folder="Whisp_results",
                     fileFormat="CSV")

    task.start()
    
    df_out =  "Table exporting to google drive: 'Whisp_results/Whisp_output_table_export'"
    
    print (df_out)
else: 
    print ('...stats processing started')
    
    # print ('...ee processing time:', time.time() - st, 'seconds')# get the execution time
    
    # st = time.time()
    
    df = geemap.ee_to_df(stats)
    
    # print(df.head())
     
    df_out = process_dataframe(df, geo_id_column, geometry_area_column, country_dataset_name, lookup_gee_datasets,
                          presence_only_flag_list, decimal_place_column_list, plot_id_column)

    print ('...ee to df processing time:', time.time() - st, 'seconds')# get the execution time
