In [1]:
########################################
# THIS DEMO REQUIRES INPUT PARAMETERS! #
########################################

# 1. A path to your JSON private key associated with a Google Cloud Project service account that has Google Earth Engine
#    permissions. As of November 13, 2024, all users will need to use a Cloud project in order to access Earth Engine.
#    More information on how to obtain a private key can be found here:
#    https://developers.google.com/earth-engine/guides/service_account

# Path to the service-account JSON key file. Replace the placeholder before
# running the notebook: later cells open this file to read its "client_email"
# entry, and pass the path itself to ee.ServiceAccountCredentials and to
# dask_plugins.EEPlugin.
json_key = 'path/to/key'
# Example path below #
# json_key = r'C:\Users\ChrisRedfield\Documents\credentials\robust-raster-cefdsh6482c.json'

In [None]:
##############
# HOW TO RUN #
##############

# 1. Create the test Dask cluster. This will be used to auto-determine the appropriate block size
#    for your machine! 
from robustraster import dask_cluster_manager

# Instantiate the cluster manager and start a cluster in "test" mode.
# The same manager object is reused in step 6 to create the "full" cluster.
dask_cluster = dask_cluster_manager.DaskClusterManager()
dask_cluster.create_cluster(mode="test")

In [None]:
# 2. Authenticate Google Earth Engine on all Dask workers.
from robustraster import dask_plugins

# Build the Earth Engine plugin from the key path. `ee_plugin` is kept as a
# notebook-level name because it is registered a second time in step 7.
ee_plugin = dask_plugins.EEPlugin(json_key)
# NOTE(review): get_dask_client is accessed without parentheses, so this only
# yields a client object if it is a property on DaskClusterManager — confirm.
dask_client = dask_cluster.get_dask_client
# Register the plugin through the client of the test cluster.
dask_client.register_plugin(ee_plugin)

In [5]:
# 3. Obtain the header information for the Earth Engine query and store it in an xarray object.
#    This code does not do a full query for the data (yet)! 

#    In this example, we are just querying some data from Landsat 8 imagery 
#    over a small watershed for demo purposes.

from robustraster import dataset_manager
import ee
import json

# The Dask workers were authenticated through the plugin in step 2; the local
# (notebook) process needs its own Earth Engine session as well.
# The service-account e-mail address is taken from the key file itself.
with open(json_key) as file:
    data = json.load(file)

credentials = ee.ServiceAccountCredentials(data["client_email"], json_key)
ee.Initialize(
    credentials=credentials,
    opt_url='https://earthengine-highvolume.googleapis.com',
)

# Basic cloud masking algorithm
def prep_sr_l8(image):
    """Mask unwanted pixels and rescale the SR/ST bands of one image.

    A pixel is kept only when all five lowest QA_PIXEL bits are clear
    (bit 0 fill, bit 1 dilated cloud, bit 2 cirrus, bit 3 cloud,
    bit 4 cloud shadow) and QA_RADSAT equals zero.

    Args:
        image: Object exposing ``select``, ``addBands`` and ``updateMask``
            (presumably an ``ee.Image`` from the Landsat 8 collection
            queried in this notebook — confirm against the caller).

    Returns:
        The image with the 'SR_B.*' and 'ST_B.*' bands overwritten by their
        rescaled versions and both masks applied.
    """
    # 0b11111 selects QA_PIXEL bits 0-4; eq(0) keeps pixels with none set.
    low_five_bits = 0b11111
    clear_pixels = image.select('QA_PIXEL').bitwiseAnd(low_five_bits).eq(0)
    unsaturated = image.select('QA_RADSAT').eq(0)

    # Linear rescaling: value * factor + offset.
    scaled_optical = image.select('SR_B.*').multiply(0.0000275).add(-0.2)
    scaled_thermal = image.select('ST_B.*').multiply(0.00341802).add(149.0)

    # The third addBands argument (True) overwrites bands of the same name.
    rescaled = image.addBands(scaled_optical, None, True)
    rescaled = rescaled.addBands(scaled_thermal, None, True)
    return rescaled.updateMask(clear_pixels).updateMask(unsaturated)

# Boundary of the demo watershed, loaded from an Earth Engine asset.
WSDemo = ee.FeatureCollection("projects/robust-raster/assets/boundaries/WSDemoSHP_Albers")

# Query settings handed to EarthEngineDataset. They are stored and used to
# generate the header information needed for the full run. The values below
# work as-is for the demo; in theory any valid combination should work, so
# feel free to change them.
parameters = {
    'collection': 'LANDSAT/LC08/C02/T1_L2',
    'bands': ['SR_B4', 'SR_B5'],  # the two bands read by compute_ndvi
    'start_date': '2020-05-01',
    'end_date': '2020-08-31',
    'geometry': WSDemo.geometry(),
    'crs': 'EPSG:3310',
    'scale': 30,
    'map_function': prep_sr_l8,
}

earth_engine = dataset_manager.EarthEngineDataset(parameters)

In [13]:
# 4. Design your function here! 
 
# My target audience is users who want to work with
# data frames, so pandas data frames are the only data structures
# that I support for writing functions. If you'd prefer working
# with xarrays (or possibly other data structures), submit an
# issue and let me know!

# For this demo, we do a basic NDVI calculation.
def compute_ndvi(df):
    """Add an 'ndvi' column to ``df`` and return that same frame.

    NDVI is computed row-wise as (SR_B5 - SR_B4) / (SR_B5 + SR_B4).

    Args:
        df: Data frame with 'SR_B4' and 'SR_B5' columns.

    Returns:
        The input frame itself (modified in place) with the new 'ndvi' column.
    """
    nir = df['SR_B5']
    red = df['SR_B4']
    df['ndvi'] = (nir - red) / (nir + red)
    return df

In [None]:
# 5. This code will auto-determine what the best block size
#    should be for your machine. This helps to ensure computations don't
#    exceed the available resources and crash your application. Skip this step
#    if you want to process the entire dataset as is.
from robustraster import udf_manager

# `user_defined_func` is reused in step 8 for the full run.
user_defined_func = udf_manager.UserDefinedFunction()
# Pass the dataset object from step 3 and the function from step 4.
# NOTE(review): the meaning of the third positional argument (None) is not
# visible in this notebook — check udf_manager.tune_user_function.
user_defined_func.tune_user_function(earth_engine, compute_ndvi, None)

In [None]:
# 6. Shut down the test cluster and recreate a Dask cluster with
#    the full resources needed for the full computation.
# `dask_client` is the client obtained from the test cluster in step 2.
# NOTE(review): presumably shutdown() also stops the test cluster's scheduler
# and workers — confirm against the client class returned by get_dask_client.
dask_client.shutdown()
# Reuse the existing manager object to start a cluster in "full" mode.
dask_cluster.create_cluster(mode="full")

In [None]:
# 7. Reauthenticate the new Dask workers with Earth Engine.
# Look the client up again: the previous `dask_client` was shut down in step 6.
# NOTE(review): presumably this now yields the client of the "full" cluster — confirm.
dask_client = dask_cluster.get_dask_client
# Register the same plugin object that was created in step 2.
dask_client.register_plugin(ee_plugin)

In [None]:
# 8. Do the full run and write the results to a GeoTIFF!
# Hand the dataset object (step 3) and the user function (step 4) to the
# UserDefinedFunction instance created in step 5.
# NOTE(review): the output location and the contents of `result` are not
# visible in this notebook — check udf_manager.export_and_apply_user_function.
result = user_defined_func.export_and_apply_user_function(earth_engine, compute_ndvi)