# core

> Core functionality for distributing Earth Engine requests among Dask workers.

In [1]:
#| default_exp core

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export
import logging

import coiled
import dask.distributed
import ee
import google.auth

In [4]:
#| export
class InitEarthEngine(dask.distributed.WorkerPlugin):
    """Dask worker plugin that initializes the Earth Engine client library.

    Keyword arguments given to the constructor are stored as-is and forwarded
    to ``ee.Initialize`` when the plugin's ``setup`` hook runs.
    """

    def __init__(self, **kwargs):
        """Remember the keyword arguments to hand to ``ee.Initialize`` later."""
        # This message appears in the notebook output where the cluster is
        # initiated.
        logging.info('InitEarthEngine init')
        self.kwargs = kwargs

    def setup(self, worker):
        """Initialize Earth Engine with the stored keyword arguments.

        ``worker`` is part of the plugin hook signature; it is not used here.
        """
        # This message appears in the dask cluster logs.
        logging.info('InitEarthEngine setup')
        # Import inside the hook so the name is resolved where setup() runs.
        import ee
        init_kwargs = self.kwargs
        ee.Initialize(**init_kwargs)


class ClusterGEE(coiled.Cluster):
    """A ``coiled.Cluster`` that prepares its workers for Earth Engine.

    After the underlying cluster is created, the constructor sends the local
    Google Application Default Credentials (ADC) to it. Clients obtained via
    ``get_client`` have the ``InitEarthEngine`` worker plugin registered.
    """

    def __init__(self, **kwargs):
        """Create the cluster and send the ADC to it.

        Args:
            **kwargs: Forwarded unchanged to ``coiled.Cluster``. ``n_workers``
                is optional; when present it also controls how many workers
                are awaited before the credentials are sent.
        """
        logging.debug('ClusterGEE init')
        super().__init__(**kwargs)

        # `n_workers` is optional, so it must not be indexed directly (that
        # raised KeyError when the caller relied on the default).
        n_workers = kwargs.get('n_workers')
        if isinstance(n_workers, (list, tuple)):
            # NOTE(review): a list is presumably an adaptive [min, max] range;
            # wait only for the lower bound -- confirm against the Coiled docs.
            n_workers = min(n_workers) if n_workers else None
        if n_workers:
            # Wait for the workers to start, then send the ADC.
            self.wait_for_workers(n_workers)
        coiled.credentials.google.send_application_default_credentials(self)

    def get_client(self):
        """Return a client for this cluster with Earth Engine initialization registered."""
        logging.debug('ClusterGEE get_client')
        client = super().get_client()
        # A fixed plugin name makes repeated get_client() calls replace the
        # previous registration instead of accumulating a new plugin each time.
        client.register_plugin(InitEarthEngine(), name='init-earth-engine')
        return client

# Try it out

Create a cluster and run a few jobs.

## Authenticate & Initialize Earth Engine

Get credentials and the GCP project ID, authenticating if necessary.

In [5]:
try:
    credentials, project_id = google.auth.default()
except google.auth.exceptions.DefaultCredentialsError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
try:
    ee.Initialize(credentials=credentials, project=project_id)
except google.auth.exceptions.RefreshError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
ee.Initialize(credentials=credentials, project=project_id)

# Start Dask Cluster

Start up an Earth Engine-enabled cluster. This may take a few minutes to complete.

In [6]:
# Options for the demo cluster; ClusterGEE forwards them to coiled.Cluster.
cluster_options = {
    'name': 'test-class-cluster',
    'n_workers': 1,
    'worker_cpu': 4,
    'spot_policy': "spot_with_fallback",
    'region': 'us-central1',
    'idle_timeout': "1 hours",
}
cluster = ClusterGEE(**cluster_options)

Output()

Output()

Google Application Default Credentials have been written to a file on your Coiled VM(s).
These credentials will potentially be valid until explicitly revoked by running
gcloud auth application-default revoke


Retrieve a client for the cluster, and display it.

In [7]:
# ClusterGEE.get_client() registers the InitEarthEngine worker plugin on the
# client before returning it.
client = cluster.get_client()
# Bare expression so the notebook displays the client's summary.
client

0,1
Connection method: Cluster object,Cluster type: __main__.ClusterGEE
Dashboard: https://cluster-snhin.dask.host/cyvgqEolA7_0BP9W/status,

0,1
Dashboard: https://cluster-snhin.dask.host/cyvgqEolA7_0BP9W/status,Workers: 1
Total threads: 4,Total memory: 15.02 GiB

0,1
Comm: tls://10.2.0.45:8786,Workers: 1
Dashboard: http://10.2.0.45:8787/status,Total threads: 4
Started: Just now,Total memory: 15.02 GiB

0,1
Comm: tls://10.2.0.44:35583,Total threads: 4
Dashboard: http://10.2.0.44:8787/status,Memory: 15.02 GiB
Nanny: tls://10.2.0.44:33087,
Local directory: /scratch/dask-scratch-space/worker-t8e3fxcr,Local directory: /scratch/dask-scratch-space/worker-t8e3fxcr


# Submit Jobs

Test it out by:
- Defining a function that can be distributed,
- Submitting jobs that run the function on the workers,
- Gathering the results locally, and
- Displaying the results

In [8]:
# Build the sorted list of distinct country names found in the boundaries
# dataset. The demo below submits jobs for only a hand-picked few of them.
country_fc = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
country_names = country_fc.aggregate_array('country_na')
country_list = country_names.distinct().sort().getInfo()

import random

# Write a function that can be run by the cluster workers. 
def get_country_stats(country_name):
    """Fetch area and mean elevation for one country from Earth Engine.

    Args:
        country_name: Value matched against the ``country_na`` property of
            the module-level ``country_fc`` collection.

    Returns:
        A dict with keys ``'country'`` (the given name), ``'area_km2'`` and
        ``'mean_elev'``; the latter two are fetched with ``getInfo()``.
    """
    name_filter = ee.Filter.eq('country_na', country_name)
    country = country_fc.filter(name_filter)
    region = country.geometry()

    dem_collection = ee.ImageCollection("COPERNICUS/DEM/GLO30").select('DEM')
    elev = dem_collection.mosaic()

    # Area request first, then the elevation request.
    area_km2 = region.area().multiply(1e-6).round().getInfo()
    dem_stats = elev.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=10000,  # presumably meters per pixel -- TODO confirm
    )
    mean_elev = dem_stats.get('DEM').getInfo()

    return {
        'country': country_name,
        'area_km2': area_km2,
        'mean_elev': mean_elev,
    }


# Submit one job per country; each is retried up to 3 times on failure.
demo_countries = ['Abyei Area', 'Zimbabwe', 'Canada']
print('Submitting jobs...')
futures = [
    client.submit(get_country_stats, name, retries=3)
    for name in demo_countries
]
print('...done')

# Collect the results from the workers and show them as the cell output.
print('Gathering results...')
results = client.gather(futures)
print('...done')
results

Submitting jobs...
...done
Gathering results...
...done


[{'country': 'Abyei Area', 'area_km2': 10460, 'mean_elev': 402.5921903247955},
 {'country': 'Zimbabwe', 'area_km2': 391916, 'mean_elev': 973.2955548809969},
 {'country': 'Canada', 'area_km2': 9803738, 'mean_elev': 452.6098530254492}]

# Shut down the cluster.

In [9]:
# Shut down the cluster created earlier in this notebook.
cluster.shutdown()

In [10]:
#| hide
import nbdev
nbdev.nbdev_export()