# core

> Core functionality for distributing Earth Engine requests among Dask workers.

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import logging

In [None]:
#| export
import ee

In [None]:
#| export
import dask.distributed

In [None]:
#| export
import google.auth

In [None]:
#| export
import coiled

In [None]:
#| export
class InitEarthEngine(dask.distributed.WorkerPlugin):
    """Worker plugin that calls ``ee.Initialize`` when a worker sets it up.

    The keyword arguments given to the constructor are stored on the
    instance and forwarded unchanged to ``ee.Initialize`` in ``setup``.
    """

    def __init__(self, **kwargs):
        """Remember the keyword arguments to pass to ``ee.Initialize``."""
        # This appears in the notebook output where the cluster is initiated.
        logging.info('InitEarthEngine init')
        self.kwargs = kwargs

    def setup(self, worker):
        """Initialize Earth Engine using the stored keyword arguments.

        Args:
            worker: The worker this plugin is being set up on (not used here).
        """
        # This appears in the dask cluster logs.
        logging.info('InitEarthEngine setup')
        # Function-scope import: resolved at the time setup() runs.
        from ee import Initialize as initialize_earth_engine
        initialize_earth_engine(**self.kwargs)
        

class ClusterGEE(coiled.Cluster):
    """A ``coiled.Cluster`` prepared for running Earth Engine requests.

    On construction, the local Google Application Default Credentials (ADCs)
    are sent to the cluster. Clients obtained through ``get_client`` have the
    ``InitEarthEngine`` worker plugin registered.
    """

    def __init__(self, **kwargs):
        """Create the cluster, wait for its workers, and send the ADCs.

        Args:
            **kwargs: Passed unchanged to ``coiled.Cluster``. When ``n_workers``
                is an integer, construction blocks until that many workers
                have started before the credentials are sent.
        """
        logging.debug('ClusterGEE init')
        super().__init__(**kwargs)
        # Wait for the workers to start, then send the ADCs.
        # `n_workers` is optional: indexing kwargs directly raised KeyError
        # *after* the cluster had already been created, leaving it running
        # with no object returned to the caller. Only wait when an explicit
        # integer count was requested.
        # NOTE(review): for other forms of `n_workers` (omitted, or a
        # non-integer value) no explicit wait is performed here - confirm
        # that the base class's own start-up behaviour is sufficient.
        requested_workers = kwargs.get('n_workers')
        if isinstance(requested_workers, int):
            self.wait_for_workers(requested_workers)
        coiled.credentials.google.send_application_default_credentials(self)

    def get_client(self):
        """Return a client for this cluster with Earth Engine initialization.

        Each call registers a new ``InitEarthEngine`` plugin (constructed with
        no arguments) on the client before returning it.
        """
        logging.debug('ClusterGEE get_client')
        client = super().get_client()
        client.register_plugin(InitEarthEngine())
        return client

# Try it out

Create a cluster and run a few jobs.

## Authenticate & Initialize Earth Engine

Get credentials and the GCP project ID, authenticating if necessary.

In [None]:
# Look up the Application Default Credentials and their associated project ID.
try:
    credentials, project_id = google.auth.default()
except google.auth.exceptions.DefaultCredentialsError:
    # No default credentials were found: run the gcloud login command
    # (IPython shell escape), then look the credentials up again.
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()

# Initialize the local Earth Engine client with those credentials and project.
ee.Initialize(credentials=credentials, project=project_id)

# Start Dask Cluster

Start up an Earth Engine-enabled cluster. This may take a few minutes to complete.

In [None]:
# Create a two-worker cluster. ClusterGEE waits for the requested workers and
# sends the Application Default Credentials as part of construction.
cluster = ClusterGEE(
    name='test-class-cluster',
    n_workers=2,
    worker_cpu=8,
    region='us-central1',
)

Output()

Google Application Default Credentials have been written to a file on your Coiled VM(s).
These credentials will potentially be valid until explicitly revoked by running
gcloud auth application-default revoke


Retrieve a client for the cluster, and display it.

In [None]:
# ClusterGEE.get_client() registers the InitEarthEngine plugin on the client.
client = cluster.get_client()
# Bare expression as the last line of the cell, so the notebook displays it.
client

0,1
Connection method: Cluster object,Cluster type: __main__.ClusterGEE
Dashboard: https://cluster-ufyqp.dask.host/FqddnuVpTbRxtrlw/status,

0,1
Dashboard: https://cluster-ufyqp.dask.host/FqddnuVpTbRxtrlw/status,Workers: 2
Total threads: 16,Total memory: 61.16 GiB

0,1
Comm: tls://10.2.0.13:8786,Workers: 2
Dashboard: http://10.2.0.13:8787/status,Total threads: 16
Started: 12 minutes ago,Total memory: 61.16 GiB

0,1
Comm: tls://10.2.0.12:33737,Total threads: 8
Dashboard: http://10.2.0.12:8787/status,Memory: 30.58 GiB
Nanny: tls://10.2.0.12:39245,
Local directory: /scratch/dask-scratch-space/worker-qv0i1fmx,Local directory: /scratch/dask-scratch-space/worker-qv0i1fmx

0,1
Comm: tls://10.2.0.11:40289,Total threads: 8
Dashboard: http://10.2.0.11:8787/status,Memory: 30.58 GiB
Nanny: tls://10.2.0.11:34949,
Local directory: /scratch/dask-scratch-space/worker-o47xuth5,Local directory: /scratch/dask-scratch-space/worker-o47xuth5


# Submit Jobs

Test it out by:
- Defining a function that can be distributed,
- Submitting jobs running the function to workers, 
- Gathering the results locally, and
- Displaying the results

In [None]:
# Get a list of countries to analyze.
country_fc = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
# Distinct, sorted values of the 'country_na' property, fetched with getInfo().
# NOTE: the job submission further down names its two countries explicitly
# rather than reading from country_list.
country_list = country_fc.aggregate_array('country_na').distinct().sort().getInfo()

# Write a function that can be run by the cluster workers. 
def get_country_stats(country_name):
    """Return the area and mean DEM value for one country.

    Filters the module-level ``country_fc`` collection on its ``country_na``
    property and issues two ``getInfo()`` requests: one for the rounded
    area and one for the mean of the 'DEM' band over the same geometry.

    Args:
        country_name: Value of ``country_na`` to select.

    Returns:
        A dict with the keys 'country', 'area_km2' and 'mean_elev'.
    """
    name_filter = ee.Filter.eq('country_na', country_name)
    matching = country_fc.filter(name_filter)
    dem = ee.ImageCollection("COPERNICUS/DEM/GLO30").select('DEM').mosaic()

    # First request: area scaled by 1e-6 (reported as km2) and rounded.
    area_km2 = matching.geometry().area().multiply(1e-6).round().getInfo()

    # Second request: mean of the 'DEM' band over the country geometry.
    band_means = dem.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=matching.geometry(),
        scale=10000,
    )
    mean_elev = band_means.get('DEM').getInfo()

    return {
        'country': country_name,
        'area_km2': area_km2,
        'mean_elev': mean_elev,
    }

# Create the jobs and submit them to the workers, one per country.
submitted_jobs = [
    client.submit(get_country_stats, country)
    for country in ['Abyei Area', 'Zimbabwe']
]

# Gather up the results and display them.
results = client.gather(submitted_jobs)
# Bare expression as the last line of the cell, so the notebook displays it.
results

[{'country': 'Abyei Area', 'area_km2': 10460, 'mean_elev': 402.5921903247955},
 {'country': 'Zimbabwe', 'area_km2': 391916, 'mean_elev': 973.2955548809969}]

Display a scatter plot of the data.

# Shut down the cluster.

In [None]:
# Shut the cluster down now that the results have been gathered.
cluster.shutdown()

In [None]:
#| hide
# Run nbdev's export step for this notebook (cells marked `#| export`).
import nbdev; nbdev.nbdev_export()