# core

> Core functionality for distributing Earth Engine requests among Dask workers.

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import logging

import coiled
import dask.distributed
import ee
import google.auth

In [None]:
#| export
class InitEarthEngine(dask.distributed.WorkerPlugin):
    """Dask worker plugin that initializes the Earth Engine client library.

    Keyword arguments supplied at construction time are stored as-is and
    forwarded to ``ee.Initialize`` when the plugin is set up on a worker.
    """

    def __init__(self, **kwargs):
        """Remember the keyword arguments to pass to ``ee.Initialize`` later."""
        # Emitted where the plugin object is built, i.e. in the notebook
        # output where the cluster is initiated.
        logging.info('InitEarthEngine init')
        self.kwargs = kwargs

    def setup(self, worker):
        """Initialize Earth Engine with the stored keyword arguments.

        The `worker` argument is accepted to satisfy the plugin interface but
        is not used here.
        """
        # Emitted on the worker side, so it appears in the dask cluster logs.
        logging.info('InitEarthEngine setup')
        # Imported inside the method so the import runs where setup() runs.
        import ee as earth_engine
        earth_engine.Initialize(**self.kwargs)


class ClusterGEE(coiled.Cluster):
    """Coiled cluster that prepares its workers for Earth Engine requests.

    After the underlying ``coiled.Cluster`` is created, the Google
    Application Default Credentials (ADCs) are sent to the cluster. Clients
    obtained through `get_client` register the `InitEarthEngine` plugin.

    All keyword arguments are passed through to ``coiled.Cluster``.
    """

    def __init__(self, **kwargs):
        logging.debug('ClusterGEE init')
        super().__init__(**kwargs)

        # `n_workers` is optional and may be either an int or a [min, max]
        # range for adaptive scaling, so it cannot be indexed out of kwargs
        # unconditionally or handed to wait_for_workers() as-is.
        requested = kwargs.get('n_workers')
        if isinstance(requested, (list, tuple)):
            # For an adaptive range, only the lower bound is guaranteed.
            requested = min(requested) if requested else None

        # Wait for the workers to start, then send the ADCs. Skip the wait
        # when no positive worker count was requested explicitly.
        if isinstance(requested, int) and requested > 0:
            self.wait_for_workers(requested)

        # Import the submodule explicitly rather than relying on it being
        # reachable as an attribute of the top-level `coiled` package.
        from coiled.credentials.google import send_application_default_credentials
        send_application_default_credentials(self)

    def get_client(self):
        """Return a client for this cluster with `InitEarthEngine` registered."""
        logging.debug('ClusterGEE get_client')
        client = super().get_client()
        client.register_plugin(InitEarthEngine())
        return client


# For local development
class LocalClusterGEE(dask.distributed.LocalCluster):
    """Local Dask cluster whose clients register the `InitEarthEngine` plugin.

    Intended for local development. All keyword arguments are passed through
    to ``dask.distributed.LocalCluster``.
    """

    def __init__(self, **kwargs):
        logging.debug('LocalClusterGEE init')
        super().__init__(**kwargs)

    def get_client(self):
        """Return a client for this cluster with `InitEarthEngine` registered."""
        logging.debug('LocalClusterGEE get_client')
        client = super().get_client()
        client.register_plugin(InitEarthEngine())
        return client

    def shutdown(self):
        """Shut the local cluster down by delegating to ``close()``.

        ``LocalCluster`` has no ``shutdown()`` method, so calling
        ``cluster.shutdown()`` on this class raised ``AttributeError``.
        Providing it here lets the same calling code work whether the cluster
        is a `ClusterGEE` or a `LocalClusterGEE`.
        """
        logging.debug('LocalClusterGEE shutdown')
        return self.close()

# Try it out

Create a cluster and run a few jobs.

## Authenticate & Initialize Earth Engine

Get credentials and the GCP project ID, authenticating if necessary.

In [None]:
try:
    credentials, project_id = google.auth.default()
except google.auth.exceptions.DefaultCredentialsError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
try:
    ee.Initialize(credentials=credentials, project=project_id)
except google.auth.exceptions.RefreshError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
ee.Initialize(credentials=credentials, project=project_id)

# Start Dask Cluster

Start up an Earth Engine-enabled cluster. This may take a few minutes to complete.

In [None]:
# Coiled-hosted alternative, left disabled here. Uncomment this call (and
# remove the LocalClusterGEE line below) to use it instead.
# cluster = ClusterGEE(
#     name='test-class-cluster',
#     n_workers=1,
#     worker_cpu=4,
#     spot_policy="spot_with_fallback",
#     region='us-central1',
#     idle_timeout="1 hours",
# )
# Local cluster with default settings, for development.
cluster = LocalClusterGEE()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 50794 instead


Retrieve a client for the cluster, and display it.

In [None]:
# get_client() also registers the InitEarthEngine plugin on the client.
client = cluster.get_client()
# Last expression in the cell, so the notebook displays the client summary.
client

0,1
Connection method: Cluster object,Cluster type: __main__.LocalClusterGEE
Dashboard: http://127.0.0.1:50794/status,

0,1
Dashboard: http://127.0.0.1:50794/status,Workers: 5
Total threads: 10,Total memory: 64.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:50795,Workers: 5
Dashboard: http://127.0.0.1:50794/status,Total threads: 10
Started: Just now,Total memory: 64.00 GiB

0,1
Comm: tcp://127.0.0.1:50811,Total threads: 2
Dashboard: http://127.0.0.1:50817/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:50798,
Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-uhclrxt6,Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-uhclrxt6

0,1
Comm: tcp://127.0.0.1:50809,Total threads: 2
Dashboard: http://127.0.0.1:50815/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:50800,
Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-mxmx0chx,Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-mxmx0chx

0,1
Comm: tcp://127.0.0.1:50810,Total threads: 2
Dashboard: http://127.0.0.1:50816/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:50802,
Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-qf20mh5l,Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-qf20mh5l

0,1
Comm: tcp://127.0.0.1:50808,Total threads: 2
Dashboard: http://127.0.0.1:50814/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:50804,
Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-6kosms5z,Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-6kosms5z

0,1
Comm: tcp://127.0.0.1:50812,Total threads: 2
Dashboard: http://127.0.0.1:50813/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:50806,
Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-a1wn1vpy,Local directory: /var/folders/1s/21_zf6j56vs_t0j6klrw2znm0000gn/T/dask-scratch-space/worker-a1wn1vpy


# Submit Jobs

Test it out by:
- Defining a function that can be distributed,
- Submitting jobs running the function to workers, 
- Gathering the results locally, and
- Displaying the results

In [None]:
# Get a list of countries to analyze.
# `country_fc` is also read as a global by get_country_stats() below.
country_fc = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
# Distinct, sorted country names, fetched to the local process via getInfo().
# NOTE(review): neither `country_list` nor `random` appears to be used in the
# rest of this cell — confirm whether sampling countries was intended.
country_list = country_fc.aggregate_array('country_na').distinct().sort().getInfo()

import random

# Task function: this is what gets submitted to the cluster workers.
def get_country_stats(country_name):
    """Return the area and mean elevation for the named country.

    The country is looked up in the global `country_fc` collection by its
    'country_na' property. The result is a dict with the keys 'country',
    'area_km2' and 'mean_elev'; the two numeric values are each fetched with
    a separate getInfo() call.
    """
    matching = country_fc.filter(ee.Filter.eq('country_na', country_name))
    boundary = matching.geometry()
    dem = ee.ImageCollection("COPERNICUS/DEM/GLO30").select('DEM').mosaic()

    # area() is scaled by 1e-6 and rounded before being fetched.
    area_km2 = boundary.area().multiply(1e-6).round().getInfo()

    # Mean of the 'DEM' band over the country's geometry.
    band_means = dem.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=boundary,
        scale=10000,
    )
    mean_elev = band_means.get('DEM').getInfo()

    return {
        'country': country_name,
        'area_km2': area_km2,
        'mean_elev': mean_elev,
    }


# Fan out one task per country; each task is submitted with retries=3.
print('Submitting jobs...')
futures = [
    client.submit(get_country_stats, name, retries=3)
    for name in ('Abyei Area', 'Zimbabwe')
]
print('...done')

# Collect the finished results back into the local process.
print('Gathering results...')
results = client.gather(futures)
print('...done')

# Last expression in the cell, so the notebook displays the gathered list.
results

Submitting jobs...
...done
Gathering results...
...done


[{'country': 'Abyei Area', 'area_km2': 10460, 'mean_elev': 402.5921903247955},
 {'country': 'Zimbabwe', 'area_km2': 391916, 'mean_elev': 973.2955548809969}]

# Shut down the cluster.

In [None]:
# Shut the cluster down now that the results have been gathered.
cluster.shutdown()

AttributeError: 'LocalClusterGEE' object has no attribute 'shutdown'

In [None]:
#| hide
# Export the cells marked for export from this notebook.
import nbdev

nbdev.nbdev_export()