# Example: Forested Area by Admin Level 2

## Setup

In [1]:
import ee
from earthengine_dask.core import ClusterGEE
import google.auth

## Authenticate & Initialize Earth Engine

Get credentials and the GCP project ID, authenticating if necessary.

In [2]:
try:
    credentials, project_id = google.auth.default()
except google.auth.exceptions.DefaultCredentialsError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
try:
    ee.Initialize(credentials=credentials, project=project_id)
except google.auth.exceptions.RefreshError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
ee.Initialize(credentials=credentials, project=project_id)

# Input Data

## Input: Forest Baseline

This example will use the [European Commission Joint Research Centre's 2020 global map of forest cover](https://data.jrc.ec.europa.eu/dataset/10d1b337-b7d1-4938-a048-686c8185b290) for the forest baseline. The dataset is [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/JRC_GFC2020_V1).

In [3]:
ic = ee.ImageCollection("JRC/GFC2020/V1")

In [4]:
print(f'There is {ic.size().getInfo()} image in the collection.')

There is 1 image in the collection.


... which we will use as the forest baseline.

In [5]:
forest_baseline = ic.first()

Looking at the projection information, the image is in decimal degrees of latitude and longitude (EPSG:4326).

In [6]:
proj_info = forest_baseline.projection().getInfo()
proj_info

{'type': 'Projection',
 'crs': 'EPSG:4326',
 'transform': [8.983152841195215e-05,
  0,
  -170.00005897568744,
  0,
  -8.983152841195215e-05,
  80.03737653225383]}

In [7]:
print(f'The nominal scale (at the equator) is '
      f'{forest_baseline.projection().nominalScale().getInfo()} meters/pixel.')

The nominal scale (at the equator) is 10 meters/pixel.


## Input: Administrative Boundaries

We will use the municipal level (ADM2) boundaries provided by the [geoBoundaries](https://www.geoboundaries.org/) global database of political administrative boundaries v6.0, which is also [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/WM_geoLab_geoBoundaries_600_ADM2).

In [8]:
admin = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM2")
# admin = admin.filter(ee.Filter.eq('shapeGroup', 'USA'))
# admin = admin.filter(ee.Filter.eq('shapeName', 'Colorado'))
# admin = admin.filter(ee.Filter.eq('shapeName', 'Boulder'))

# roi = ee.Geometry.Polygon(
#         [[[-109.01952260759319, 40.971552045695994],
#           [-109.01952260759319, 37.01127149086416],
#           [-101.99925893571819, 37.01127149086416],
#           [-101.99925893571819, 40.971552045695994]]], None, False)
# admin = admin.filterBounds(roi)

There are quite a few features in the collection.

In [9]:
print(f'There are {admin.size().getInfo()} features in the collection.')

There are 49617 features in the collection.


In [10]:
# admin.aggregate_histogram('shapeName').getInfo()

# Analysis

Define a function that calculates the forested area within a single administrative unit and returns it, together with the unit's name, group, and type, as a dictionary.

In [11]:
def get_area(img, shape_id, tile_scale=1):
    """Sum the area covered by ``img`` inside one administrative unit.

    Every pixel value of ``img`` is multiplied by that pixel's area
    (``ee.Image.pixelArea()``) and the products are summed over the feature(s)
    of the notebook-level ``admin`` collection whose ``shapeID`` property
    equals ``shape_id``.

    Args:
        img: The ``ee.Image`` to measure. Presumably a 0/1 (or masked) forest
            map, so that the sum is the forested area -- confirm before using
            it with other inputs.
        shape_id: Value of the ``shapeID`` property that selects the
            administrative unit.
        tile_scale: Forwarded to ``reduceRegions`` as ``tileScale``. The
            default of 1 matches Earth Engine's own default, so existing
            callers are unaffected; larger values can be tried for big regions
            that fail with the default.

    Returns:
        A plain Python ``dict`` with the keys ``'shapeName'``,
        ``'shapeGroup'``, ``'shapeType'``, ``'area_km2'`` and ``'shape_id'``.

    Note:
        The function calls ``getInfo()``, so it blocks until Earth Engine has
        finished the computation.
    """
    # Select the requested unit from the notebook-level `admin` collection.
    fc = ee.FeatureCollection(
        admin.filter(ee.Filter.eq('shapeID', shape_id))
    )

    # Area-weighted sum over the unit; only the first feature's sum is used.
    stats_sum = ee.Number(
        img.multiply(ee.Image.pixelArea()).reduceRegions(
            collection=fc,
            reducer=ee.Reducer.sum(),
            tileScale=tile_scale,
        ).aggregate_array('sum').get(0)
    )

    # Evaluate everything in a single round trip to Earth Engine.
    return_dict = ee.Dictionary({
        'shapeName': fc.aggregate_array('shapeName').get(0),
        'shapeGroup': fc.aggregate_array('shapeGroup').get(0),
        'shapeType': fc.aggregate_array('shapeType').get(0),
        # Rounded to whole square meters, then converted to square kilometers.
        'area_km2': stats_sum.round().multiply(1e-6),
    }).getInfo()
    # The ID is already known client-side, so it is added after the fetch.
    return_dict['shape_id'] = shape_id
    return return_dict

In [12]:
# # Use for debugging to count the total area, rather than forested area
# forest_baseline = forest_baseline.unmask().multiply(0).add(1)

In [13]:
# Try it out.
# shape_ids = admin.aggregate_array('shapeID').distinct().getInfo()
# shape_ids

In [14]:
# # tileScale=1
# get_area(img=forest_baseline, shape_id='42512837B26705409874577')

In [15]:
# # tileScale=16
# get_area(img=forest_baseline, shape_id='42512837B26705409874577')

## Start Dask Cluster

Start up an Earth Engine-enabled cluster. This may take a few minutes to complete.

In [16]:
# Request a Dask cluster through earthengine_dask's ClusterGEE.
cluster = ClusterGEE(
    name='test-cluster-forest-by-admin-temp',
    n_workers=3,
    worker_cpu=8,  # 3 workers x 8 CPUs matches the 24 total threads the client reports
    # spot_policy="spot_with_fallback",
    region='us-west1',
    idle_timeout="4 hours",  # presumably the inactivity period before shutdown -- confirm in the ClusterGEE docs
)

Output()

Output()

Google Application Default Credentials have been written to a file on your Coiled VM(s).
These credentials will potentially be valid until explicitly revoked by running
gcloud auth application-default revoke




Retrieve a client for the cluster, and display it.

In [17]:
client = cluster.get_client()
client

0,1
Connection method: Cluster object,Cluster type: earthengine_dask.ClusterGEE
Dashboard: https://cluster-tycwu.dask.host/EkZZTbw0iKRTOHKq/status,

0,1
Dashboard: https://cluster-tycwu.dask.host/EkZZTbw0iKRTOHKq/status,Workers: 3
Total threads: 24,Total memory: 91.77 GiB

0,1
Comm: tls://10.1.0.9:8786,Workers: 3
Dashboard: http://10.1.0.9:8787/status,Total threads: 24
Started: Just now,Total memory: 91.77 GiB

0,1
Comm: tls://10.1.0.8:35395,Total threads: 8
Dashboard: http://10.1.0.8:8787/status,Memory: 30.59 GiB
Nanny: tls://10.1.0.8:32977,
Local directory: /scratch/dask-scratch-space/worker-7k1yritw,Local directory: /scratch/dask-scratch-space/worker-7k1yritw

0,1
Comm: tls://10.1.0.7:45921,Total threads: 8
Dashboard: http://10.1.0.7:8787/status,Memory: 30.59 GiB
Nanny: tls://10.1.0.7:37941,
Local directory: /scratch/dask-scratch-space/worker-jru6dk8k,Local directory: /scratch/dask-scratch-space/worker-jru6dk8k

0,1
Comm: tls://10.1.0.5:37289,Total threads: 8
Dashboard: http://10.1.0.5:8787/status,Memory: 30.59 GiB
Nanny: tls://10.1.0.5:35463,
Local directory: /scratch/dask-scratch-space/worker-o8otlm70,Local directory: /scratch/dask-scratch-space/worker-o8otlm70


2024-05-29 21:29:36,801 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [None]:
# Fetch the distinct shapeID values of the admin collection; one job is
# submitted per ID below, so this name must be defined for a clean
# Restart & Run All.
shape_ids = admin.aggregate_array('shapeID').distinct().getInfo()
# For a quick test, restrict the run to a single unit instead:
# shape_ids = ['42512837B26705409874577']

In [None]:
# Fan the work out to the cluster: one get_area task per shape ID.
# Each task may be retried, which absorbs transient
# "Too many concurrent aggregations." errors.
submitted_jobs = [
    dict(
        shape_id=shape_id,
        area=client.submit(get_area, forest_baseline, shape_id, retries=2),
    )
    for shape_id in shape_ids
]

In [20]:
## Debug issue with large regions by trying a problematic shape_id
# submitted_jobs = [
#     {
#         'shape_id': '42512837B26705409874577',
#         'tile_scale': tile_scale,
#         'area':client.submit(
#             get_area, forest_baseline, '42512837B26705409874577',
#             retries=1
#         )
#     }
# ]

In [None]:
submitted_jobs

In [None]:
results = client.gather(submitted_jobs)

In [None]:
# pandas is used only in this cell, to tabulate a sample of the results.
import pandas as pd
# Gather the first 10 jobs and display them as a table, one row per job.
pd.DataFrame(client.gather(submitted_jobs[:10]))

In [None]:
# Report every job whose future finished with an error.
for job in submitted_jobs:
    future = job['area']
    if future.status == 'error':
        # Name the shape so a failure can be traced back to its admin unit.
        print(f"{job['shape_id']}: {future.exception()}")
        # Only jobs built by the debugging cell carry a 'tile_scale' entry;
        # the regular submission cell does not, so guard the lookup.
        if 'tile_scale' in job:
            print(job['tile_scale'])