# Example: Forested Area by Admin Level 2

## Setup

In [None]:
import ee
from earthengine_dask.core import ClusterGEE
import google.auth
import pandas as pd
from pprint import pprint

## Authenticate & Initialize Earth Engine

Get credentials and the GCP project ID, authenticating if necessary.

In [None]:
try:
    credentials, project_id = google.auth.default()
except google.auth.exceptions.DefaultCredentialsError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
try:
    ee.Initialize(credentials=credentials, project=project_id)
except google.auth.exceptions.RefreshError:
    !gcloud auth application-default login
    credentials, project_id = google.auth.default()
ee.Initialize(credentials=credentials, project=project_id)

# Input Data

## Input: Forest Baseline

This example will use the [European Commission Joint Research Centre's 2020 global map of forest cover](https://data.jrc.ec.europa.eu/dataset/10d1b337-b7d1-4938-a048-686c8185b290) for the forest baseline. The dataset is [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/JRC_GFC2020_V1).

In [None]:
# Load the JRC Global Forest Cover 2020 (V1) dataset as an image collection.
ic = ee.ImageCollection("JRC/GFC2020/V1")

In [None]:
# getInfo() brings the collection's metadata back to the client for inspection.
pprint(ic.getInfo())

{'bands': [],
 'features': [{'bands': [{'crs': 'EPSG:4326',
                          'crs_transform': [8.983152841195215e-05,
                                            0,
                                            -170.00005897568744,
                                            0,
                                            -8.983152841195215e-05,
                                            80.03737653225383],
                          'data_type': {'max': 255,
                                        'min': 0,
                                        'precision': 'int',
                                        'type': 'PixelType'},
                          'dimensions': [4007503, 1559941],
                          'id': 'Map'}],
               'id': 'JRC/GFC2020/V1/2020',
               'properties': {'system:asset_size': 59186087403,
                              'system:footprint': {'coordinates': [[-180, -90],
                                                                   [1

In [None]:
# Count the images in the collection; getInfo() returns the count to the client.
print(f'There is {ic.size().getInfo()} image in the collection.')

There is 1 image in the collection.


... which we will use as the forest baseline.

In [None]:
# Take the first image of the collection as the forest baseline.
jrc_forest_baseline = ic.first()

Verify that the image is binary.

In [None]:
# Spot-check the range of pixel values inside a 1-degree test box
# (longitude 10..11, latitude 10..11) using a min/max reducer.
# unmask() is applied first so that masked pixels also contribute a value
# (presumably 0, the unmask default -- TODO confirm).
jrc_forest_baseline.unmask().reduceRegion(
    reducer=ee.Reducer.minMax(),
    geometry=ee.Geometry.BBox(10, 10, 11, 11),
    maxPixels=1e10,
).getInfo()

{'Map_max': 1, 'Map_min': 0}

Looking at the projection information, the image is in decimal degrees of latitude and longitude (EPSG:4326).

In [None]:
# Fetch the baseline image's projection description to the client and print it.
proj_info = jrc_forest_baseline.projection().getInfo()
pprint(proj_info)

{'crs': 'EPSG:4326',
 'transform': [8.983152841195215e-05,
               0,
               -170.00005897568744,
               0,
               -8.983152841195215e-05,
               80.03737653225383],
 'type': 'Projection'}


In [None]:
# Report the nominal scale of the baseline image's projection.
print(f'The nominal scale (at the equator) is '
      f'{jrc_forest_baseline.projection().nominalScale().getInfo()} meters/pixel.')

The nominal scale (at the equator) is 10 meters/pixel.


### LUCA Monthly Alerts

In [None]:
from datetime import datetime

# Reference date for the LUCA date encoding (dates are stored as day offsets
# from this date; see get_luca_alert_image_for_time_interval).
UNIX_EPOCH = datetime(1970, 1, 1)

# Common prefix of the project's Earth Engine assets.
ASSET_LOCATION = 'projects/global-forest-structure/assets/'

# Mosaic the LUCA image collection into a single image, give the mosaic an
# explicit EPSG:4326 default projection, and keep only the "date_sample" band.
alertsBase = (
    ee.ImageCollection(ASSET_LOCATION + 'luca/mosaic1/luca')
    .mosaic()
    .setDefaultProjection(
        ee.Projection(
            crs='EPSG:4326',
            transform=[
                0.00008983152841195215, 0, -180,
                0, -0.00008983152841195215, 80,
            ],
        )
    )
    .select("date_sample")
)

def get_luca_alert_image_for_time_interval(start_date, end_date):
    """Get the LUCA alert image for the interval [start_date, end_date).

    Args:
        start_date: datetime marking the (inclusive) start of the interval.
        end_date: datetime marking the (exclusive) end of the interval.

    Returns:
        The result of comparing ``alertsBase`` against both day offsets:
        true where the stored value is >= the start offset and < the end
        offset.
    """
    # Convert to the LUCA date encoding (days since the Unix epoch).
    first_day, end_day = (
        (moment - UNIX_EPOCH).days for moment in (start_date, end_date)
    )

    # Binary image: on or after the first day AND strictly before the end day.
    on_or_after_start = alertsBase.gte(first_day)
    before_end = alertsBase.lt(end_day)
    return on_or_after_start.And(before_end)

def get_luca_alert_image_for_month(year, month):
    """Get the LUCA alert image for a given calendar month.

    Args:
        year: Calendar year (e.g. 2023).
        month: Calendar month, 1 (January) through 12 (December).

    Returns:
        Whatever ``get_luca_alert_image_for_time_interval`` returns for the
        interval from the first day of the month (inclusive) to the first
        day of the following month (exclusive).

    Raises:
        ValueError: If ``year``/``month`` do not form a valid date.
    """
    start_date = datetime(year, month, 1)
    # December has no "month + 1" within the same year: roll over to January
    # of the following year instead of passing month 13 to datetime().
    if month == 12:
        end_date = datetime(year + 1, 1, 1)
    else:
        end_date = datetime(year, month + 1, 1)
    return get_luca_alert_image_for_time_interval(start_date, end_date)

# LUCA alert image for January 2023.
luca_monthly_image = get_luca_alert_image_for_month(year=2023, month=1)

## Input: Administrative Boundaries

### GeoBoundaries ADM2

We will use the municipal level (ADM2) boundaries provided by the [geoBoundaries](https://www.geoboundaries.org/) global database of political administrative boundaries v6.0, which is also [available in Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/WM_geoLab_geoBoundaries_600_ADM2).

In [None]:
# Load the geoBoundaries v6.0 ADM2 collection and keep only the features
# whose shapeGroup is 'CAN'.
geoboundaries_adm2 = (
    ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM2")
    .filter(ee.Filter.eq('shapeGroup', 'CAN'))
)

# Alternative filters (disabled): select by shape name ...
# geoboundaries_adm2 = geoboundaries_adm2.filter(ee.Filter.eq('shapeName', 'Colorado'))
# geoboundaries_adm2 = geoboundaries_adm2.filter(ee.Filter.eq('shapeName', 'Boulder'))

# ... or by a bounding polygon.
# roi = ee.Geometry.Polygon(
#         [[[-109.01952260759319, 40.971552045695994],
#           [-109.01952260759319, 37.01127149086416],
#           [-101.99925893571819, 37.01127149086416],
#           [-101.99925893571819, 40.971552045695994]]], None, False)
# geoboundaries_adm2 = geoboundaries_adm2.filterBounds(roi)

geoboundary_properties = None  # i.e. use all the properties

There are quite a few features in the collection.

In [None]:
# Count the features that remain after the filter above.
print(f'There are {geoboundaries_adm2.size().getInfo()} features in the collection.')

There are 76 features in the collection.


In [None]:
# Show the properties of one sample feature.
geoboundaries_adm2.first().getInfo()['properties']

{'shapeGroup': 'CAN',
 'shapeID': '811477B25825438233887',
 'shapeName': 'Avalon Peninsula',
 'shapeType': 'ADM2'}

In [None]:
# admin.aggregate_histogram('shapeName').getInfo()

In [None]:
# Property used to identify each geoBoundaries region.
geoboundaries_id_field = 'shapeID'
# aggregate_histogram() is fetched to the client as a dict keyed by the
# distinct values of the id field; only its keys are kept. Note this is a
# dict keys view, not a list.
geoboundaries_adm2_list = geoboundaries_adm2.aggregate_histogram(geoboundaries_id_field).getInfo().keys()

### GADM Level 2 Boundaries

In [None]:
# Load the GADM level-2 administrative boundaries from the project's assets.
gadm2 = ee.FeatureCollection("projects/global-forest-structure/assets/admin_boundaries/gadm-level2")

In [None]:
# Check for problematic metadata: features whose GID_2 is '?' or empty.
gadm2_invalid_metadata = gadm2.filter(ee.Filter.inList('GID_2', ['?', '']))
print(f'There are {gadm2_invalid_metadata.size().getInfo()} features with invalid GID_2 values.')

There are 14 features with invalid GID_2 values.


In [None]:
# Drop the (14) records whose GID_2 is '?' or empty; they will need to be
# processed separately.
gadm2 = gadm2.filter(
    ee.Filter.inList('GID_2', ['?', '']).Not()
)

# Optional narrowing to a single country / state (disabled):
# gadm2 = gadm2.filter(ee.Filter.eq('GID_0', 'CAN'))
# gadm2 = gadm2.filter(ee.Filter.eq('HASC_1', 'US.CO'))

In [None]:
# Feature properties to carry through into each result record.
gadm2_properties = ['UID', 'GID_0', 'GID_1', 'GID_2', 'CONTINENT', 'NAME_0', 'NAME_1', 'NAME_2']

In [None]:
# Count the features remaining in the GADM level-2 collection.
print(f'There are {gadm2.size().getInfo()} features in the collection.')

There are 47223 features in the collection.


In [None]:
# Display the properties of a sample record (the first feature).
gadm2.first().getInfo()['properties']

{'CC_1': '',
 'CC_2': '47',
 'CONTINENT': 'Europe',
 'COUNTRY': 'France',
 'DISPUTEDBY': '',
 'ENGTYPE_1': 'Region',
 'ENGTYPE_2': 'Department',
 'GID_0': 'FRA',
 'GID_1': 'FRA.10_1',
 'GID_2': 'FRA.10.10_1',
 'GOVERNEDBY': '',
 'HASC_1': 'FR.AC',
 'HASC_2': 'FR.LG',
 'ISO_1': 'FR-NAQ',
 'NAME_0': 'France',
 'NAME_1': 'Nouvelle-Aquitaine',
 'NAME_2': 'Lot-et-Garonne',
 'NL_NAME_1': '',
 'NL_NAME_2': '',
 'REGION': '',
 'SOVEREIGN': 'France',
 'SUBCONT': '',
 'TYPE_1': 'Région',
 'TYPE_2': 'Département',
 'UID': 84118,
 'VALIDFR_1': '2016-01-01',
 'VALIDFR_2': 'Unknown',
 'VARNAME_0': '',
 'VARNAME_1': '',
 'VARNAME_2': '',
 'VARREGION': ''}

In [None]:
# Property used to identify each GADM level-2 region.
gadm2_id_field = 'GID_2'

# Histogram of the id field, fetched to the client as a dict:
# {region id: number of features carrying that id}.
gadm2_region_dict = gadm2.aggregate_histogram(gadm2_id_field).getInfo()

# Preview only the first few entries; printing the full dictionary (tens of
# thousands of regions) floods the notebook output.
pprint(dict(list(gadm2_region_dict.items())[:10]))
print(f'... {len(gadm2_region_dict)} region ids in total.')

# Region ids to process (a dict keys view, not a list).
gadm2_region_list = gadm2_region_dict.keys()
# gadm2_region_list

{'AFG.1.10_1': 1,
 'AFG.1.11_1': 1,
 'AFG.1.12_1': 1,
 'AFG.1.13_1': 1,
 'AFG.1.1_1': 1,
 'AFG.1.2_1': 1,
 'AFG.1.3_1': 1,
 'AFG.1.4_1': 1,
 'AFG.1.5_1': 1,
 'AFG.1.6_1': 1,
 'AFG.1.7_1': 1,
 'AFG.1.8_1': 1,
 'AFG.1.9_1': 1,
 'AFG.10.1_1': 1,
 'AFG.10.2_1': 1,
 'AFG.10.3_1': 1,
 'AFG.10.4_1': 1,
 'AFG.10.5_1': 1,
 'AFG.10.6_1': 1,
 'AFG.10.7_1': 1,
 'AFG.11.10_1': 1,
 'AFG.11.11_1': 1,
 'AFG.11.12_1': 1,
 'AFG.11.13_1': 1,
 'AFG.11.1_1': 1,
 'AFG.11.2_1': 1,
 'AFG.11.3_1': 1,
 'AFG.11.4_1': 1,
 'AFG.11.5_1': 1,
 'AFG.11.6_1': 1,
 'AFG.11.7_1': 1,
 'AFG.11.8_1': 1,
 'AFG.11.9_1': 1,
 'AFG.12.10_1': 1,
 'AFG.12.11_1': 1,
 'AFG.12.12_1': 1,
 'AFG.12.13_1': 1,
 'AFG.12.14_1': 1,
 'AFG.12.15_1': 1,
 'AFG.12.16_1': 1,
 'AFG.12.1_1': 1,
 'AFG.12.2_1': 1,
 'AFG.12.3_1': 1,
 'AFG.12.4_1': 1,
 'AFG.12.5_1': 1,
 'AFG.12.6_1': 1,
 'AFG.12.7_1': 1,
 'AFG.12.8_1': 1,
 'AFG.12.9_1': 1,
 'AFG.13.1_1': 1,
 'AFG.13.2_1': 1,
 'AFG.13.3_1': 1,
 'AFG.13.4_1': 1,
 'AFG.13.5_1': 1,
 'AFG.13.6_1': 1,
 'AFG.13

## GADM polygons

In [None]:
# Load the GADM 4.1 polygon collection from the project's assets.
gadm = ee.FeatureCollection("projects/vorgeo-ctrees/assets/admin_boundaries/gadm_410_utf8")

# Feature properties of interest for this collection.
gadm_properties = ['CONTINENT', 'NAME_0', 'NAME_1', 'NAME_2']

In [None]:
# Count the features in the GADM polygon collection.
print(f'There are {gadm.size().getInfo()} features in the collection.')

There are 356510 features in the collection.


# Analysis

Define a function that calculates the area (in km²) covered by the binary image within a single administrative region, and returns it together with that region's properties.

In [None]:
def get_area(img, admin_fc, id_field, shape_id, properties=None, tile_scale=1):
    """Compute the area covered by an image within one administrative region.

    The image is multiplied by ``ee.Image.pixelArea()`` and summed over the
    selected region, so a binary (0/1) image yields the area of its 1-valued
    pixels.

    Args:
        img: Earth Engine image to measure (expected to be binary).
        admin_fc: Feature collection holding the administrative regions.
        id_field: Name of the property that identifies a region.
        shape_id: Value of ``id_field`` selecting the region to process.
        properties: Optional list of property names to copy into the result;
            ``None`` copies all of the feature's properties.
        tile_scale: Forwarded to ``reduceRegions`` as ``tileScale``. The
            default of 1 matches the Earth Engine default; larger values
            may help with regions that fail with out-of-memory or timeout
            errors, at the cost of a slower computation.

    Returns:
        A client-side dict with the selected feature properties plus
        ``'area_km2'``: the summed area, rounded to a whole square meter
        and then scaled by 1e-6.

    Raises:
        ee.EEException: Propagated from the blocking ``getInfo()`` call when
            the server-side computation fails (e.g. it times out).
    """
    # Restrict the collection to the feature(s) matching the requested id.
    fc = ee.FeatureCollection(
        admin_fc.filter(ee.Filter.eq(id_field, shape_id))
    )
    # Properties are taken from the first matching feature.
    feat = fc.first()

    # Sum (pixel value x pixel area) per matching feature; only the first
    # feature's sum is used.
    stats_sum = ee.Number(
        img.multiply(ee.Image.pixelArea()).reduceRegions(
            collection=fc,
            reducer=ee.Reducer.sum(),
            tileScale=tile_scale,
        ).aggregate_array('sum').get(0)
    )
    prop_dict = feat.toDictionary(properties).set('area_km2', stats_sum.round().multiply(1e-6))

    # getInfo() blocks until the server has evaluated the whole expression.
    return ee.Dictionary(prop_dict).getInfo()

In [None]:
# Select the inputs for the analysis. Exactly one of the two groups below
# should be active.

# Option A (disabled): JRC forest baseline by geoBoundaries ADM2 regions.
# binary_image = jrc_forest_baseline
# admin_fc = geoboundaries_adm2
# admin_regions = geoboundaries_adm2_list
# id_field = geoboundaries_id_field
# properties = geoboundary_properties

# Option B (active): LUCA monthly alerts by GADM level-2 regions.
binary_image = luca_monthly_image
admin_fc = gadm2
admin_regions = gadm2_region_list
id_field = gadm2_id_field
properties = gadm2_properties

In [None]:
# Try to run one region locally (the first id in admin_regions) before
# distributing the work.
region_id = list(admin_regions)[0]
print(region_id)
test = get_area(binary_image, admin_fc, id_field, region_id, properties)
test

AFG.1.10_1


{'CONTINENT': 'Asia',
 'GID_0': 'AFG',
 'GID_1': 'AFG.1_1',
 'GID_2': 'AFG.1.10_1',
 'NAME_0': 'Afghanistan',
 'NAME_1': 'Badakhshan',
 'NAME_2': 'Shahri Buzurg',
 'UID': 10,
 'area_km2': 0}

In [None]:
# # Use for debugging to count the total area, rather than forested area
# forest_baseline = forest_baseline.unmask().multiply(0).add(1)

In [None]:
# Try it out.
# region_list = admin.aggregate_array('shapeID').distinct().getInfo()
# region_list

In [None]:
# # tileScale=1
# get_area(img=forest_baseline, shape_id='42512837B26705409874577')

In [None]:
# # tileScale=16
# get_area(img=forest_baseline, shape_id='42512837B26705409874577')

## Start Dask Cluster

Start up an Earth Engine-enabled cluster. This may take a few minutes to complete.

In [None]:
# Request the cluster: 5 workers with 8 CPUs each, in region us-west1.
# NOTE(review): idle_timeout presumably shuts the cluster down after 4 idle
# hours -- confirm against the ClusterGEE/Coiled documentation.
cluster = ClusterGEE(
    name='test-cluster-forest-by-admin-temp4',
    n_workers=5,
    worker_cpu=8,
    # spot_policy="spot_with_fallback",
    region='us-west1',
    idle_timeout="4 hours",
)

Google Application Default Credentials have been written to a file on your Coiled VM(s).
These credentials will potentially be valid until explicitly revoked by running
gcloud auth application-default revoke


Retrieve a client for the cluster, and display it.

In [None]:
# Obtain a client bound to the cluster; the bare expression renders its summary.
client = cluster.get_client()
client

0,1
Connection method: Cluster object,Cluster type: earthengine_dask.ClusterGEE
Dashboard: https://cluster-corza.dask.host/_lymxilghH8taEOo/status,

0,1
Dashboard: https://cluster-corza.dask.host/_lymxilghH8taEOo/status,Workers: 5
Total threads: 40,Total memory: 152.91 GiB

0,1
Comm: tls://10.1.0.76:8786,Workers: 5
Dashboard: http://10.1.0.76:8787/status,Total threads: 40
Started: Just now,Total memory: 152.91 GiB

0,1
Comm: tls://10.1.0.69:33217,Total threads: 8
Dashboard: http://10.1.0.69:8787/status,Memory: 30.58 GiB
Nanny: tls://10.1.0.69:42797,
Local directory: /scratch/dask-scratch-space/worker-6a8iwl_r,Local directory: /scratch/dask-scratch-space/worker-6a8iwl_r

0,1
Comm: tls://10.1.0.65:38293,Total threads: 8
Dashboard: http://10.1.0.65:8787/status,Memory: 30.58 GiB
Nanny: tls://10.1.0.65:44467,
Local directory: /scratch/dask-scratch-space/worker-ds12lel8,Local directory: /scratch/dask-scratch-space/worker-ds12lel8

0,1
Comm: tls://10.1.0.74:37383,Total threads: 8
Dashboard: http://10.1.0.74:8787/status,Memory: 30.58 GiB
Nanny: tls://10.1.0.74:35647,
Local directory: /scratch/dask-scratch-space/worker-_ah1c0sp,Local directory: /scratch/dask-scratch-space/worker-_ah1c0sp

0,1
Comm: tls://10.1.0.75:34575,Total threads: 8
Dashboard: http://10.1.0.75:8787/status,Memory: 30.59 GiB
Nanny: tls://10.1.0.75:46789,
Local directory: /scratch/dask-scratch-space/worker-x69z4woq,Local directory: /scratch/dask-scratch-space/worker-x69z4woq

0,1
Comm: tls://10.1.0.70:42939,Total threads: 8
Dashboard: http://10.1.0.70:8787/status,Memory: 30.58 GiB
Nanny: tls://10.1.0.70:38637,
Local directory: /scratch/dask-scratch-space/worker-wiwkmy5h,Local directory: /scratch/dask-scratch-space/worker-wiwkmy5h


In [None]:
# Problematic shape?
#region_list = admin.aggregate_array('shapeID').distinct().getInfo()
# region_list = ['42512837B26705409874577']

In [None]:
# Submit one get_area task per administrative region to the cluster.
# retries=1 allows each task to be retried, to handle
# "Too many concurrent aggregations." errors.
submitted_jobs = [
    client.submit(get_area, binary_image, admin_fc, id_field, rid, properties, retries=1)
    for rid in admin_regions
]

In [None]:
## Debug issue with large regions by trying a problematic shape_id
# submitted_jobs = [
#     {
#         'shape_id': '42512837B26705409874577',
#         'tile_scale': tile_scale,
#         'area':client.submit(
#             get_area, forest_baseline, '42512837B26705409874577',
#             retries=1
#         )
#     }
# ]

In [None]:
# for job in submitted_jobs:
#     if job.status in ['error']:
#         future = job
#         print(job)
#         print(future.exception())

# Pair every failed future with the exception it raised.
jobs_with_errors = [
    (failed_job, failed_job.exception())
    for failed_job in submitted_jobs
    if failed_job.status == 'error'
]
jobs_with_errors

[(<Future: error, key: get_area-701fb5d0592e267c6f34cee71e1d2de0>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-205248cf8f9a8ce2a2ea0a8932edaec4>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-4373a0aeeec9af5be4cd6dc59eb95a1e>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-aaca9de283254793f277121fc2aeb47e>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-1322cf74a706dc0b2581e6079e514107>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-aea540eb1faf2496150308795ab131d3>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-ea0595b6a2039a1f570e339acb00317b>,
  ee.ee_exception.EEException('Computation timed out.')),
 (<Future: error, key: get_area-b869574742e243e364a6be9b23dbf6d8>,
  ee.ee_exception.EEException('Computation timed out.')),


## Get finished jobs

In [None]:
# Keep only the futures whose status is 'finished', and report how many there are.
finished_jobs = list(
    filter(lambda future: future.status == 'finished', submitted_jobs)
)
print(len(finished_jobs))

47205


In [None]:
# Collect the results of the finished futures, then peek at the last five.
finished_results = client.gather(finished_jobs)
finished_results[-5:]

[{'CONTINENT': 'Africa',
  'GID_0': 'ZWE',
  'GID_1': 'ZWE.9_1',
  'GID_2': 'ZWE.9.5_2',
  'NAME_0': 'Zimbabwe',
  'NAME_1': 'Matabeleland South',
  'NAME_2': 'Gwanda Urban',
  'UID': 356087,
  'area_km2': 0},
 {'CONTINENT': 'Africa',
  'GID_0': 'ZWE',
  'GID_1': 'ZWE.9_1',
  'GID_2': 'ZWE.9.6_2',
  'NAME_0': 'Zimbabwe',
  'NAME_1': 'Matabeleland South',
  'NAME_2': 'Insiza',
  'UID': 356125,
  'area_km2': 0},
 {'CONTINENT': 'Africa',
  'GID_0': 'ZWE',
  'GID_1': 'ZWE.9_1',
  'GID_2': 'ZWE.9.7_2',
  'NAME_0': 'Zimbabwe',
  'NAME_1': 'Matabeleland South',
  'NAME_2': 'Mangwe',
  'UID': 356144,
  'area_km2': 0},
 {'CONTINENT': 'Africa',
  'GID_0': 'ZWE',
  'GID_1': 'ZWE.9_1',
  'GID_2': 'ZWE.9.8_2',
  'NAME_0': 'Zimbabwe',
  'NAME_1': 'Matabeleland South',
  'NAME_2': 'Matobo',
  'UID': 356161,
  'area_km2': 0},
 {'CONTINENT': 'Africa',
  'GID_0': 'ZWE',
  'GID_1': 'ZWE.9_1',
  'GID_2': 'ZWE.9.9_2',
  'NAME_0': 'Zimbabwe',
  'NAME_1': 'Matabeleland South',
  'NAME_2': 'Plumtree',
  'UID'

In [None]:
# One row per finished region. Regions whose job did not finish are absent,
# because finished_results was gathered from finished_jobs only.
df = pd.DataFrame(finished_results)
df

Unnamed: 0,CONTINENT,GID_0,GID_1,GID_2,NAME_0,NAME_1,NAME_2,UID,area_km2
0,Asia,AFG,AFG.1_1,AFG.1.10_1,Afghanistan,Badakhshan,Shahri Buzurg,10,0.0
1,Asia,AFG,AFG.1_1,AFG.1.11_1,Afghanistan,Badakhshan,Shighnan,11,0.0
2,Asia,AFG,AFG.1_1,AFG.1.12_1,Afghanistan,Badakhshan,Wakhan,12,0.0
3,Asia,AFG,AFG.1_1,AFG.1.13_1,Afghanistan,Badakhshan,Zebak,13,0.0
4,Asia,AFG,AFG.1_1,AFG.1.1_1,Afghanistan,Badakhshan,Baharak,1,0.0
...,...,...,...,...,...,...,...,...,...
47200,Africa,ZWE,ZWE.9_1,ZWE.9.5_2,Zimbabwe,Matabeleland South,Gwanda Urban,356087,0.0
47201,Africa,ZWE,ZWE.9_1,ZWE.9.6_2,Zimbabwe,Matabeleland South,Insiza,356125,0.0
47202,Africa,ZWE,ZWE.9_1,ZWE.9.7_2,Zimbabwe,Matabeleland South,Mangwe,356144,0.0
47203,Africa,ZWE,ZWE.9_1,ZWE.9.8_2,Zimbabwe,Matabeleland South,Matobo,356161,0.0


In [None]:
# Show regions with the maximum alert area: sort ascending by area_km2 and
# take the tail, so the largest value is the last row.
df.sort_values('area_km2').tail()

Unnamed: 0,CONTINENT,GID_0,GID_1,GID_2,NAME_0,NAME_1,NAME_2,UID,area_km2
9648,Africa,CIV,CIV.4_1,CIV.4.2_1,Côte d'Ivoire,Denguélé,Kabadougou,46526,59.271552
25152,Asia,MMR,MMR.13_1,MMR.13.4_1,Myanmar,Shan,Lasho,211277,62.372224
26919,Africa,NGA,NGA.35_1,NGA.35.2_1,Nigeria,Taraba,Bali,216905,62.951142
9685,Africa,CMR,CMR.3_1,CMR.3.4_1,Cameroon,Est,Lom et Djerem,35857,80.087546
38507,Africa,TCD,TCD.13_1,TCD.13.1_1,Chad,Mandoul,Barh Sara,41886,116.506367


In [None]:
# Write the results to a CSV file.
# The path is handed to pandas directly (instead of a handle from open())
# so the file is always written as UTF-8 with pandas' own newline handling.
# A text-mode handle would use the platform's default encoding and newline
# translation, which can corrupt or reject non-ASCII region names such as
# "Côte d'Ivoire".
df.to_csv('output.csv', encoding='utf-8')

In [None]:
#client.shutdown()