In [1]:
import os

# pip/conda installed
import fsspec

from utils import get_logger
from utils.hls.catalog import HLSCatalog
from utils.hls.compute import process_jobs
from utils.hls.compute import jobs_from_catalog
from utils.hls.compute import calculate_job_median

In [2]:
# Local path of the helper package handed to process_jobs as `code_path`
# (presumably what gets uploaded to the cluster -- confirm in utils.hls.compute).
code_path = './utils'

# Cluster sizing handed to process_jobs as `cluster_args`.
# NOTE(review): memory units are not visible in this notebook (GB assumed) -- confirm.
cluster_args = {
    'workers': 64,
    'worker_threads': 1,
    'worker_memory': 8,
    'scheduler_threads': 4,
    'scheduler_memory': 8,
}

# Logger shared by every cell below.
logger = get_logger('hls-conus')

In [3]:
# Azure storage account key, read by later cells from AZURE_ACCOUNT_KEY.
# Only fall back to the placeholder when the variable is unset or empty, so a
# key already exported in the environment is not overwritten with "".
if not os.environ.get('AZURE_ACCOUNT_KEY'):
    # fill with your account key
    os.environ['AZURE_ACCOUNT_KEY'] = ""

In [4]:
# Open the catalog's Zarr store through fsspec's "az://" protocol, authenticating
# with the key held in the AZURE_ACCOUNT_KEY environment variable.
# Note: despite its name, `catalog_url` holds the mapper object returned by
# fsspec.get_mapper, not a URL string.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/hls_conus_2015-2019.zarr",  # plain literal: nothing to interpolate
    account_name="usfs",
    account_key=os.environ['AZURE_ACCOUNT_KEY']
)
# Build the HLS scene catalog from that store.
catalog = HLSCatalog.from_zarr(catalog_url)

In [5]:
# Keyword arguments that the processing loop forwards to calculate_job_median.

# -- where results are written
account_name = "usfs"
account_key = os.environ["AZURE_ACCOUNT_KEY"]
storage_container = "fia/hls"

# -- what is computed
job_groupby = "time.month"
bands = catalog.xr_ds.attrs['bands']

# One chunk per band covering 3660 x 3660 pixels, so an entire tile is read
# once (each tile is 3660x3660).
chunks = {
    'band': 1,
    'x': 3660,
    'y': 3660,
}

In [6]:
# Key used to turn each yearly catalog into job arguments (catalog -> job args).
catalog_groupby = "tile"

# Keep only scenes whose year is 2015 or later, dropping everything else,
# then split what remains into one group per year.
# NOTE(review): the notebook output lists `year` as float64, presumably because
# where() masks with NaN before dropping -- confirm against the stored catalog.
yr_catalogs = (
    catalog.xr_ds
    .where(catalog.xr_ds['year'] >= 2015, drop=True)
    .groupby('year')
)

In [7]:
# Process the catalog one year at a time: build the jobs for that year's scenes
# and hand them to process_jobs, which runs calculate_job_median for each job.
for yr, ca in yr_catalogs:
    # The group label arrives as a float (the log shows "2015.0"). Convert it to
    # an int so the year reads "2015" in the log message, the storage prefix and
    # the checkpoint filename instead of "2015.0".
    year = int(yr)
    logger.info(f"Starting process for {year}")
    # Print a summary of this year's catalog subset.
    ca.info()
    # Per-year output location and checkpoint file.
    storage_prefix = f"{storage_container}/{year}"
    checkpoint_path = f"checkpoints/{year}.txt"
    jobs = jobs_from_catalog(ca, catalog_groupby)
    process_jobs(
        jobs=jobs,
        job_fn=calculate_job_median,
        checkpoint_path=checkpoint_path,
        logger=logger,
        cluster_args=cluster_args,
        code_path=code_path,
        concurrency=2,  # run 2 jobs at once
        cluster_restart_freq=16,  # restart after 16 jobs
        # kwargs for calculate_job_median
        job_groupby=job_groupby,
        bands=bands,
        chunks=chunks,
        account_name=account_name,
        storage_container=storage_prefix,
        account_key=account_key,
    )

2021-02-05 22:16:05,315 [INFO] hls-conus - Starting process for 2015.0
xarray.Dataset {
dimensions:
	index = 52082 ;

variables:
	datetime64[ns] dt(index) ;
	object scene(index) ;
	object sensor(index) ;
	object tile(index) ;
	float64 year(index) ;
	int64 index(index) ;

// global attributes:
	:bands = [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREEN: 3>, <HLSBand.RED: 4>, <HLSBand.NIR_NARROW: 5>, <HLSBand.SWIR1: 6>, <HLSBand.SWIR2: 7>, <HLSBand.CIRRUS: 8>, <HLSBand.QA: 11>] ;
}2021-02-05 22:16:06,045 [INFO] hls-conus - Starting cluster
2021-02-05 22:16:16,855 [INFO] hls-conus - Cluster dashboard visible at /services/dask-gateway/clusters/default.cf4333e3a17a412e85154e5b397ed371/status
2021-02-05 22:16:16,878 [INFO] hls-conus - Uploading code to cluster
2021-02-05 22:16:16,881 [INFO] hls-conus - Submitting job 17SMD
2021-02-05 22:16:33,546 [ERROR] hls-conus - Exception from dask cluster
Traceback (most recent call last):
  File "/home/jovyan/data-ingestion/utils/hls/com