In [1]:
import os
import time

# pip/conda installed
import dask.array as da
import fsspec
import pandas as pd
import xarray as xr
from dask.distributed import as_completed
from dask.distributed import Client
from dask_gateway import GatewayCluster

from utils import get_logger
from utils.dask import create_cluster
from utils.dask import upload_source
from utils.hls.catalog import HLSBand
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import scene_to_urls
from utils.hls.compute import process_catalog
from utils.hls.compute import calculate_job_median

In [2]:
# Notebook-wide logger; the name 'hls-wa-test' is what appears in each log line below.
logger = get_logger('hls-wa-test')

In [3]:
# Azure storage account key read by the cells below.
# Prefer exporting AZURE_ACCOUNT_KEY before starting the kernel so the secret
# never ends up in the saved notebook. For a throwaway local run you may paste
# it into `_pasted_key`; a non-empty pasted value takes precedence.
# A key that is already exported is kept rather than overwritten with "".
_pasted_key = ""
os.environ['AZURE_ACCOUNT_KEY'] = _pasted_key or os.environ.get('AZURE_ACCOUNT_KEY', "")

In [4]:
# Chunk layout: one band per chunk, each covering a whole 3660x3660 tile,
# so the data for a band is read in a single pass.
chunks = dict(band=1, x=3660, y=3660)

In [6]:
# Load the scene catalog from its zarr store via an fsspec mapper.
# NOTE: despite its name, `catalog_url` holds the mapper object returned by
# fsspec.get_mapper, not a URL string.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/hls_wa_2015-2019.zarr",  # plain literal: no placeholders, so no f-prefix
    account_name="usfs",
    account_key=os.environ['AZURE_ACCOUNT_KEY']
)
catalog = HLSCatalog.from_zarr(catalog_url)

In [15]:
# Request the dask cluster used by the processing cells and grab a client for it.
# NOTE(review): the memory values are presumably in GiB — confirm against
# utils.dask.create_cluster.
num_workers = 64
cluster = create_cluster(
    workers=num_workers, worker_threads=1, worker_memory=4,
    scheduler_threads=4, scheduler_memory=8,
)
client = cluster.get_client()
cluster  # bare last expression so the notebook renders the cluster widget

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [16]:
# All workers must be started when source code is uploaded to them,
# so block until `num_workers` workers have joined before uploading ./utils.
# (presumably the upload lets tasks import the `utils` package on the workers —
# confirm against utils.dask.upload_source)
logger.info("Waiting for cluster workers to start")
client.wait_for_workers(num_workers)
logger.info("Uploading code to workers")
upload_source('./utils', client)

2021-01-13 20:50:18,147 [INFO] hls-wa-test - Waiting for cluster workers to start
2021-01-13 20:50:30,260 [INFO] hls-wa-test - Uploading code to workers


In [17]:
# Storage and grouping settings handed to process_catalog in the processing loop.
account_name = "usfs"
account_key = os.environ["AZURE_ACCOUNT_KEY"]
storage_container = "fia/hls-testing/wa"  # the loop appends a per-year suffix to this

catalog_groupby = "tile"      # forwarded as process_catalog(catalog_groupby=...)
job_groupby = "time.month"    # forwarded as process_catalog(job_groupby=...)

In [18]:
# Keep only the scenes whose year is 2015 or later, then split them by year.
scenes = catalog.xr_ds
yr_catalogs = scenes.where(scenes['year'] >= 2015, drop=True).groupby('year')

In [19]:
# Run process_catalog once per year, each under its own storage prefix.
for yr, ca in yr_catalogs:
    # The group key arrives as a float (the run log shows "2015.0" and a
    # float64 `year` variable), so cast it to int: otherwise the storage
    # prefix becomes ".../2015.0" instead of ".../2015".
    year = int(yr)
    logger.info(f"Starting process for {year}")
    ca.info()  # print a summary of this year's catalog dataset
    storage_prefix = f"{storage_container}/{year}"
    process_catalog(
        catalog=ca,
        catalog_groupby=catalog_groupby,
        job_fn=calculate_job_median,
        job_groupby=job_groupby,
        chunks=chunks,
        account_name=account_name,
        storage_container=storage_prefix,
        account_key=account_key,
        client=client,
        concurrency=5,  # presumably the max number of jobs in flight — confirm in utils.hls.compute
        logger=logger
    )

2021-01-13 20:50:39,470 [INFO] hls-wa-test - Starting process for 2015.0
xarray.Dataset {
dimensions:
	index = 2241 ;

variables:
	datetime64[ns] dt(index) ;
	object scene(index) ;
	object sensor(index) ;
	object tile(index) ;
	float64 year(index) ;
	int64 index(index) ;

// global attributes:
	:bands = [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREEN: 3>, <HLSBand.RED: 4>, <HLSBand.NIR_NARROW: 5>, <HLSBand.SWIR1: 6>, <HLSBand.SWIR2: 7>, <HLSBand.CIRRUS: 8>, <HLSBand.QA: 11>] ;
}2021-01-13 20:50:53,977 [INFO] hls-wa-test - Submitting job 10TCT
2021-01-13 20:50:54,105 [INFO] hls-wa-test - Submitting job 10TDR
2021-01-13 20:50:54,187 [INFO] hls-wa-test - Submitting job 10TDS
2021-01-13 20:50:54,307 [INFO] hls-wa-test - Submitting job 10TDT
2021-01-13 20:50:54,426 [INFO] hls-wa-test - Submitting job 10TER
2021-01-13 20:53:03,355 [INFO] hls-wa-test - Completed job 10TDR
2021-01-13 20:53:03,356 [INFO] hls-wa-test - Submitting job 10TES
2021-01-13 20:53:10,895 [INFO] hls-wa-t

KeyboardInterrupt: 

In [20]:
# Shut the cluster down once processing has finished (or was interrupted) so it is not left running.
cluster.shutdown()