In [1]:
import os
import time

# pip/conda installed
import dask.array as da
import fsspec
import pandas as pd
import xarray as xr
from dask.distributed import as_completed
from dask.distributed import Client
from dask_gateway import GatewayCluster

from utils import get_logger
from utils.dask import create_cluster
from utils.dask import upload_source
from utils.hls.catalog import HLSBand
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import scene_to_urls
from utils.hls.compute import process_catalog
from utils.hls.compute import calculate_job_median

In [2]:
# Chunking used when reading tiles: one band per chunk, with x and y each
# spanning a full 3660x3660 tile so a tile's data is read in one go.
chunks = dict(band=1, x=3660, y=3660)

In [3]:
# Azure storage account key read by the cells below.
# Prefer exporting AZURE_ACCOUNT_KEY before starting the kernel: a value that
# is already set is kept instead of being clobbered by the empty placeholder.
# If it is not set, fill in your key here (and never commit the filled-in key).
os.environ.setdefault('AZURE_ACCOUNT_KEY', "")

In [4]:
# Open the HLS scene catalog from its zarr store on Azure (az:// path) and
# load it through the project's HLSCatalog helper.
# Note: despite its name, `catalog_url` holds the mapper object returned by
# fsspec.get_mapper, not a URL string.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/hls_wa_2015-2019.zarr",  # plain string: nothing to interpolate
    account_name="usfs",
    account_key=os.environ['AZURE_ACCOUNT_KEY'],
)
catalog = HLSCatalog.from_zarr(catalog_url)

In [7]:
# Start a Dask cluster through the project helper, print its dashboard link,
# and obtain a client connected to it for the cells below.
# NOTE(review): the positional arguments (64, 1, 4, 2, 4) are defined by
# utils.dask.create_cluster, which is not visible here -- presumably worker
# count and per-worker resources; confirm before tuning.
cluster = create_cluster(64, 1, 4, 2, 4)
print(f"Cluster dashboard visible at: {cluster.dashboard_link}")
client = cluster.get_client()

Cluster dashboard visible at: /services/dask-gateway/clusters/default.3459c119763c4537a5a9ca048ccaa495/status


Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 1391, in _do_ssl_handshake
    self.socket.do_handshake()
  File "/srv/conda/envs/notebook/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:1124)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 189, in _handle_events
    handler_func(fileobj, events)
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 696, in _handle_events
    self._handle_read()
  File "/srv/conda/envs/notebook/lib/python3.8/site

In [8]:
# Hand the current directory ('./') and the cluster client to the project's
# upload_source helper. Presumably this ships the local `utils` code to the
# workers so they can import it -- TODO confirm against utils.dask.
upload_source('./', client)

In [9]:
# Parameters passed to process_catalog for every per-year run below.

# -- how the catalog and each job are grouped
catalog_groupby = "tile"
job_groupby = "time.month"

# -- Azure storage settings (the key comes from the environment, set earlier)
account_name = "usfs"
account_key = os.environ["AZURE_ACCOUNT_KEY"]
storage_container = "fia/hls/wa"

In [10]:
# Group the catalog's underlying xarray dataset (`xr_ds`) by its 'year'
# variable; iterating the result yields (year, per-year subset) pairs.
yr_catalogs = catalog.xr_ds.groupby('year')

In [11]:
# Run the median-compositing jobs one year at a time.
# For each (year, per-year catalog subset) pair: log the start, print a summary
# of the subset, then submit it to process_catalog, which groups the catalog by
# `catalog_groupby` and applies `calculate_job_median` per `job_groupby` group.
logger = get_logger('hls-wa-test')
for yr, ca in yr_catalogs:
    logger.info(f"Starting process for {yr}")
    ca.info()  # prints the subset's summary to stdout; returns None
    # Per-year output location. This prefix (not the bare container path) must
    # be the one handed to process_catalog; otherwise every year targets the
    # same location and, presumably, later years overwrite earlier results.
    storage_prefix = f"{storage_container}/{yr}"
    process_catalog(
        catalog=ca,
        catalog_groupby=catalog_groupby,
        job_fn=calculate_job_median,
        job_groupby=job_groupby,
        chunks=chunks,
        account_name=account_name,
        storage_container=storage_prefix,
        account_key=account_key,
        client=client,
        concurrency=5,  # number of jobs kept in flight at once -- TODO confirm in utils.hls.compute
        logger=logger,
    )

2021-01-01 01:36:05,807 [INFO] hls-wa-test - Starting process for 2015
xarray.Dataset {
dimensions:
	index = 2241 ;

variables:
	datetime64[ns] dt(index) ;
	int64 index(index) ;
	object scene(index) ;
	object sensor(index) ;
	object tile(index) ;
	int64 year(index) ;

// global attributes:
	:bands = [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREEN: 3>, <HLSBand.RED: 4>, <HLSBand.NIR_NARROW: 5>, <HLSBand.SWIR1: 6>, <HLSBand.SWIR2: 7>, <HLSBand.CIRRUS: 8>, <HLSBand.QA: 11>] ;
}2021-01-01 01:36:05,808 [INFO] hls-wa-test - None
2021-01-01 01:36:19,609 [INFO] hls-wa-test - Submitting job 10TCT
2021-01-01 01:36:19,701 [INFO] hls-wa-test - Submitting job 10TDR
2021-01-01 01:36:19,764 [INFO] hls-wa-test - Submitting job 10TDS
2021-01-01 01:36:19,859 [INFO] hls-wa-test - Submitting job 10TDT
2021-01-01 01:36:19,949 [INFO] hls-wa-test - Submitting job 10TER
2021-01-01 01:38:43,672 [INFO] hls-wa-test - Completed job 10TER
2021-01-01 01:38:43,673 [INFO] hls-wa-test - Submitting job

KeyboardInterrupt: 

In [12]:
# Shut the Dask cluster down once processing is finished (or was interrupted)
# so that it does not keep running.
cluster.shutdown()

## TODO

1. Do QA on results