In [1]:
import os
import time

# pip/conda installed
import dask.array as da
import fsspec
import pandas as pd
import xarray as xr
from dask.distributed import as_completed
from dask.distributed import Client
from dask_gateway import GatewayCluster

from utils.dask import create_cluster
from utils.dask import upload_source
from utils.hls.catalog import HLSBand
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import scene_to_urls
from utils.hls.compute import process_catalog
from utils.hls.compute import calculate_job_median

In [2]:
# Chunk layout later handed to process_catalog: a single band per chunk and the
# full 3660x3660 tile along x/y, so each tile's data is read in one pass.
chunks = dict(band=1, x=3660, y=3660)

In [3]:
# Azure storage account key, read from os.environ by the cells below.
# Preferred: export AZURE_ACCOUNT_KEY before starting the kernel. setdefault keeps
# that value, whereas a plain assignment would overwrite it with the placeholder.
# The literal here is only a fallback for when the variable is unset; never commit
# a real key inside the notebook.
os.environ.setdefault('AZURE_ACCOUNT_KEY', "")

In [4]:
# Build an fsspec key/value mapper over the catalog's zarr store (az:// protocol)
# and load it as an HLSCatalog. The account key comes from the environment so it
# is never written into the notebook itself.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/fia10.zarr",  # plain literal: the former f-string had no placeholders
    account_name="usfs",
    account_key=os.environ['AZURE_ACCOUNT_KEY'],
)
point_catalog = HLSCatalog.from_zarr(catalog_url)

In [5]:
# Start a Dask cluster through the project helper and connect a client to it.
# NOTE(review): the positional arguments (64, 1, 4) are presumably sizing
# parameters (e.g. workers / cores / memory) — confirm against
# utils.dask.create_cluster before changing them.
cluster = create_cluster(64, 1, 4)
print(f"Cluster dashboard visible at: {cluster.dashboard_link}")
# `client` is reused below by upload_source and process_catalog.
client = cluster.get_client()

Cluster dashboard visible at: /services/dask-gateway/clusters/default.24c49989e49f4221a924824a0460b7ef/status


In [6]:
# Pass the current directory ('./') and the cluster client to the project helper.
# Presumably this ships the local source (e.g. the `utils` package) to the
# workers so they can import it — confirm in utils.dask.upload_source.
upload_source('./', client)

In [7]:
# Azure storage settings forwarded to process_catalog; the key is taken from the
# environment rather than written here.
# NOTE(review): presumably this container is where job results are written — confirm.
account_name = "usfs"
storage_container = "fia/hls-testing"
account_key = os.environ["AZURE_ACCOUNT_KEY"]

# Grouping keys: one for the catalog dataset, one applied within each job.
# NOTE(review): presumably jobs are split by INDEX and medians taken per calendar
# month — verify against utils.hls.compute.
catalog_groupby = "INDEX"
job_groupby = "time.month"

process_catalog(
    point_catalog.xr_ds,
    catalog_groupby,
    calculate_job_median,
    job_groupby,
    chunks,
    account_name,
    storage_container,
    account_key,
    client,
    2,  # NOTE(review): meaning of this trailing positional argument is not visible here — confirm
)

Submitting job 2
Submitting job 5
Completed job 2)
Submitting job 8
Completed job 5)
Completed job 8)
3 completed in 231.7607957609871 seconds


In [9]:
# Shut the cluster down now that the jobs have completed, so it is not left running.
cluster.shutdown()

## TODO

1. Run quality-assurance (QA) checks on the computed results.