In [None]:
import os

import fsspec

from utils import get_logger
from utils.dask import create_cluster
from utils.dask import upload_source
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import HLSBand
from utils.hls.compute import calculate_job_median
from utils.hls.compute import process_catalog



In [None]:
# Storage account keys are looked up in the environment. setdefault() leaves a
# key that is already exported untouched and only falls back to an empty
# placeholder, so the os.environ[...] lookups in this notebook never raise
# KeyError. Never paste real keys into the notebook itself.
os.environ.setdefault('AZ_ACCESS_KEY_USFS', "")
os.environ.setdefault('AZ_ACCESS_KEY_LUMONITOREASTUS2', "")

# Mapper over the zarr catalog, opened with the "usfs" account credentials.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/hls_conus_2015-2019.zarr",
    account_name="usfs",
    account_key=os.environ['AZ_ACCESS_KEY_USFS']
)

catalog = HLSCatalog.from_zarr(catalog_url)
bands = [
    HLSBand.COASTAL_AEROSOL,
    HLSBand.BLUE,
    HLSBand.GREEN,
    HLSBand.RED,
    HLSBand.NIR_NARROW,
    HLSBand.SWIR1,
    HLSBand.SWIR2,
    HLSBand.QA  # needed for qa
]

# Record the band selection on the catalog dataset's attributes.
# NOTE(review): presumably read by the processing job — confirm in utils.hls.compute.
catalog.xr_ds.attrs['bands'] = bands


logger = get_logger('hls-conus-2016')

In [None]:
# First pass: keep only catalog entries whose 'year' equals 2016.
year_mask = catalog.xr_ds['year'] == 2016
catalog.xr_ds = catalog.xr_ds.where(year_mask, drop=True)

# Second pass, on the already-filtered dataset: keep only sensor 'L'.
# The two filters are deliberately applied one after the other because doing
# them together produced an error about duplicate indices.
sensor_mask = catalog.xr_ds['sensor'] == 'L'
catalog.xr_ds = catalog.xr_ds.where(sensor_mask, drop=True)

In [None]:
# Start the Dask cluster through the project helper.
# NOTE(review): the meaning of the positional arguments (64, 1, 4) is defined
# in utils.dask.create_cluster and is not visible here — confirm before tuning.
cluster = create_cluster(64, 1, 4)

# Surface the dashboard address so the run can be monitored.
dashboard_url = cluster.dashboard_link
print(f"Cluster dashboard visible at: {dashboard_url}")

# Client handle used by the cells below to talk to the cluster.
client = cluster.get_client()


In [None]:
# Hand the local ./utils package to the helper together with the cluster client.
# NOTE(review): presumably this ships the code to the workers so they can import
# it — confirm against utils.dask.upload_source.
upload_source('./utils', client)

In [None]:
# Arguments for the catalog processing run, gathered in one place and passed
# through unchanged as keyword arguments.
job_settings = {
    # What to process and how to split it into jobs.
    'catalog': catalog.xr_ds,
    'catalog_groupby': "index",
    'job_fn': calculate_job_median,
    'job_groupby': "time.year",
    'chunks': {'band': 1, 'x': 3660, 'y': 3660},
    # Storage account settings handed to process_catalog.
    # NOTE(review): presumably the destination for results — confirm.
    'account_name': "lumonitoreastus2",
    'storage_container': "hls",
    'account_key': os.environ['AZ_ACCESS_KEY_LUMONITOREASTUS2'],
    'client': client,
    # NOTE(review): the reason for a concurrency of 2 is undocumented — confirm.
    'concurrency': 2,
    # NOTE(review): purpose of the checkpoint path is undocumented — confirm.
    'checkpoint_path': './checkpoint',
    'logger': logger,
}

process_catalog(**job_settings)

In [None]:
# Tear down the cluster created earlier in this notebook once processing is done.
cluster.shutdown()