In [1]:
import os

import fsspec
import geopandas as gpd

from utils import get_logger
from utils.dask import create_cluster
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import HLSBand
from utils.hls.compute import calculate_job_median
from utils.hls.compute import jobs_from_catalog, process_jobs

In [2]:
# Azure storage account used by the rest of this notebook (it is forwarded to
# the Dask workers and to process_jobs through os.environ).
os.environ['AZURE_STORAGE_ACCOUNT'] = 'lumonitoreastus2'
# Keep an access key that is already exported in the environment instead of
# clobbering it with an empty string. The empty placeholder is only installed
# when nothing is set, so later os.environ['AZURE_STORAGE_ACCESS_KEY'] lookups
# still succeed. Do not paste the real key here: it would be saved with the
# notebook.
os.environ.setdefault('AZURE_STORAGE_ACCESS_KEY', '')
# Read the Census Bureau 2019 state boundaries shapefile straight from the
# zipped download, then reproject it to EPSG:4326.
tiger_states = gpd.read_file(
    'zip+http://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_5m.zip'
)
tiger_states = tiger_states.to_crs('EPSG:4326')

# Postal codes to leave out: Alaska, Hawaii and the island territories.
unneeded_states = ['AK', 'GU', 'PR', 'VI', 'MP', 'AS', 'HI']
keep_state = ~tiger_states.STUSPS.isin(unneeded_states)
# Merge the remaining state polygons that share the same LSAD value.
conus = tiger_states[keep_state].dissolve(by="LSAD")

# Spectral bands requested from the catalog.
spectral_bands = [
    HLSBand.COASTAL_AEROSOL,
    HLSBand.BLUE,
    HLSBand.GREEN,
    HLSBand.RED,
    HLSBand.NIR_NARROW,
    HLSBand.SWIR1,
    HLSBand.SWIR2,
]
# The QA band is appended last; it is needed for qa.
bands = spectral_bands + [HLSBand.QA]
 


In [None]:
def _keep_matching(ds, name, value):
    """Mask ``ds`` where ``ds[name] != value`` and drop labels with no match."""
    return ds.where(ds[name] == value, drop=True)


year = 2016  # any other year works too, e.g. 2013 or 2020
catalog = HLSCatalog.from_geom(geom=conus, years=[year], bands=bands)

# One chunk per band spanning a full 3660x3660 tile, so the entire data of a
# tile is read once.
chunks = dict(band=1, x=3660, y=3660)

logger = get_logger('hls-conus')

# Narrow the catalog in two passes: first to the selected year, then to the
# 'L' sensor (presumably the Landsat-derived scenes -- confirm against HLSCatalog).
catalog.xr_ds = _keep_matching(catalog.xr_ds, 'year', year)
catalog.xr_ds = _keep_matching(catalog.xr_ds, 'sensor', 'L')
print(catalog.xr_ds)



In [None]:
# Build the jobs from the filtered catalog, grouped by 'tile'.
jobs = jobs_from_catalog(catalog.xr_ds, 'tile')

# Storage credentials are read once and reused for both the workers'
# environment and the process_jobs call.
storage_account = os.environ['AZURE_STORAGE_ACCOUNT']
storage_key = os.environ['AZURE_STORAGE_ACCESS_KEY']

# Sizing of the Dask cluster plus the environment variables handed to it.
cluster_args = {
    'workers': 64,
    'worker_threads': 1,
    'worker_memory': 8,
    'scheduler_threads': 4,
    'scheduler_memory': 8,
    'environment_options': {
        'AZURE_STORAGE_ACCOUNT': storage_account,
        'AZURE_STORAGE_ACCESS_KEY': storage_key,
        'CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE': 'YES',
    },
}

# Run calculate_job_median over every job, 4 at a time, checkpointing under
# ./checkpoint and targeting the 'hls' container under zarr/<year>.
process_jobs(
    jobs=jobs,
    job_fn=calculate_job_median,
    concurrency=4,
    checkpoint_path='./checkpoint',
    logger=logger,
    cluster_args=cluster_args,
    code_path='./utils',
    job_groupby='time.year',
    bands=bands,
    chunks=chunks,
    account_name=storage_account,
    account_key=storage_key,
    storage_container='hls',
    subfolder=f"zarr/{year}"
)