In [2]:
import os

import fsspec
import geopandas as gpd

from utils import get_logger
from utils.dask import create_cluster
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import HLSBand
from utils.hls.compute import calculate_job_median
from utils.hls.compute import jobs_from_catalog, process_jobs

In [5]:
# --- Credentials -------------------------------------------------------------
# Storage keys are taken from the environment when they are already exported.
# The empty string is only a fallback placeholder so the lookups below never
# raise KeyError. Unconditionally assigning "" here would discard any real key
# supplied by the environment, and pasting a key into the notebook would leak
# it on commit -- export the variables before starting the kernel instead.
os.environ.setdefault('AZ_ACCESS_KEY_USFS', "")
os.environ.setdefault('AZ_ACCESS_KEY_LUMONITOREASTUS2', "")
# NOTE(review): these two variables are not read again in this notebook;
# presumably the raster-reading code in `utils` picks them up -- confirm.
os.environ['AZURE_STORAGE_ACCOUNT'] = 'lumonitoreastus2'
os.environ['AZURE_STORAGE_ACCESS_KEY'] = os.environ['AZ_ACCESS_KEY_LUMONITOREASTUS2']

# --- Catalog location --------------------------------------------------------
# Key-value mapper over the zarr catalog stored in the "usfs" account.
# NOTE(review): `catalog_url` is not referenced by the cells visible here.
catalog_url = fsspec.get_mapper(
    "az://fia/catalogs/hls_conus_2015-2019.zarr",
    account_name="usfs",
    account_key=os.environ['AZ_ACCESS_KEY_USFS']
)

# --- Area of interest --------------------------------------------------------
# Census TIGER state boundaries, reprojected to EPSG:4326, narrowed to the
# single row whose NAME is 'California'.
tiger_states  = gpd.read_file('zip+http://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_5m.zip').to_crs('EPSG:4326')
california = tiger_states[tiger_states['NAME']=='California']

# --- Bands to process --------------------------------------------------------
bands = [
    HLSBand.COASTAL_AEROSOL,
    HLSBand.BLUE,
    HLSBand.GREEN,
    HLSBand.RED,
    HLSBand.NIR_NARROW,
    HLSBand.SWIR1,
    HLSBand.SWIR2,
    HLSBand.QA  # needed for qa
]

# Build the scene catalog for the California geometry, restricted to 2016.
catalog = HLSCatalog.from_geom(geom=california, years=[2016], bands=bands)

# read the entire data once (each tile is 3660x3660)...
chunks = {'band': 1, 'x': 3660, 'y': 3660}

logger = get_logger('hls-conus-2016')

print(catalog.xr_ds)

Reading tile extents...
Read tile extents for 56686 tiles
<xarray.Dataset>
Dimensions:  (index: 6351)
Coordinates:
  * index    (index) int64 0 0 0 0 0 0 0 0 0 0 ... 73 73 73 73 73 73 73 73 73 73
Data variables:
    tile     (index) object '11SQU' '11SQU' '11SQU' ... '11SMU' '11SMU' '11SMU'
    year     (index) object 2016 2016 2016 2016 2016 ... 2016 2016 2016 2016
    scene    (index) object 'L30/HLS.L30.T11SQU.2016009.v1.4' ... 'S30/HLS.S3...
    sensor   (index) object 'L' 'L' 'L' 'L' 'L' 'L' ... 'S' 'S' 'S' 'S' 'S' 'S'
    dt       (index) datetime64[ns] 2016-01-09 2016-01-16 ... 2016-12-25
Attributes:
    bands:    [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREE...


In [6]:
# Narrow the catalog to 2016 scenes whose sensor code is 'L'.
# The two filters are applied one after the other (not combined into a single
# mask) because a combined filter raised an error about duplicate indices.
year_mask = catalog.xr_ds['year'] == 2016
catalog.xr_ds = catalog.xr_ds.where(year_mask, drop=True)

sensor_mask = catalog.xr_ds['sensor'] == 'L'
catalog.xr_ds = catalog.xr_ds.where(sensor_mask, drop=True)

# Optional: restrict to a single tile while debugging.
# catalog.xr_ds = catalog.xr_ds.where(catalog.xr_ds['tile']=='11ULP', drop=True)

print(catalog.xr_ds)

<xarray.Dataset>
Dimensions:  (index: 3161)
Coordinates:
  * index    (index) int64 0 0 0 0 0 0 0 0 0 0 ... 73 73 73 73 73 73 73 73 73 73
Data variables:
    tile     (index) object '11SQU' '11SQU' '11SQU' ... '11SMU' '11SMU' '11SMU'
    year     (index) object 2016 2016 2016 2016 2016 ... 2016 2016 2016 2016
    scene    (index) object 'L30/HLS.L30.T11SQU.2016009.v1.4' ... 'L30/HLS.L3...
    sensor   (index) object 'L' 'L' 'L' 'L' 'L' 'L' ... 'L' 'L' 'L' 'L' 'L' 'L'
    dt       (index) datetime64[ns] 2016-01-09 2016-01-16 ... 2016-12-31
Attributes:
    bands:    [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREE...


In [8]:
# Split the filtered catalog into jobs keyed on the 'tile' variable
# (the job names that appear in the run log are tile ids).
jobs = jobs_from_catalog(catalog.xr_ds, 'tile')

# Sizing passed through to the cluster that process_jobs starts.
# NOTE(review): memory units are not stated here -- presumably GiB; confirm
# against utils.dask.create_cluster.
cluster_args = {
    'workers': 64,
    'worker_threads': 1,
    'worker_memory': 8,
    'scheduler_threads': 4,
    'scheduler_memory': 8,
}

# Destination storage for the job results.
output_storage = {
    'account_name': 'lumonitoreastus2',
    'storage_container': 'hls',
    'account_key': os.environ['AZ_ACCESS_KEY_LUMONITOREASTUS2'],
}

# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'cluster' is not defined". That name is not referenced in
# this cell, so the error presumably originates inside `utils` -- confirm.
process_jobs(
    jobs=jobs,
    job_fn=calculate_job_median,
    # TODO(review): the rationale for running 2 jobs at a time is undocumented.
    concurrency=2,
    # TODO(review): document what is written under this checkpoint path.
    checkpoint_path='./checkpoint',
    logger=logger,
    cluster_args=cluster_args,
    code_path='./utils',
    job_groupby='time.year',
    bands=bands,
    chunks=chunks,
    **output_storage
)

2021-02-10 22:01:56,095 [INFO] hls-conus-2016 - Starting cluster
2021-02-10 22:01:56,095 [INFO] hls-conus-2016 - Starting cluster
2021-02-10 22:02:02,937 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.64a00cc975954662abfc1502f1de92c4/status
2021-02-10 22:02:02,937 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.64a00cc975954662abfc1502f1de92c4/status
2021-02-10 22:02:02,961 [INFO] hls-conus-2016 - Uploading code to cluster
2021-02-10 22:02:02,961 [INFO] hls-conus-2016 - Uploading code to cluster
2021-02-10 22:02:02,964 [INFO] hls-conus-2016 - Submitting job 10SDH
2021-02-10 22:02:02,964 [INFO] hls-conus-2016 - Submitting job 10SDH
2021-02-10 22:02:02,966 [INFO] hls-conus-2016 - Submitting job 10SDJ
2021-02-10 22:02:02,966 [INFO] hls-conus-2016 - Submitting job 10SDJ
2021-02-10 22:05:14,577 [INFO] hls-conus-2016 - Completed job 10SDJ
2021-02-10 22:05:14,577 [INFO] hls-conus-2016 - Completed job 10

NameError: name 'cluster' is not defined