In [1]:
import os

import fsspec
import geopandas as gpd

from utils import get_logger
from utils.dask import create_cluster
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import HLSBand
from utils.hls.compute import calculate_job_median
from utils.hls.compute import jobs_from_catalog, process_jobs

In [2]:
# Azure storage configuration read later via os.environ when submitting jobs.
os.environ['AZURE_STORAGE_ACCOUNT'] = 'lumonitoreastus2'
# Do not clobber a key that is already exported in the environment with the
# blank placeholder. setdefault keeps an existing value and otherwise falls
# back to '' so later os.environ['AZURE_STORAGE_ACCESS_KEY'] lookups still
# succeed. Never commit a real key in this cell.
os.environ.setdefault('AZURE_STORAGE_ACCESS_KEY', '')

# This stopped working on pangeo upgrade on 25Mar2021
# tiger_states  = gpd.read_file('zip+http://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_5m.zip').to_crs('EPSG:4326')

# Read the state boundaries from a local copy of the archive instead and
# reproject them to EPSG:4326.
tiger_states = gpd.read_file('./cb_2019_us_state_5m.zip').to_crs('EPSG:4326')

# STUSPS codes to exclude from the study area; the remaining rows are
# dissolved by their LSAD column to build the `conus` geometry.
nopes = ['AK', 'GU', 'PR', 'VI', 'MP', 'AS', 'HI']
conus = tiger_states[~tiger_states.STUSPS.isin(nopes)].dissolve(by="LSAD")

# Bands requested from the HLS catalog and passed on to the compute jobs.
bands = [
    HLSBand.COASTAL_AEROSOL,
    HLSBand.BLUE,
    HLSBand.GREEN,
    HLSBand.RED,
    HLSBand.NIR_NARROW,
    HLSBand.SWIR1,
    HLSBand.SWIR2,
    HLSBand.QA  # needed for qa
]
 


In [3]:
# Run parameters for this notebook.
year = 2014
# read the entire data once (each tile is 3660x3660)...
chunks = dict(band=1, x=3660, y=3660)

catalog = HLSCatalog.from_geom(geom=conus, years=[year], bands=bands)
logger = get_logger('hls-conus')

# Restrict the catalog to the requested year first ...
is_target_year = catalog.xr_ds['year'] == year
catalog.xr_ds = catalog.xr_ds.where(is_target_year, drop=True)

# ... and only then to sensor 'L'. The two filters are applied as separate
# steps because combining them raised an error about duplicate indices.
is_sensor_l = catalog.xr_ds['sensor'] == 'L'
catalog.xr_ds = catalog.xr_ds.where(is_sensor_l, drop=True)

# Optional single-tile filter, handy when debugging:
# catalog.xr_ds = catalog.xr_ds.where(catalog.xr_ds['tile'] == '10SFJ', drop=True)
print(catalog.xr_ds)



Reading tile extents...
Read tile extents for 56686 tiles
<xarray.Dataset>
Dimensions:  (index: 46219)
Coordinates:
  * index    (index) int64 0 0 0 0 0 0 0 0 0 ... 989 989 989 989 989 989 989 989
Data variables:
    tile     (index) object '15SVU' '15SVU' '15SVU' ... '13SFR' '13SFR' '13SFR'
    year     (index) object 2014 2014 2014 2014 2014 ... 2014 2014 2014 2014
    scene    (index) object 'L30/HLS.L30.T15SVU.2014001.v1.4' ... 'L30/HLS.L3...
    sensor   (index) object 'L' 'L' 'L' 'L' 'L' 'L' ... 'L' 'L' 'L' 'L' 'L' 'L'
    dt       (index) datetime64[ns] 2014-01-01 2014-01-08 ... 2014-12-29
Attributes:
    bands:    [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREE...


In [6]:
# Build the job list from the filtered catalog, grouping on 'tile'.
jobs = jobs_from_catalog(catalog.xr_ds, 'tile')

# Settings handed to process_jobs for the dask cluster it starts.
# NOTE(review): memory units are not visible here -- presumably GiB; confirm
# against utils.dask.create_cluster.
cluster_args = dict(
    workers=64,
    worker_threads=1,
    worker_memory=8,
    scheduler_threads=4,
    scheduler_memory=8,
    # NOTE(review): the storage credentials below are blank, while
    # account_name/account_key are passed to process_jobs from os.environ.
    # Confirm whether the workers need these populated as well.
    environment_options=dict(
        AZURE_STORAGE_ACCOUNT='',
        AZURE_STORAGE_ACCESS_KEY='',
        CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE='YES'
    )
)

process_jobs(
    jobs=jobs,
    job_fn=calculate_job_median,
    concurrency=4,
    # Derive the checkpoint directory from `year` so it always matches the
    # output subfolder below ('./checkpoint2014' for year = 2014) instead of
    # silently reusing another year's checkpoints when `year` is changed.
    checkpoint_path=f'./checkpoint{year}',
    logger=logger,
    cluster_args=cluster_args,
    code_path='./utils',
    job_groupby='time.year',
    bands=bands,
    chunks=chunks,
    account_name=os.environ['AZURE_STORAGE_ACCOUNT'],
    account_key=os.environ['AZURE_STORAGE_ACCESS_KEY'],
    storage_container='hls',
    subfolder=f"zarr/{year}"
)

2021-05-24 18:56:08,873 [INFO] hls-conus - Starting cluster
2021-05-24 18:56:16,984 [INFO] hls-conus - Cluster dashboard visible at /services/dask-gateway/clusters/default.22d07ff685d84ca5b8ab6001641be2a8/status
2021-05-24 18:56:17,004 [INFO] hls-conus - Uploading code to cluster
2021-05-24 18:56:17,007 [INFO] hls-conus - Submitting job 10SDH
2021-05-24 18:56:17,009 [INFO] hls-conus - Submitting job 10SDJ
2021-05-24 18:56:17,011 [INFO] hls-conus - Submitting job 10SEF
2021-05-24 18:56:17,013 [INFO] hls-conus - Submitting job 10SEG
2021-05-24 19:04:38,994 [INFO] hls-conus - Completed job 10SDH
2021-05-24 19:04:38,995 [INFO] hls-conus - Submitting job 10SEH
2021-05-24 19:04:39,830 [INFO] hls-conus - Completed job 10SEF
2021-05-24 19:04:39,831 [INFO] hls-conus - Submitting job 10SEJ
2021-05-24 19:04:39,963 [INFO] hls-conus - Completed job 10SEG
2021-05-24 19:04:39,964 [INFO] hls-conus - Submitting job 10SFE
2021-05-24 19:04:47,392 [INFO] hls-conus - Completed job 10SDJ
2021-05-24 19:04:47

Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 1391, in _do_ssl_handshake
    self.socket.do_handshake()
  File "/srv/conda/envs/notebook/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate (_ssl.c:1125)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 189, in _handle_events
    handler_func(fileobj, events)
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/tornado/iostream.py", line 696, in _handle_events
    self._handle

2021-05-24 19:07:23,433 [ERROR] hls-conus - Exception from dask cluster
Traceback (most recent call last):
  File "/home/jovyan/di-cog/utils/hls/compute.py", line 265, in run_job_subset
    result = future.result()
  File "/srv/conda/envs/notebook/lib/python3.8/site-packages/distributed/client.py", line 224, in result
    raise result
concurrent.futures._base.CancelledError: calculate_job_median-68e4a442e69b44279c79209a0c8c6320
2021-05-24 19:07:23,434 [INFO] hls-conus - Submitting job 10SGJ


AttributeError: 'NoneType' object has no attribute 'events'