In [6]:
import os

import fsspec
import geopandas as gpd

from utils import get_logger
from utils.dask import create_cluster
from utils.hls.catalog import HLSCatalog
from utils.hls.catalog import HLSBand
from utils.hls.compute import calculate_job_median
from utils.hls.compute import jobs_from_catalog, process_jobs

In [7]:
# Read the state-boundary archive `cb_2019_us_state_5m.zip` from the working
# directory into a GeoDataFrame.
# NOTE(review): `j` is not referenced by any other cell visible here, and the
# same archive is read again into `tiger_states` in the following cell --
# confirm whether this cell is still needed before keeping it.
j = gpd.read_file('./cb_2019_us_state_5m.zip')

In [8]:
# Azure storage settings; the cluster/job configuration cell reads both values
# back out of os.environ.
os.environ['AZURE_STORAGE_ACCOUNT'] = 'lumonitor'
# Do not hardcode the access key in the notebook. Export
# AZURE_STORAGE_ACCESS_KEY before starting the kernel: setdefault keeps a key
# that is already present and only falls back to the empty placeholder so that
# later os.environ['AZURE_STORAGE_ACCESS_KEY'] lookups do not raise KeyError.
os.environ.setdefault('AZURE_STORAGE_ACCESS_KEY', '')

# Reading the archive straight from the Census server stopped working on the
# pangeo upgrade on 25Mar2021; the original call is kept for provenance:
# tiger_states  = gpd.read_file('zip+http://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_5m.zip').to_crs('EPSG:4326')
# A local zip with the same file name is read instead (presumably a downloaded
# copy of that archive -- confirm) and reprojected to EPSG:4326.
tiger_states = gpd.read_file('./cb_2019_us_state_5m.zip').to_crs('EPSG:4326')

# Keep only the row(s) whose NAME column is exactly 'California'; this is the
# geometry handed to the catalog query.
california = tiger_states[tiger_states['NAME']=='California']

# Bands requested from the HLS catalog and passed on to the processing jobs.
bands = [
    HLSBand.COASTAL_AEROSOL,
    HLSBand.BLUE,
    HLSBand.GREEN,
    HLSBand.RED,
    HLSBand.NIR_NARROW,
    HLSBand.SWIR1,
    HLSBand.SWIR2,
    HLSBand.QA  # needed for qa
]
 


In [16]:
# Build the scene catalog for the California geometry, restricted to 2013 and
# to the requested bands.
catalog = HLSCatalog.from_geom(geom=california, years=[2013], bands=bands)

# Chunk layout handed to the jobs: one band per chunk and a full 3660x3660
# tile in x/y, so each tile's data is read in a single pass.
chunks = dict(band=1, x=3660, y=3660)

# NOTE(review): the logger name says "conus-2016" although this run covers
# California 2013 -- confirm the name is intentional.
logger = get_logger('hls-conus-2016')

# Filter the catalog in two separate passes. Combining both conditions in a
# single .where() raised an error about duplicate indices, so the sensor mask
# is computed only after the year filter has been applied.
year_is_2013 = catalog.xr_ds['year'] == 2013
catalog.xr_ds = catalog.xr_ds.where(year_is_2013, drop=True)

sensor_is_l = catalog.xr_ds['sensor'] == 'L'
catalog.xr_ds = catalog.xr_ds.where(sensor_is_l, drop=True)

print(catalog.xr_ds)



Reading tile extents...
Read tile extents for 56686 tiles
<xarray.Dataset>
Dimensions:  (index: 2171)
Coordinates:
  * index    (index) int64 0 0 0 0 0 0 0 0 0 0 ... 73 73 73 73 73 73 73 73 73 73
Data variables:
    tile     (index) object '10SEF' '10SEF' '10SEF' ... '10SDH' '10SDH' '10SDH'
    year     (index) object 2013 2013 2013 2013 2013 ... 2013 2013 2013 2013
    scene    (index) object 'L30/HLS.L30.T10SEF.2013106.v1.4' ... 'L30/HLS.L3...
    sensor   (index) object 'L' 'L' 'L' 'L' 'L' 'L' ... 'L' 'L' 'L' 'L' 'L' 'L'
    dt       (index) datetime64[ns] 2013-04-16 2013-04-25 ... 2013-12-26
Attributes:
    bands:    [<HLSBand.COASTAL_AEROSOL: 1>, <HLSBand.BLUE: 2>, <HLSBand.GREE...


In [21]:
# Derive the job list from the filtered catalog, keyed on the 'tile' variable.
jobs = jobs_from_catalog(catalog.xr_ds, 'tile')

# Read the storage settings once; they are forwarded both to the workers'
# environment and to process_jobs itself.
storage_account = os.environ['AZURE_STORAGE_ACCOUNT']
storage_key = os.environ['AZURE_STORAGE_ACCESS_KEY']

# Sizing and environment for the Dask cluster that process_jobs starts.
# NOTE(review): the memory values are passed through as-is; their unit is
# defined by the cluster helper -- confirm.
cluster_args = {
    'workers': 64,
    'worker_threads': 1,
    'worker_memory': 8,
    'scheduler_threads': 4,
    'scheduler_memory': 8,
    'environment_options': {
        'AZURE_STORAGE_ACCOUNT': storage_account,
        'AZURE_STORAGE_ACCESS_KEY': storage_key,
        # GDAL setting forwarded to the workers.
        'CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE': 'YES',
    },
}

# Run calculate_job_median over every job, grouped by 'time.year', with
# checkpoints under ./checkpoint and results addressed to the 'lumonitor'
# container, subfolder '2013'.
process_jobs(
    jobs=jobs,
    job_fn=calculate_job_median,
    concurrency=4,
    checkpoint_path='./checkpoint',
    logger=logger,
    cluster_args=cluster_args,
    code_path='./utils',
    job_groupby='time.year',
    bands=bands,
    chunks=chunks,
    account_name=storage_account,
    account_key=storage_key,
    storage_container='lumonitor',
    subfolder='2013',
)

2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:16,687 [INFO] hls-conus-2016 - Starting cluster
2021-03-26 00:26:26,572 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.4f17a959d44c4868a4c43b8543e8ecec/status
2021-03-26 00:26:26,572 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.4f17a959d44c4868a4c43b8543e8ecec/status
2021-03-26 00:26:26,572 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.4f17a959d44c4868a4c43b8543e8ecec/status
2021-03-26 00:26:26,572 [INFO] hls-conus-2016 - Cluster dashboard visible at /services/dask-gateway/clusters/default.4f17a959d44c4868a4c43b