In [4]:
from datacube.utils.dask import start_local_dask
from dep_tools.loaders import Sentinel2OdcLoader
from dep_tools.namers import LocalPath
from dep_tools.writers import LocalDsWriter
from odc.stac import load
from pystac import Item

from src.run_task import GeoMADSentinel2Processor, get_grid

In [5]:
# Enable IPython's autoreload extension so edits to imported project modules
# (e.g. src.run_task) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Optionally set up a local dask cluster.
# The last expression displays the dashboard URL for monitoring the cluster.
client = start_local_dask()
client.dashboard_link

'http://127.0.0.1:8787/status'

In [7]:
# Study site configuration.
# NOTE(review): this cell originally opened with an empty "Issues" heading;
# record any known issues for this tile here.
region_code = "63,20"
datetime = "2023"
# The region code is reused as the item id for this run
item_id = region_code

# Look the study site up in the grid by its region code and display it
grid = get_grid()
cell = grid.loc[[region_code]]
cell.explore()

In [8]:
# Dask chunking for the load: one time step per chunk, 4096 x 4096 in x/y
s2_chunks = {"time": 1, "x": 4096, "y": 4096}

# Extra load options (presumably forwarded to the underlying ODC load call)
s2_load_options = {
    "fail_on_error": False,
    "resolution": 10,
    # Earlier band selection, kept for reference:
    # ["qa_pixel", "red", "green", "blue", "nir08", "swir16", "swir22"]
    "bands": ["SCL", "red", "green", "blue"],
}

# Configure the Sentinel-2 data loader
loader = Sentinel2OdcLoader(
    epsg=3832,
    datetime=datetime,
    dask_chunksize=s2_chunks,
    odc_load_kwargs=s2_load_options,
    nodata_value=0,
    keep_ints=True,
    load_as_dataset=True,
)

# Run the load for the selected cell; the result is lazy-loaded
input_data = loader.load(cell)
input_data

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 22.32 GiB 32.00 MiB Shape (130, 9600, 9600) (1, 4096, 4096) Dask graph 1170 chunks in 1 graph layer Data type uint16 numpy.ndarray",9600  9600  130,

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 22.32 GiB 32.00 MiB Shape (130, 9600, 9600) (1, 4096, 4096) Dask graph 1170 chunks in 1 graph layer Data type uint16 numpy.ndarray",9600  9600  130,

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 22.32 GiB 32.00 MiB Shape (130, 9600, 9600) (1, 4096, 4096) Dask graph 1170 chunks in 1 graph layer Data type uint16 numpy.ndarray",9600  9600  130,

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 22.32 GiB 32.00 MiB Shape (130, 9600, 9600) (1, 4096, 4096) Dask graph 1170 chunks in 1 graph layer Data type uint16 numpy.ndarray",9600  9600  130,

Unnamed: 0,Array,Chunk
Bytes,22.32 GiB,32.00 MiB
Shape,"(130, 9600, 9600)","(1, 4096, 4096)"
Dask graph,1170 chunks in 1 graph layer,1170 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [10]:
# Settings for the GeoMAD processor, gathered in one place so they are easy to tune
geomad_options = {
    "scale_and_offset": False,
    "work_chunks": (601, 601),
    "num_threads": 10,
    "filters": [("closing", 5), ("opening", 5)],
    "keep_ints": True,
}
processor = GeoMADSentinel2Processor(**geomad_options)

# Plan the processing on the lazy input; the result is still lazy-loaded
output_data = processor.process(input_data)
output_data

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 175.78 MiB 705.47 kiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type uint16 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 175.78 MiB 705.47 kiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type uint16 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 175.78 MiB 705.47 kiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type uint16 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 351.56 MiB 1.38 MiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type float32 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 351.56 MiB 1.38 MiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type float32 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 351.56 MiB 1.38 MiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type float32 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,351.56 MiB,1.38 MiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 175.78 MiB 705.47 kiB Shape (9600, 9600) (601, 601) Dask graph 256 chunks in 30 graph layers Data type uint16 numpy.ndarray",9600  9600,

Unnamed: 0,Array,Chunk
Bytes,175.78 MiB,705.47 kiB
Shape,"(9600, 9600)","(601, 601)"
Dask graph,256 chunks in 30 graph layers,256 chunks in 30 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [None]:
# Actually load data and do the processing, so we have it in memory.
# This triggers execution of the lazy `output_data` graph; `loaded` is the
# computed result and is displayed as the cell output.
loaded = output_data.compute()
loaded

In [None]:
# Testing the Azure writer (disabled: the code below is commented out;
# uncomment it to run the test)

# from dep_tools.writers import AzureDsWriter
# from dep_tools.namers import DepItemPath

# itempath = DepItemPath("geomad", "test", "0.0", datetime)

# writer = AzureDsWriter(
#     itempath=itempath,
#     overwrite=True,
#     convert_to_int16=False,
#     extra_attrs=dict(dep_version="0.0"),
# )

# writer.write(output_data, "test")


In [None]:
# Read back a previously published test item from its STAC document.
# `Item` and `load` are imported in the notebook's first cell, so later cells
# that use `Item` do not depend on this (network-dependent) cell having run.
test_item_url = "https://deppcpublicstorage.blob.core.windows.net/output/dep_geomad_test/0-0/test/2023-01/dep_geomad_test_test_2023-01.stac-item.json"
item = Item.from_file(test_item_url)

# Passing `chunks` requests a dask-backed load, so displaying `data` does not
# pull the pixel data
data = load([item], chunks={})
data

In [None]:
# Quick-look image of the red band at the first time step
red_first = data["red"].isel(time=0)
red_first.plot.imshow(size=8, robust=True)

In [None]:
# This is the target path.
# The data in this notebook comes from the Sentinel-2 loader and processor, so
# the sensor label is "s2" (it was previously "ls", which mislabelled the output).
dep_path = LocalPath(
    local_folder="data",
    sensor="s2",
    dataset_id="geomad",
    version="0.0.0",
    time=datetime,
)

# Set up a writer and write out the computed (in-memory) result for this item.
# `overwrite=True` means re-running the cell replaces any earlier output.
writer = LocalDsWriter(
    itempath=dep_path,
    output_nodata=0,
    use_odc_writer=True,
    overwrite=True,
    convert_to_int16=False,
)
out_files = writer.write(loaded, item_id)

In [None]:
# Make sure written files are readable.
# Ask the writer's item path for the ".stac-item.json" file of this item.
stac_path = writer.itempath.path(item_id, ext=".stac-item.json")

# Re-open the STAC item from that path and validate it; the result of
# `validate()` is shown as the cell output.
item = Item.from_file(stac_path)
item.validate()