## Debugging
This cell contains a minimal, self-contained example that can be sent along in Stories for debugging purposes.

In [1]:
import pathlib
from typing import Dict, Union, List

from openeo import connect, Connection
from openeo.rest.datacube import DataCube
from openeo import processes
import pandas as pd

# --- Backend connection ------------------------------------------------------
con: Connection = connect("openeo.cloud")
con.authenticate_oidc(provider_id="egi")

# --- Local output location ---------------------------------------------------
out_dir = pathlib.Path("output")
out_dir.mkdir(parents=True, exist_ok=True)

# --- Area / period of interest -----------------------------------------------
denia_harbour_bbox: Dict[str, Union[float, str]] = {
    "west": 0.10594089795383788,
    "east": 0.12937267590793944,
    "south": 38.83464299556706,
    "north": 38.85035302841166,
    "crs": "EPSG:4326",
}
temporal_extent: List[str] = ["2021-01-01", "2021-04-01"]

# (collection id, bands to load); band_names are the labels the bands get
# renamed to, position by position.
# NOTE(review): B06/B05 are labelled "swir1"/"nir" here, which looks like a
# Landsat-style mapping applied to a Sentinel-2 collection -- confirm this is
# intentional.
collection = ("TERRASCOPE_S2_TOC_V2", ["B06", "B05", "B03"])
band_names = ["swir1", "nir", "green"]
percentile = 0.2  # passed to `quantiles` as the single probability

# Month-start timestamps spanning the temporal extent; each pair of consecutive
# timestamps becomes one [start, end] aggregation interval.
dr: pd.DatetimeIndex = pd.date_range(
    start=temporal_extent[0],
    end=temporal_extent[1],
    freq="MS",
)
t_intervals = [[str(begin), str(finish)] for begin, finish in zip(dr[:-1], dr[1:])]
print(len(t_intervals))

# --- Load the collection and rename its bands --------------------------------
dc: DataCube = (
    con.load_collection(
        collection_id=collection[0],
        spatial_extent=denia_harbour_bbox,
        temporal_extent=temporal_extent,
        bands=collection[1],
    )
    .add_dimension(name="source_name", label=collection[0], type="other")
    .rename_labels(dimension="bands", source=collection[1], target=band_names)
)

# --- Bucket the cube per interval, keeping the requested quantile ------------
t_bucketed_dc: DataCube = dc.aggregate_temporal(
    intervals=t_intervals,
    reducer=lambda data: processes.quantiles(data, probabilities=[percentile]),
    labels=[begin for begin, _ in t_intervals],
)

# --- Count observations per interval -----------------------------------------
# `x * 0 + 1` maps every value of the "green" band to 1, so summing inside an
# interval yields the number of contributing observations.
# (presumably nodata stays nodata and is not counted -- TODO confirm)
count_dc: DataCube = (
    dc.band("green")
    .apply(lambda data: processes.add(x=processes.multiply(x=data, y=0), y=1))
    .aggregate_temporal(
        intervals=t_intervals,
        reducer=lambda data: processes.sum(data),
        labels=[begin for begin, _ in t_intervals],
    )
)

# True where an interval holds more than 5 observations (inverted further down).
mask: DataCube = count_dc.apply(lambda val: processes.gt(x=val, y=5))

# Collapse the time dimension to flag pixels that lack images at any timestep:
# the product over "t" is taken first, and the result is then inverted by
# comparing it with 0.
mask_no_t = (
    mask.reduce_dimension(dimension="t", reducer=processes.product)
    .apply(lambda data: processes.eq(x=data, y=0))
)

# From here on `dc` refers to the bucketed cube with the time-collapsed mask
# applied, no longer to the freshly loaded collection.
dc = t_bucketed_dc.mask(mask_no_t)

# --- Download every intermediate product for inspection ----------------------
t_bucketed_dc.download(out_dir / "test_time.nc", format="netcdf")
mask_no_t.download(out_dir / "test.nc", format="netcdf")
count_dc.download(out_dir / "testcount.nc", format="netcdf")
mask.download(out_dir / "mask.nc", format="netcdf")
dc.download(out_dir / "res_test.nc", format="netcdf")

Authenticated using refresh token.
3


In [5]:
import hvplot.xarray
import xarray as xr

# Read the masked result produced by the processing cell.
# `load_dataset` pulls the data into memory and closes the file right away, so
# re-running the download cell can overwrite "res_test.nc" without colliding
# with a handle that is still open (or cached) from an earlier run.
dataset: xr.Dataset = xr.load_dataset(out_dir / "res_test.nc")

# A bare expression is only rendered when it is the last statement of a cell;
# use an explicit display so the dataset summary is actually shown.
display(dataset)

# Interactive view of the "green" band with a scrubber widget to step through
# the time dimension.
dataset["green"].hvplot(
    groupby="t",
    cmap="turbo",
    widget_type="scrubber",
    widget_location="bottom",
)