## Debugging
Cell for minimal code to send in Stories for debugging purposes

In [3]:
import pathlib
from typing import Dict, Union, List

from openeo import connect, Connection
from openeo.rest.datacube import DataCube
from openeo import processes
import pandas as pd

from utils import get_urls_from_dc

# Connect to the "openeo.cloud" back-end and authenticate through OIDC with the
# "egi" provider. On the recorded run this reused a stored refresh token.
con: Connection = connect("openeo.cloud")
con.authenticate_oidc(provider_id="egi")

# Local folder for downloaded results; created (with parents) if it does not exist yet.
out_dir = pathlib.Path("output")
out_dir.mkdir(parents=True, exist_ok=True)

# Area of interest as a west/east/south/north bounding box in EPSG:4326.
denia_harbour_bbox: Dict[str, Union[float, str]] = {
    "west": 0.10594089795383788,
    "east": 0.12937267590793944,
    "south": 38.83464299556706,
    "north": 38.85035302841166,
    "crs": "EPSG:4326",
}
# Overall [start, end] period that is requested from the back-end.
temporal_extent: List[str] = ["2021-01-01", "2021-04-01"]

# (collection id, band ids to load) and the labels those bands are renamed to, in the same order.
# NOTE(review): B06/B05/B03 labelled swir1/nir/green looks like Landsat 8 band numbering,
# while the collection id suggests Sentinel-2 - confirm the intended bands.
collection = ("TERRASCOPE_S2_TOC_V2", ["B06", "B05", "B03"])
band_names = ["swir1", "nir", "green"]
# Probability handed to the quantiles reducer when bucketing the cube over time.
percentile = 0.2

# Month-start timestamps spanning the temporal extent; each consecutive pair forms one interval.
dr: pd.DatetimeIndex = pd.date_range(start=temporal_extent[0], end=temporal_extent[1], freq="MS")
t_intervals = [[str(start), str(end)] for start, end in zip(dr[:-1], dr[1:])]

# Load the selected bands for the bounding box and period, add a "source_name"
# dimension labelled with the collection id, and rename the band ids to band_names.
collection_id, source_bands = collection
dc: DataCube = (
    con.load_collection(
        collection_id=collection_id,
        spatial_extent=denia_harbour_bbox,
        temporal_extent=temporal_extent,
        bands=source_bands,
    )
    .add_dimension(name="source_name", label=collection_id, type="other")
    .rename_labels(dimension="bands", source=source_bands, target=band_names)
)

# Bucket the cube per time interval: every bucket is reduced to the `percentile`
# quantile of its images and labelled with the start of its interval.
bucket_labels = [bounds[0] for bounds in t_intervals]
t_bucketed_dc: DataCube = dc.aggregate_temporal(
    intervals=t_intervals,
    reducer=lambda data: processes.quantiles(data, probabilities=[percentile]),
    labels=bucket_labels,
)

# Number of time buckets that were requested.
print(len(t_intervals))

# Map every "green" value to 1 (x * 0 + 1), then sum those ones per interval.
# Presumably no-data pixels stay no-data on the back-end, which would make the
# sum a count of valid observations - TODO confirm.
ones_dc = dc.band("green").apply(
    lambda data: processes.add(x=processes.multiply(x=data, y=0), y=1)
)
count_dc: DataCube = ones_dc.aggregate_temporal(
    intervals=t_intervals,
    reducer=lambda data: processes.sum(data),
    labels=[t_int[0] for t_int in t_intervals],
)

# Per pixel and interval: true where the summed count exceeds 5 (inverted further down).
mask: DataCube = count_dc.apply(lambda val: processes.gt(x=val, y=5))

# Collapse the time dimension with a product, so a single failing interval zeroes
# the pixel, then invert: the final mask is true where any interval failed the check.
all_intervals_ok = mask.reduce_dimension(dimension="t", reducer=processes.product)
mask_no_t = all_intervals_ok.apply(lambda data: processes.eq(x=data, y=0))

# Apply the time-independent mask to the bucketed cube.
dc = t_bucketed_dc.mask(mask_no_t)

# t_bucketed_dc.download(out_dir / "test_time.nc", format="netcdf")
# mask_no_t.download(out_dir / "test.nc", format="netcdf")
# count_dc.download(out_dir / "testcount.nc", format="netcdf")
# mask.download(out_dir / "mask.nc", format="netcdf")
# dc.download(out_dir / "res_test.nc", format="netcdf")

Authenticated using refresh token.
3


In [4]:
# Hand the masked cube to the project helper and keep the URLs it returns.
# Judging by the log output, the helper runs a back-end job named "test_masking"
# and waits for it - see utils.get_urls_from_dc for the details.
urls = get_urls_from_dc(
    dc,
    job_name="test_masking",
    format="GTiff",
)
print(urls)

2022-01-21 15:23:36,351 - utils - INFO - 0:00:40 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:23:42,188 - utils - INFO - 0:00:45 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:23:49,505 - utils - INFO - 0:00:53 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:23:58,764 - utils - INFO - 0:01:02 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:24:09,576 - utils - INFO - 0:01:13 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:24:23,552 - utils - INFO - 0:01:27 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:24:41,533 - utils - INFO - 0:01:45 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:25:01,417 - utils - INFO - 0:02:05 Job 'vito-602a07a8-e214-4c2c-b698-840263dfc090': queued (progress N/A)
2022-01-21 15:25:25,991 - utils - INFO -

['https://openeo.vito.be/openeo/1.0/jobs/602a07a8-e214-4c2c-b698-840263dfc090/results/Zjg5MzBhNWQxNmJhMTMxNWE1ZTVkNzM4MTJiZDRmZjM1ZTkxM2Y4NWM4OTQ5NzEyMzE0YjYxMTE0MjZmY2MyY0BlZ2kuZXU%3D/1f05c588c01371422f76f39aa967c266/openEO_2021-01-01Z.tif?expires=1643383644', 'https://openeo.vito.be/openeo/1.0/jobs/602a07a8-e214-4c2c-b698-840263dfc090/results/Zjg5MzBhNWQxNmJhMTMxNWE1ZTVkNzM4MTJiZDRmZjM1ZTkxM2Y4NWM4OTQ5NzEyMzE0YjYxMTE0MjZmY2MyY0BlZ2kuZXU%3D/24267b53fb2dcb815065e0102d92ac3c/openEO_2021-02-01Z.tif?expires=1643383644', 'https://openeo.vito.be/openeo/1.0/jobs/602a07a8-e214-4c2c-b698-840263dfc090/results/Zjg5MzBhNWQxNmJhMTMxNWE1ZTVkNzM4MTJiZDRmZjM1ZTkxM2Y4NWM4OTQ5NzEyMzE0YjYxMTE0MjZmY2MyY0BlZ2kuZXU%3D/fbf531c1c40ca7cbd5484ef1cc942b0a/openEO_2021-03-01Z.tif?expires=1643383644']


In [5]:
! conda install -y -c conda-forge rioxarray

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - rioxarray


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    rioxarray-0.9.1            |     pyhd8ed1ab_0          44 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          44 KB

The following NEW packages will be INSTALLED:

  rioxarray          conda-forge/noarch::rioxarray-0.9.1-pyhd8ed1ab_0



Downloading and Extracting Packages
rioxarray-0.9.1      | 44 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


The data array now shows `2**15` for the filtered (masked) values. I am unsure why; perhaps it is a difference between NetCDF and GeoTIFF output.

In [6]:
import xarray as xr
# One time label per result file: the start of each aggregation interval.
# Assumes `urls` is ordered by interval start - TODO confirm against get_urls_from_dc.
dim = xr.Variable(dims="t", data=dr[:-1])

# Open every GeoTIFF and stack them along the new "t" dimension.
stacked = xr.concat([xr.open_dataset(url, engine="rasterio") for url in urls], dim=dim)

# Band labels must follow the order the cube was built in (B06, B05, B03, renamed to
# swir1, nir, green). Listing them as green/nir/swir would make sel(band="swir")
# return the green band.
dataset = stacked.assign_coords({"band": ["swir", "nir1", "green"]})
dataset

In [50]:
import hvplot.xarray

# Interactive plot of the band labelled "swir", one frame per "t" value, with a
# scrubber widget below the plot to step through time.
swir_band = dataset.sel(band="swir")
swir_band.hvplot(groupby="t", cmap="turbo", widget_type="scrubber", widget_location="bottom")