Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include Omero-channels-metadata update in import-ome-zarr task #579

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
878b946
BROKEN use BIA-data test to catch error in #578
tcompa Oct 18, 2023
b9c2671
Add `update_omero_channels` helper function and its unit test (ref #578)
tcompa Oct 19, 2023
c62a812
Implement first version of `update_omero_metadata` in `import_ome_zar…
tcompa Oct 19, 2023
cb11bec
Update `test_import_ome_zarr_image_BIA` (ref #578)
tcompa Oct 19, 2023
bd323f0
Skip `test_import_ome_zarr_image_BIA`
tcompa Oct 19, 2023
be5b822
Update CHANGELOG
tcompa Oct 19, 2023
96197ac
Improve handling of omero metadata in import-ome-zarr task (ref #578)
tcompa Oct 19, 2023
bf5cf58
Update test_import_ome_zarr_image_BIA
tcompa Oct 19, 2023
cf2d808
Add logging in update_omero_channels
tcompa Oct 19, 2023
3d440f0
Improve logging in `_process_single_image`
tcompa Oct 19, 2023
2f19419
Add `remove_omero` option to `prepare_3D_zarr` test function
tcompa Oct 19, 2023
975ea42
Also test omero-channels addition in `test_import_ome_zarr_image`
tcompa Oct 19, 2023
4ebcb1b
Add failing test for omero-channels update
tcompa Oct 19, 2023
7f01182
Handle existing `wavelength_id`s in omero channels correctly
tcompa Oct 19, 2023
64823df
Expand test_update_omero_channels
tcompa Oct 19, 2023
9c2b0ee
Refactor `update_omero_channels`, to better handle existing `waveleng…
tcompa Oct 19, 2023
63258dd
Fix typo
tcompa Oct 19, 2023
60b4afe
Introduce test_import_ome_zarr_image_wrong_channels
tcompa Oct 19, 2023
07555b1
Minor update to test_import_ome_zarr_image_wrong_channels [skip ci]
tcompa Oct 19, 2023
eecb7b9
Add logs
tcompa Oct 19, 2023
a8fbe73
Remove blank line [skip ci]
tcompa Oct 19, 2023
2ba59a1
Improve comments [skip ci]
tcompa Oct 19, 2023
e399b96
Always add missin `color` omero-channel-metadata attribute, when miss…
tcompa Oct 19, 2023
dc5bb7d
Fix test
tcompa Oct 19, 2023
76b2445
Update CHANGELOG [skip ci]
tcompa Oct 19, 2023
76e7492
Fix docstring [skip ci]
tcompa Oct 19, 2023
32a4ad6
Update CHANGELOG [skip ci]
tcompa Oct 19, 2023
f4113bb
Improve comment [skip ci]
tcompa Oct 19, 2023
0cad3fc
Improve logs in fixture [skip ci]
tcompa Oct 19, 2023
683bcf8
Improve error message and comments
tcompa Oct 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
**Note**: Numbers like (\#123) point to closed Pull Requests on the fractal-tasks-core repository.

# Unreleased
# 0.13.0

* Tasks:
* New task and helper functions:
* Introduce `import_ome_zarr` task (\#557).
* Introduce `import_ome_zarr` task (\#557, \#579).
* Introduce `get_single_image_ROI` and `get_image_grid_ROIs` (\#557).
* Introduce `detect_ome_ngff_type` (\#557).
* Make `maximum_intensity_projection` task not depend on ROI tables (\#557).
* Introduce `update_omero_channels` (\#579).
* Make `maximum_intensity_projection` independent from ROI tables (\#557).
* Make Cellpose task work when `input_ROI_table` is empty (\#566).
* Fix bug of missing attributes in ROI-table Zarr group (\#573).
* Dependencies:
Expand Down
6 changes: 6 additions & 0 deletions fractal_tasks_core/__FRACTAL_MANIFEST__.json
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,12 @@
"type": "integer",
"description": "X shape of the ROI grid in `grid_ROI_table`."
},
"update_omero_metadata": {
"title": "Update Omero Metadata",
"default": true,
"type": "boolean",
"description": "Whether to update Omero-channels metadata, to make them Fractal-compatible."
},
"overwrite": {
"title": "Overwrite",
"default": false,
Expand Down
134 changes: 134 additions & 0 deletions fractal_tasks_core/lib_channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
Helper functions to address channels via OME-NGFF/OMERO metadata.
"""
import logging
from copy import deepcopy
from typing import Any
from typing import Optional
from typing import Union

Expand Down Expand Up @@ -339,3 +341,135 @@ def define_omero_channels(
]

return new_channels_dictionaries


def _get_new_unique_value(
value: str,
existing_values: list[str],
) -> str:
"""
Produce a string value that is not present in a given list

Append `_1`, `_2`, ... to a given string, if needed, until finding a value
which is not already present in `existing_values`.

Args:
value: The first guess for the new value
existing_values: The list of existing values

Returns:
A string value which is not present in `existing_values`
"""
counter = 1
new_value = value
while new_value in existing_values:
new_value = f"{value}-{counter}"
counter += 1
return new_value


def update_omero_channels(
    old_channels: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Make an existing list of Omero channels Fractal-compatible

    The output channels all have keys `label`, `wavelength_id` and `color`.
    Existing values of these keys are preserved as they are (in particular,
    pre-existing `wavelength_id` values are not de-duplicated), while each
    newly generated `wavelength_id` differs from all the `wavelength_id`
    values already in use.

    The input list is not modified, and the output channels are deep copies
    of the input ones: nested values are never shared between input and
    output.

    See https://ngff.openmicroscopy.org/0.4/index.html#omero-md for the
    definition of NGFF Omero metadata.

    Args:
        old_channels: Existing list of Omero-channel dictionaries

    Returns:
        New list of Fractal-compatible Omero-channel dictionaries
    """
    # Work in-place on deep copies, so that `old_channels` stays untouched
    new_channels = deepcopy(old_channels)
    existing_wavelength_ids: list[str] = []
    handled_channels: set[int] = set()

    # Colors for channels that have none: three defaults, then always gray
    default_colors = iter(["00FFFF", "FF00FF", "FFFF00"])

    def _get_next_color() -> str:
        return next(default_colors, "808080")

    # Channels that contain the key "wavelength_id"
    for ind, new_channel in enumerate(new_channels):
        if "wavelength_id" not in new_channel:
            continue
        handled_channels.add(ind)
        existing_wavelength_ids.append(new_channel["wavelength_id"])
        new_channel.setdefault("label", str(ind + 1))
        if "color" not in new_channel:
            new_channel["color"] = _get_next_color()

    # Channels that contain the key "label" but do not contain the key
    # "wavelength_id"
    for ind, new_channel in enumerate(new_channels):
        if ind in handled_channels or "label" not in new_channel:
            continue
        handled_channels.add(ind)
        wavelength_id = _get_new_unique_value(
            new_channel["label"],
            existing_wavelength_ids,
        )
        existing_wavelength_ids.append(wavelength_id)
        new_channel["wavelength_id"] = wavelength_id
        if "color" not in new_channel:
            new_channel["color"] = _get_next_color()

    # Channels that do not contain the key "label" nor the key "wavelength_id"
    # NOTE: these channels must be treated last, as they have lower priority
    # w.r.t. existing "wavelength_id" or "label" values
    for ind, new_channel in enumerate(new_channels):
        if ind in handled_channels:
            continue
        label = str(ind + 1)
        wavelength_id = _get_new_unique_value(
            label,
            existing_wavelength_ids,
        )
        existing_wavelength_ids.append(wavelength_id)
        new_channel["label"] = label
        new_channel["wavelength_id"] = wavelength_id
        if "color" not in new_channel:
            new_channel["color"] = _get_next_color()

    # Log old/new values of label, wavelength_id and color
    # NOTE: the local names below are part of the log text, because of the
    # `{name=}` f-string specifier
    for old_channel, new_channel in zip(old_channels, new_channels):
        label = old_channel.get("label")
        color = old_channel.get("color")
        wavelength_id = old_channel.get("wavelength_id")
        old_attributes = (
            f"Old attributes: {label=}, {wavelength_id=}, {color=}"
        )
        label = new_channel["label"]
        wavelength_id = new_channel["wavelength_id"]
        color = new_channel["color"]
        new_attributes = (
            f"New attributes: {label=}, {wavelength_id=}, {color=}"
        )
        logging.info(
            "Omero channel update:\n"
            f"    {old_attributes}\n"
            f"    {new_attributes}"
        )

    return new_channels
69 changes: 63 additions & 6 deletions fractal_tasks_core/tasks/import_ome_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import zarr
from pydantic.decorator import validate_arguments

from fractal_tasks_core.lib_channels import update_omero_channels
from fractal_tasks_core.lib_ngff import detect_ome_ngff_type
from fractal_tasks_core.lib_ngff import NgffImageMeta
from fractal_tasks_core.lib_regions_of_interest import get_image_grid_ROIs
Expand All @@ -34,6 +35,8 @@ def _process_single_image(
image_path: str,
add_image_ROI_table: bool,
add_grid_ROI_table: bool,
update_omero_metadata: bool,
*,
grid_YX_shape: Optional[tuple[int, int]] = None,
overwrite: bool = False,
) -> None:
Expand All @@ -43,14 +46,17 @@ def _process_single_image(
This task:

1. Validates OME-NGFF image metadata, via `NgffImageMeta`;
2. Optionally generates and writes two ROI tables.
2. Optionally generates and writes two ROI tables;
3. Optionally updates OME-NGFF omero metadata.

Args:
image_path: Absolute path to the image Zarr group.
add_image_ROI_table: Whether to add a `image_ROI_table` table
(argument propagated from `import_ome_zarr`).
add_grid_ROI_table: Whether to add a `grid_ROI_table` table (argument
propagated from `import_ome_zarr`).
update_omero_metadata: Whether to update Omero-channels metadata
(argument propagated from `import_ome_zarr`).
grid_YX_shape: YX shape of the ROI grid (it must not be `None` if
`add_grid_ROI_table=True`).
"""
Expand Down Expand Up @@ -100,6 +106,51 @@ def _process_single_image(
logger=logger,
)

# Update Omero-channels metadata
if update_omero_metadata:
# Extract number of channels from zarr array
try:
channel_axis_index = image_meta.axes_names.index("c")
except ValueError:
logger.error(f"Existing axes: {image_meta.axes_names}")
msg = (
"OME-Zarrs with no channel axis are not currently "
"supported in fractal-tasks-core. Upcoming flexibility "
"improvements are tracked in https://github.com/"
"fractal-analytics-platform/fractal-tasks-core/issues/150."
)
logger.error(msg)
raise NotImplementedError(msg)
logger.info(f"Existing axes: {image_meta.axes_names}")
logger.info(f"Channel-axis index: {channel_axis_index}")
num_channels_zarr = array.shape[channel_axis_index]
logger.info(
f"{num_channels_zarr} channel(s) found in Zarr array "
f"at {image_path}/{dataset_subpath}"
)
# Update or create omero channels metadata
old_omero = image_group.attrs.get("omero", {})
old_channels = old_omero.get("channels", [])
if len(old_channels) > 0:
logger.info(
f"{len(old_channels)} channel(s) found in NGFF omero metadata"
)
if len(old_channels) != num_channels_zarr:
error_msg = (
"Channels-number mismatch: Number of channels in the "
f"zarr array ({num_channels_zarr}) differs from number "
"of channels listed in NGFF omero metadata "
f"({len(old_channels)})."
)
logging.error(error_msg)
raise ValueError(error_msg)
else:
old_channels = [{} for ind in range(num_channels_zarr)]
new_channels = update_omero_channels(old_channels)
new_omero = old_omero.copy()
new_omero["channels"] = new_channels
image_group.attrs.update(omero=new_omero)


@validate_arguments
def import_ome_zarr(
Expand All @@ -112,6 +163,7 @@ def import_ome_zarr(
add_grid_ROI_table: bool = True,
grid_y_shape: int = 2,
grid_x_shape: int = 2,
update_omero_metadata: bool = True,
overwrite: bool = False,
) -> dict[str, Any]:
"""
Expand Down Expand Up @@ -141,6 +193,8 @@ def import_ome_zarr(
image, with the image split into a rectangular grid of ROIs.
grid_y_shape: Y shape of the ROI grid in `grid_ROI_table`.
grid_x_shape: X shape of the ROI grid in `grid_ROI_table`.
update_omero_metadata: Whether to update Omero-channels metadata, to
make them Fractal-compatible.
overwrite: Whether new ROI tables (added when `add_image_ROI_table`
and/or `add_grid_ROI_table` are `True`) can overwrite existing ones.
"""
Expand Down Expand Up @@ -174,14 +228,15 @@ def import_ome_zarr(
f"{zarr_path}/{well_path}/{image_path}",
add_image_ROI_table,
add_grid_ROI_table,
grid_YX_shape,
update_omero_metadata,
grid_YX_shape=grid_YX_shape,
overwrite=overwrite,
)
elif ngff_type == "well":
zarrurls["well"].append(zarr_name)
logger.warning(
"Only OME-Zarr for plates are fully supported in Fractal; "
"e.g. the current one ({ngff_type=}) cannot be "
f"e.g. the current one ({ngff_type=}) cannot be "
"processed via the `maximum_intensity_projection` task."
)
for image in root_group.attrs["well"]["images"]:
Expand All @@ -191,21 +246,23 @@ def import_ome_zarr(
f"{zarr_path}/{image_path}",
add_image_ROI_table,
add_grid_ROI_table,
grid_YX_shape,
update_omero_metadata,
grid_YX_shape=grid_YX_shape,
overwrite=overwrite,
)
elif ngff_type == "image":
zarrurls["image"].append(zarr_name)
logger.warning(
"Only OME-Zarr for plates are fully supported in Fractal; "
"e.g. the current one ({ngff_type=}) cannot be "
f"e.g. the current one ({ngff_type=}) cannot be "
"processed via the `maximum_intensity_projection` task."
)
_process_single_image(
zarr_path,
add_image_ROI_table,
add_grid_ROI_table,
grid_YX_shape,
update_omero_metadata,
grid_YX_shape=grid_YX_shape,
overwrite=overwrite,
)

Expand Down
13 changes: 13 additions & 0 deletions tests/_zenodo_ome_zarrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
Zurich.
"""
import json
import logging
import shutil
from pathlib import Path
from typing import Any

import dask.array as da
import zarr
from devtools import debug


Expand All @@ -25,6 +27,7 @@ def prepare_3D_zarr(
zenodo_zarr: list[str],
zenodo_zarr_metadata: list[dict[str, Any]],
remove_tables: bool = False,
remove_omero: bool = False,
):
zenodo_zarr_3D, zenodo_zarr_2D = zenodo_zarr[:]
metadata_3D, metadata_2D = zenodo_zarr_metadata[:]
Expand All @@ -35,6 +38,16 @@ def prepare_3D_zarr(
shutil.rmtree(
str(Path(zarr_path) / Path(zenodo_zarr_3D).name / "B/03/0/tables")
)
logging.warning("Removing ROI tables attributes 3D Zenodo zarr")
if remove_omero:
image_group = zarr.open_group(
str(Path(zarr_path) / Path(zenodo_zarr_3D).name / "B/03/0"),
mode="r+",
)
image_attrs = image_group.attrs.asdict()
image_attrs.pop("omero")
image_group.attrs.put(image_attrs)
logging.warning("Removing omero attributes from 3D Zenodo zarr")
metadata = metadata_3D.copy()
return metadata

Expand Down
Loading
Loading