Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doc strings to datasets to provide the correct dataset references #143

Merged
merged 1 commit into from
Jul 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions torch_em/data/datasets/axondeepseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def _require_axondeepseg_data(path, name, download):
def get_axondeepseg_dataset(
path, name, patch_shape, download=False, one_hot_encoding=False, data_fraction=None, split=None, **kwargs
):
"""Dataset for the segmentation of myelinated axons in EM.

This dataset is from the publication https://doi.org/10.1038/s41598-018-22181-4.
Please cite it if you use this dataset for a publication.
"""
if isinstance(name, str):
name = [name]
assert isinstance(name, (tuple, list))
Expand Down Expand Up @@ -168,6 +173,8 @@ def get_axondeepseg_loader(
download=False, one_hot_encoding=False,
data_fraction=None, split=None, **kwargs
):
"""Dataloader for the segmentation of myelinated axons. See 'get_axondeepseg_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_axondeepseg_dataset(
path, name, patch_shape, download=download, one_hot_encoding=one_hot_encoding,
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/covid_if.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def get_covid_if_dataset(
path, patch_shape, sample_range=None, target="cells", download=False,
offsets=None, boundaries=False, binary=False, **kwargs
):
"""Dataset for the cells and nuclei in immunofluorescence.

This dataset is from the publication https://doi.org/10.1002/bies.202000257.
Please cite it if you use this dataset for a publication.
"""
available_targets = ("cells", "nuclei")
# TODO also support infected_cells
# available_targets = ("cells", "nuclei", "infected_cells")
Expand Down Expand Up @@ -63,6 +68,8 @@ def get_covid_if_loader(
path, patch_shape, batch_size, sample_range=None, target="cells", download=False,
offsets=None, boundaries=False, binary=False, **kwargs
):
"""Dataloader for the segmentation of myelinated axons. See 'get_covid_if_loader' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_covid_if_dataset(
path, patch_shape, sample_range=sample_range, target=target, download=download,
Expand Down
80 changes: 42 additions & 38 deletions torch_em/data/datasets/cremi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,44 +25,6 @@


# TODO add support for realigned volumes
def get_cremi_loader(
path,
patch_shape,
batch_size,
samples=("A", "B", "C"),
use_realigned=False,
download=False,
offsets=None,
boundaries=False,
rois={},
defect_augmentation_kwargs={
"p_drop_slice": 0.025,
"p_low_contrast": 0.025,
"p_deform_slice": 0.0,
"deformation_mode": "compress",
},
**kwargs,
):
"""
"""
dataset_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
ds = get_cremi_dataset(
path=path,
patch_shape=patch_shape,
samples=samples,
use_realigned=use_realigned,
download=download,
offsets=offsets,
boundaries=boundaries,
rois=rois,
defect_augmentation_kwargs=defect_augmentation_kwargs,
**dataset_kwargs,
)
return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)


def get_cremi_dataset(
path,
patch_shape,
Expand All @@ -80,6 +42,10 @@ def get_cremi_dataset(
},
**kwargs,
):
"""Dataset for the segmentation of neurons in EM.

This dataset is from the CREMI challenge: https://cremi.org/.
"""
assert len(patch_shape) == 3
if rois is not None:
assert isinstance(rois, dict)
Expand Down Expand Up @@ -132,3 +98,41 @@ def get_cremi_dataset(
)

return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)


def get_cremi_loader(
    path,
    patch_shape,
    batch_size,
    samples=("A", "B", "C"),
    use_realigned=False,
    download=False,
    offsets=None,
    boundaries=False,
    rois=None,
    defect_augmentation_kwargs=None,
    **kwargs,
):
    """Dataloader for the segmentation of neurons in EM. See 'get_cremi_dataset' for details.
    """
    # Avoid mutable default arguments: materialize the defaults per call so that
    # callers mutating the returned/used dicts cannot leak state across calls.
    if rois is None:
        rois = {}
    if defect_augmentation_kwargs is None:
        defect_augmentation_kwargs = {
            "p_drop_slice": 0.025,
            "p_low_contrast": 0.025,
            "p_deform_slice": 0.0,
            "deformation_mode": "compress",
        }
    dataset_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_cremi_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        use_realigned=use_realigned,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        defect_augmentation_kwargs=defect_augmentation_kwargs,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
7 changes: 7 additions & 0 deletions torch_em/data/datasets/deepbacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def _get_paths(path, bac_type, split):
def get_deepbacs_dataset(
path, split, patch_shape, bac_type="mixed", download=False, **kwargs
):
"""Dataset for the segmentation of bacteria in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s42003-022-03634-z.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
bac_types = list(URLS.keys())
assert bac_type in bac_types, f"{bac_type} is not in expected bacteria types: {bac_types}"
Expand All @@ -60,6 +65,8 @@ def get_deepbacs_dataset(


def get_deepbacs_loader(path, split, patch_shape, batch_size, bac_type="mixed", download=False, **kwargs):
"""Dataloader for the segmentation of bacteria in light microscopy. See 'get_deepbacs_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_deepbacs_dataset(path, split, patch_shape, bac_type=bac_type, download=download, **ds_kwargs)
loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/dsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ def get_dsb_dataset(
offsets=None, boundaries=False, binary=False,
source="reduced", **kwargs
):
"""Dataset for the segmentation of nuclei in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-019-0612-7.
Please cite it if you use this dataset for a publication.
"""
assert split in ("test", "train"), split
_download_dsb(path, source, download)

Expand All @@ -58,6 +63,8 @@ def get_dsb_loader(
offsets=None, boundaries=False, binary=False,
source="reduced", **kwargs
):
"""Dataloader for the segmentation of nuclei in light microscopy. See 'get_dsb_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/hpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ def get_hpa_segmentation_dataset(
channels=["microtubules", "protein", "nuclei", "er"],
download=False, n_workers_preproc=8, **kwargs
):
"""Dataset for the segmentation of cells in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-019-0658-6.
Please cite it if you use this dataset for a publication.
"""
data_is_complete = _check_data(path)
if not data_is_complete:
_download_hpa_data(path, "segmentation", download)
Expand All @@ -336,6 +341,8 @@ def get_hpa_segmentation_loader(
channels=["microtubules", "protein", "nuclei", "er"],
download=False, n_workers_preproc=8, **kwargs
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_hpa_segmentation_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
8 changes: 6 additions & 2 deletions torch_em/data/datasets/isbi2012.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ def get_isbi_dataset(
path, patch_shape, download=False, offsets=None, boundaries=False,
use_original_labels=False, **kwargs
):
"""Dataset for the segmentation of neurons in EM.

This dataset is from the publication https://doi.org/10.3389/fnana.2015.00142.
Please cite it if you use this dataset for a publication.
"""
if not path.endswith(".h5"):
raise ValueError("Isbi path must be a hdf5 file.")
assert len(patch_shape) == 3
Expand All @@ -32,8 +37,7 @@ def get_isbi_loader(
use_original_labels=False,
**kwargs
):
"""
"""
"""Dataloader for the segmentation of neurons in EM. See 'get_isbi_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/kasthuri.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ def _require_kasthuri_data(path, download):


def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
_require_kasthuri_data(path, download)
data_path = os.path.join(path, f"kasthuri_{split}.h5")
Expand All @@ -90,6 +95,7 @@ def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs):


def get_kasthuri_loader(path, split, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_kasthuri_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
14 changes: 11 additions & 3 deletions torch_em/data/datasets/livecell.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ def get_livecell_dataset(
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs
):
"""Dataset for the segmentation of cells in phase-contrast microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-021-01249-6.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "val", "test")
if cell_types is not None:
assert isinstance(cell_types, (list, tuple)),\
Expand All @@ -169,9 +174,12 @@ def get_livecell_dataset(
return dataset


def get_livecell_loader(path, split, patch_shape, batch_size, download=False,
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs):
def get_livecell_loader(
path, split, patch_shape, batch_size, download=False,
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs
):
"""Dataloader for the segmentation of cells in phase-contrast microscopy. See 'get_livecell_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_livecell_dataset(
path, split, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary,
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/lizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def _require_lizard_data(path, download):


def get_lizard_dataset(path, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of nuclei in histopathology.

This dataset is from the publication https://doi.org/10.48550/arXiv.2108.11195.
Please cite it if you use this dataset for a publication.
"""
_require_lizard_data(path, download)

data_paths = glob(os.path.join(path, "*.h5"))
Expand All @@ -96,6 +101,7 @@ def get_lizard_dataset(path, patch_shape, download=False, **kwargs):
# TODO implement selecting different tissue types
# TODO implement train / val / test split (is pre-defined in a csv)
def get_lizard_loader(path, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of nuclei in histopathology. See 'get_lizard_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/lucchi.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ def _require_lucchi_data(path, download):


def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
_require_lucchi_data(path, download)
data_path = os.path.join(path, f"lucchi_{split}.h5")
Expand All @@ -87,6 +92,7 @@ def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs):


def get_lucchi_loader(path, split, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_lucchi_dataset' for details"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/mitoem.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def get_mitoem_dataset(
binary=False,
**kwargs,
):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.1007/978-3-030-59722-1_7.
Please cite it if you use this dataset for a publication.
"""
assert len(patch_shape) == 3
if isinstance(splits, str):
splits = [splits]
Expand Down Expand Up @@ -175,6 +180,7 @@ def get_mitoem_loader(
binary=False,
**kwargs,
):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_mitoem_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
5 changes: 5 additions & 0 deletions torch_em/data/datasets/mouse_embryo.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def get_mouse_embryo_dataset(
binary=False,
**kwargs,
):
"""Dataset for the segmentation of nuclei in confocal microscopy.

This dataset is stored on zenodo: https://zenodo.org/record/6546550.
"""
assert name in ("membrane", "nuclei")
assert split in ("train", "val")
assert len(patch_shape) == 3
Expand Down Expand Up @@ -62,6 +66,7 @@ def get_mouse_embryo_loader(
binary=False,
**kwargs,
):
"""Dataloader for the segmentation of nuclei in confocal microscopy. See 'get_mouse_embryo_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
10 changes: 10 additions & 0 deletions torch_em/data/datasets/neurips_cell_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def get_neurips_cellseg_supervised_dataset(
sampler=None,
val_fraction=0.1,
):
"""Dataset for the segmentation of cells in light microscopy.

    This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/.
"""
assert split in ("train", "val", None), split
image_paths, label_paths = _get_image_and_label_paths(root, split, val_fraction)

Expand Down Expand Up @@ -111,6 +115,7 @@ def get_neurips_cellseg_supervised_loader(
val_fraction=0.1,
**loader_kwargs
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_neurips_cellseg_supervised_dataset'."""
ds = get_neurips_cellseg_supervised_dataset(
root, split, patch_shape, make_rgb=make_rgb, label_transform=label_transform,
label_transform2=label_transform2, raw_transform=raw_transform, transform=transform,
Expand Down Expand Up @@ -157,6 +162,10 @@ def get_neurips_cellseg_unsupervised_dataset(
use_images=True,
use_wholeslide=True,
):
"""Dataset for the segmentation of cells in light microscopy.

    This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/.
"""
if raw_transform is None:
trafo = to_rgb if make_rgb else None
raw_transform = torch_em.transform.get_raw_transform(augmentation2=trafo)
Expand Down Expand Up @@ -196,6 +205,7 @@ def get_neurips_cellseg_unsupervised_loader(
use_wholeslide=True,
**loader_kwargs,
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_neurips_cellseg_unsupervised_dataset'."""
ds = get_neurips_cellseg_unsupervised_dataset(
root, patch_shape, make_rgb=make_rgb, raw_transform=raw_transform, transform=transform,
dtype=dtype, sampler=sampler, use_images=use_images, use_wholeslide=use_wholeslide
Expand Down
Loading
Loading