From 9dbb258d0d0fd191e3485f5ff98f07efccfbb6d7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 15 Jul 2023 22:11:51 +0200 Subject: [PATCH] Add doc strings to datasets to provide the correct dataset references --- torch_em/data/datasets/axondeepseg.py | 7 ++ torch_em/data/datasets/covid_if.py | 7 ++ torch_em/data/datasets/cremi.py | 80 ++++++++++++---------- torch_em/data/datasets/deepbacs.py | 7 ++ torch_em/data/datasets/dsb.py | 7 ++ torch_em/data/datasets/hpa.py | 7 ++ torch_em/data/datasets/isbi2012.py | 8 ++- torch_em/data/datasets/kasthuri.py | 6 ++ torch_em/data/datasets/livecell.py | 14 +++- torch_em/data/datasets/lizard.py | 6 ++ torch_em/data/datasets/lucchi.py | 6 ++ torch_em/data/datasets/mitoem.py | 6 ++ torch_em/data/datasets/mouse_embryo.py | 5 ++ torch_em/data/datasets/neurips_cell_seg.py | 10 +++ torch_em/data/datasets/nuc_mm.py | 6 ++ torch_em/data/datasets/plantseg.py | 6 ++ torch_em/data/datasets/platynereis.py | 24 +++++++ torch_em/data/datasets/snemi.py | 56 ++++++++------- torch_em/data/datasets/sponge_em.py | 6 ++ torch_em/data/datasets/tissuenet.py | 8 +++ torch_em/data/datasets/uro_cell.py | 6 ++ torch_em/data/datasets/vnc.py | 6 ++ 22 files changed, 226 insertions(+), 68 deletions(-) diff --git a/torch_em/data/datasets/axondeepseg.py b/torch_em/data/datasets/axondeepseg.py index 0fae333d..856a0852 100644 --- a/torch_em/data/datasets/axondeepseg.py +++ b/torch_em/data/datasets/axondeepseg.py @@ -127,6 +127,11 @@ def _require_axondeepseg_data(path, name, download): def get_axondeepseg_dataset( path, name, patch_shape, download=False, one_hot_encoding=False, data_fraction=None, split=None, **kwargs ): + """Dataset for the segmentation of myelinated axons in EM. + + This dataset is from the publication https://doi.org/10.1038/s41598-018-22181-4. + Please cite it if you use this dataset for a publication. 
+ """ if isinstance(name, str): name = [name] assert isinstance(name, (tuple, list)) @@ -168,6 +173,8 @@ def get_axondeepseg_loader( download=False, one_hot_encoding=False, data_fraction=None, split=None, **kwargs ): + """Dataloader for the segmentation of myelinated axons. See 'get_axondeepseg_dataset' for details. + """ ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) dataset = get_axondeepseg_dataset( path, name, patch_shape, download=download, one_hot_encoding=one_hot_encoding, diff --git a/torch_em/data/datasets/covid_if.py b/torch_em/data/datasets/covid_if.py index d2885989..22d3d53d 100644 --- a/torch_em/data/datasets/covid_if.py +++ b/torch_em/data/datasets/covid_if.py @@ -25,6 +25,11 @@ def get_covid_if_dataset( path, patch_shape, sample_range=None, target="cells", download=False, offsets=None, boundaries=False, binary=False, **kwargs ): + """Dataset for the segmentation of cells and nuclei in immunofluorescence. + + This dataset is from the publication https://doi.org/10.1002/bies.202000257. + Please cite it if you use this dataset for a publication. + """ available_targets = ("cells", "nuclei") # TODO also support infected_cells # available_targets = ("cells", "nuclei", "infected_cells") @@ -63,6 +68,8 @@ def get_covid_if_loader( path, patch_shape, batch_size, sample_range=None, target="cells", download=False, offsets=None, boundaries=False, binary=False, **kwargs ): + """Dataloader for the segmentation of cells and nuclei in immunofluorescence. See 'get_covid_if_dataset' for details. 
+ """ ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) dataset = get_covid_if_dataset( path, patch_shape, sample_range=sample_range, target=target, download=download, diff --git a/torch_em/data/datasets/cremi.py b/torch_em/data/datasets/cremi.py index e035eba8..d0a07c3f 100644 --- a/torch_em/data/datasets/cremi.py +++ b/torch_em/data/datasets/cremi.py @@ -25,44 +25,6 @@ # TODO add support for realigned volumes -def get_cremi_loader( - path, - patch_shape, - batch_size, - samples=("A", "B", "C"), - use_realigned=False, - download=False, - offsets=None, - boundaries=False, - rois={}, - defect_augmentation_kwargs={ - "p_drop_slice": 0.025, - "p_low_contrast": 0.025, - "p_deform_slice": 0.0, - "deformation_mode": "compress", - }, - **kwargs, -): - """ - """ - dataset_kwargs, loader_kwargs = util.split_kwargs( - torch_em.default_segmentation_dataset, **kwargs - ) - ds = get_cremi_dataset( - path=path, - patch_shape=patch_shape, - samples=samples, - use_realigned=use_realigned, - download=download, - offsets=offsets, - boundaries=boundaries, - rois=rois, - defect_augmentation_kwargs=defect_augmentation_kwargs, - **dataset_kwargs, - ) - return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs) - - def get_cremi_dataset( path, patch_shape, @@ -80,6 +42,10 @@ def get_cremi_dataset( }, **kwargs, ): + """Dataset for the segmentation of neurons in EM. + + This dataset is from the CREMI challenge: https://cremi.org/. 
+ """ assert len(patch_shape) == 3 if rois is not None: assert isinstance(rois, dict) @@ -132,3 +98,41 @@ def get_cremi_dataset( ) return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs) + + +def get_cremi_loader( + path, + patch_shape, + batch_size, + samples=("A", "B", "C"), + use_realigned=False, + download=False, + offsets=None, + boundaries=False, + rois={}, + defect_augmentation_kwargs={ + "p_drop_slice": 0.025, + "p_low_contrast": 0.025, + "p_deform_slice": 0.0, + "deformation_mode": "compress", + }, + **kwargs, +): + """Dataloader for the segmentation of neurons in EM. See 'get_cremi_dataset' for details. + """ + dataset_kwargs, loader_kwargs = util.split_kwargs( + torch_em.default_segmentation_dataset, **kwargs + ) + ds = get_cremi_dataset( + path=path, + patch_shape=patch_shape, + samples=samples, + use_realigned=use_realigned, + download=download, + offsets=offsets, + boundaries=boundaries, + rois=rois, + defect_augmentation_kwargs=defect_augmentation_kwargs, + **dataset_kwargs, + ) + return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs) diff --git a/torch_em/data/datasets/deepbacs.py b/torch_em/data/datasets/deepbacs.py index 25d5647b..32bd582b 100644 --- a/torch_em/data/datasets/deepbacs.py +++ b/torch_em/data/datasets/deepbacs.py @@ -43,6 +43,11 @@ def _get_paths(path, bac_type, split): def get_deepbacs_dataset( path, split, patch_shape, bac_type="mixed", download=False, **kwargs ): + """Dataset for the segmentation of bacteria in light microscopy. + + This dataset is from the publication https://doi.org/10.1038/s42003-022-03634-z. + Please cite it if you use this dataset for a publication. 
+ """ assert split in ("train", "test") bac_types = list(URLS.keys()) assert bac_type in bac_types, f"{bac_type} is not in expected bacteria types: {bac_types}" @@ -60,6 +65,8 @@ def get_deepbacs_dataset( def get_deepbacs_loader(path, split, patch_shape, batch_size, bac_type="mixed", download=False, **kwargs): + """Dataloader for the segmentation of bacteria in light microscopy. See 'get_deepbacs_dataset' for details. + """ ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) dataset = get_deepbacs_dataset(path, split, patch_shape, bac_type=bac_type, download=download, **ds_kwargs) loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs) diff --git a/torch_em/data/datasets/dsb.py b/torch_em/data/datasets/dsb.py index fd244285..559d573c 100644 --- a/torch_em/data/datasets/dsb.py +++ b/torch_em/data/datasets/dsb.py @@ -38,6 +38,11 @@ def get_dsb_dataset( offsets=None, boundaries=False, binary=False, source="reduced", **kwargs ): + """Dataset for the segmentation of nuclei in light microscopy. + + This dataset is from the publication https://doi.org/10.1038/s41592-019-0612-7. + Please cite it if you use this dataset for a publication. + """ assert split in ("test", "train"), split _download_dsb(path, source, download) @@ -58,6 +63,8 @@ def get_dsb_loader( offsets=None, boundaries=False, binary=False, source="reduced", **kwargs ): + """Dataloader for the segmentation of nuclei in light microscopy. See 'get_dsb_dataset' for details. 
+ """ ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/hpa.py b/torch_em/data/datasets/hpa.py index dae1b0e1..dc54062f 100644 --- a/torch_em/data/datasets/hpa.py +++ b/torch_em/data/datasets/hpa.py @@ -312,6 +312,11 @@ def get_hpa_segmentation_dataset( channels=["microtubules", "protein", "nuclei", "er"], download=False, n_workers_preproc=8, **kwargs ): + """Dataset for the segmentation of cells in light microscopy. + + This dataset is from the publication https://doi.org/10.1038/s41592-019-0658-6. + Please cite it if you use this dataset for a publication. + """ data_is_complete = _check_data(path) if not data_is_complete: _download_hpa_data(path, "segmentation", download) @@ -336,6 +341,8 @@ def get_hpa_segmentation_loader( channels=["microtubules", "protein", "nuclei", "er"], download=False, n_workers_preproc=8, **kwargs ): + """Dataloader for the segmentation of cells in light microscopy. See 'get_hpa_segmentation_dataset' for details. + """ ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/isbi2012.py b/torch_em/data/datasets/isbi2012.py index 309b3554..b3f390fd 100644 --- a/torch_em/data/datasets/isbi2012.py +++ b/torch_em/data/datasets/isbi2012.py @@ -9,6 +9,11 @@ def get_isbi_dataset( path, patch_shape, download=False, offsets=None, boundaries=False, use_original_labels=False, **kwargs ): + """Dataset for the segmentation of neurons in EM. + + This dataset is from the publication https://doi.org/10.3389/fnana.2015.00142. + Please cite it if you use this dataset for a publication. + """ if not path.endswith(".h5"): raise ValueError("Isbi path must be a hdf5 file.") assert len(patch_shape) == 3 @@ -32,8 +37,7 @@ def get_isbi_loader( use_original_labels=False, **kwargs ): - """ - """ + """Dataloader for the segmentation of neurons in EM. 
See 'get_isbi_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/kasthuri.py b/torch_em/data/datasets/kasthuri.py index f1a6c21a..76b30869 100644 --- a/torch_em/data/datasets/kasthuri.py +++ b/torch_em/data/datasets/kasthuri.py @@ -81,6 +81,11 @@ def _require_kasthuri_data(path, download): def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs): + """Dataset for the segmentation of mitochondria in EM. + + This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024. + Please cite it if you use this dataset for a publication. + """ assert split in ("train", "test") _require_kasthuri_data(path, download) data_path = os.path.join(path, f"kasthuri_{split}.h5") @@ -90,6 +95,7 @@ def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs): def get_kasthuri_loader(path, split, patch_shape, batch_size, download=False, **kwargs): + """Dataloader for the segmentation of mitochondria in EM. See 'get_kasthuri_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/livecell.py b/torch_em/data/datasets/livecell.py index e4c63b42..df50c843 100644 --- a/torch_em/data/datasets/livecell.py +++ b/torch_em/data/datasets/livecell.py @@ -149,6 +149,11 @@ def get_livecell_dataset( offsets=None, boundaries=False, binary=False, cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs ): + """Dataset for the segmentation of cells in phase-contrast microscopy. + + This dataset is from the publication https://doi.org/10.1038/s41592-021-01249-6. + Please cite it if you use this dataset for a publication. 
+ """ assert split in ("train", "val", "test") if cell_types is not None: assert isinstance(cell_types, (list, tuple)),\ @@ -169,9 +174,12 @@ def get_livecell_dataset( return dataset -def get_livecell_loader(path, split, patch_shape, batch_size, download=False, - offsets=None, boundaries=False, binary=False, - cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs): +def get_livecell_loader( + path, split, patch_shape, batch_size, download=False, + offsets=None, boundaries=False, binary=False, + cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs +): + """Dataloader for the segmentation of cells in phase-contrast microscopy. See 'get_livecell_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) dataset = get_livecell_dataset( path, split, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, diff --git a/torch_em/data/datasets/lizard.py b/torch_em/data/datasets/lizard.py index 7cc1b8cc..30384e91 100644 --- a/torch_em/data/datasets/lizard.py +++ b/torch_em/data/datasets/lizard.py @@ -80,6 +80,11 @@ def _require_lizard_data(path, download): def get_lizard_dataset(path, patch_shape, download=False, **kwargs): + """Dataset for the segmentation of nuclei in histopathology. + + This dataset is from the publication https://doi.org/10.48550/arXiv.2108.11195. + Please cite it if you use this dataset for a publication. + """ _require_lizard_data(path, download) data_paths = glob(os.path.join(path, "*.h5")) @@ -96,6 +101,7 @@ def get_lizard_dataset(path, patch_shape, download=False, **kwargs): # TODO implement selecting different tissue types # TODO implement train / val / test split (is pre-defined in a csv) def get_lizard_loader(path, patch_shape, batch_size, download=False, **kwargs): + """Dataloader for the segmentation of nuclei in histopathology. 
See 'get_lizard_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/lucchi.py b/torch_em/data/datasets/lucchi.py index ea3116a9..981c4eef 100644 --- a/torch_em/data/datasets/lucchi.py +++ b/torch_em/data/datasets/lucchi.py @@ -78,6 +78,11 @@ def _require_lucchi_data(path, download): def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs): + """Dataset for the segmentation of mitochondria in EM. + + This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024. + Please cite it if you use this dataset for a publication. + """ assert split in ("train", "test") _require_lucchi_data(path, download) data_path = os.path.join(path, f"lucchi_{split}.h5") @@ -87,6 +92,7 @@ def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs): def get_lucchi_loader(path, split, patch_shape, batch_size, download=False, **kwargs): + """Dataloader for the segmentation of mitochondria in EM. See 'get_lucchi_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/mitoem.py b/torch_em/data/datasets/mitoem.py index e8ff86c1..b78128b1 100644 --- a/torch_em/data/datasets/mitoem.py +++ b/torch_em/data/datasets/mitoem.py @@ -135,6 +135,11 @@ def get_mitoem_dataset( binary=False, **kwargs, ): + """Dataset for the segmentation of mitochondria in EM. + + This dataset is from the publication https://doi.org/10.1007/978-3-030-59722-1_7. + Please cite it if you use this dataset for a publication. + """ assert len(patch_shape) == 3 if isinstance(splits, str): splits = [splits] @@ -175,6 +180,7 @@ def get_mitoem_loader( binary=False, **kwargs, ): + """Dataloader for the segmentation of mitochondria in EM. 
See 'get_mitoem_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/mouse_embryo.py b/torch_em/data/datasets/mouse_embryo.py index 10f0ef77..fb6839dd 100644 --- a/torch_em/data/datasets/mouse_embryo.py +++ b/torch_em/data/datasets/mouse_embryo.py @@ -30,6 +30,10 @@ def get_mouse_embryo_dataset( binary=False, **kwargs, ): + """Dataset for the segmentation of nuclei in confocal microscopy. + + This dataset is stored on Zenodo: https://zenodo.org/record/6546550. + """ assert name in ("membrane", "nuclei") assert split in ("train", "val") assert len(patch_shape) == 3 @@ -62,6 +66,7 @@ def get_mouse_embryo_loader( binary=False, **kwargs, ): + """Dataloader for the segmentation of nuclei in confocal microscopy. See 'get_mouse_embryo_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/neurips_cell_seg.py b/torch_em/data/datasets/neurips_cell_seg.py index 27a1660b..9a55bb26 100644 --- a/torch_em/data/datasets/neurips_cell_seg.py +++ b/torch_em/data/datasets/neurips_cell_seg.py @@ -75,6 +75,10 @@ def get_neurips_cellseg_supervised_dataset( sampler=None, val_fraction=0.1, ): + """Dataset for the segmentation of cells in light microscopy. + + This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/. + """ assert split in ("train", "val", None), split image_paths, label_paths = _get_image_and_label_paths(root, split, val_fraction) @@ -111,6 +115,7 @@ def get_neurips_cellseg_supervised_loader( val_fraction=0.1, **loader_kwargs ): + """Dataloader for the segmentation of cells in light microscopy. 
See 'get_neurips_cellseg_supervised_dataset'.""" ds = get_neurips_cellseg_supervised_dataset( root, split, patch_shape, make_rgb=make_rgb, label_transform=label_transform, label_transform2=label_transform2, raw_transform=raw_transform, transform=transform, @@ -157,6 +162,10 @@ def get_neurips_cellseg_unsupervised_dataset( use_images=True, use_wholeslide=True, ): + """Dataset for the segmentation of cells in light microscopy. + + This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/. + """ if raw_transform is None: trafo = to_rgb if make_rgb else None raw_transform = torch_em.transform.get_raw_transform(augmentation2=trafo) @@ -196,6 +205,7 @@ def get_neurips_cellseg_unsupervised_loader( use_wholeslide=True, **loader_kwargs, ): + """Dataloader for the segmentation of cells in light microscopy. See 'get_neurips_cellseg_unsupervised_dataset'.""" ds = get_neurips_cellseg_unsupervised_dataset( root, patch_shape, make_rgb=make_rgb, raw_transform=raw_transform, transform=transform, dtype=dtype, sampler=sampler, use_images=use_images, use_wholeslide=use_wholeslide diff --git a/torch_em/data/datasets/nuc_mm.py b/torch_em/data/datasets/nuc_mm.py index 03b8ebd2..bf1c41fd 100644 --- a/torch_em/data/datasets/nuc_mm.py +++ b/torch_em/data/datasets/nuc_mm.py @@ -44,6 +44,11 @@ def _require_dataset(path, sample): def get_nuc_mm_dataset(path, sample, split, patch_shape, download=False, **kwargs): + """Dataset for the segmentation of nuclei in EM and X-Ray. + + This dataset is from the publication https://doi.org/10.1007/978-3-030-87193-2_16. + Please cite it if you use this dataset for a publication. 
+ """ assert sample in ("mouse", "zebrafish") assert split in ("train", "val") @@ -61,6 +66,7 @@ def get_nuc_mm_dataset(path, sample, split, patch_shape, download=False, **kwarg def get_nuc_mm_loader(path, sample, split, patch_shape, batch_size, download=False, **kwargs): + """Dataloader for the segmentation of nuclei in EM and X-Ray. See 'get_nuc_mm_dataset' for details.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/plantseg.py b/torch_em/data/datasets/plantseg.py index 2af977c0..1a85f1f2 100644 --- a/torch_em/data/datasets/plantseg.py +++ b/torch_em/data/datasets/plantseg.py @@ -71,6 +71,11 @@ def get_plantseg_dataset( binary=False, **kwargs, ): + """Dataset for the segmentation of plant cells in confocal and light-sheet microscopy. + + This dataset is from the publication https://doi.org/10.7554/eLife.57613. + Please cite it if you use this dataset for a publication. + """ assert len(patch_shape) == 3 data_path = _require_plantseg_data(path, download, name, split) @@ -99,6 +104,7 @@ def get_plantseg_loader( binary=False, **kwargs, ): + """Dataloader for the segmentation of plant cells in confocal and light-sheet microscopy. See 'get_plantseg_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/platynereis.py b/torch_em/data/datasets/platynereis.py index 43dd3768..95d51c38 100644 --- a/torch_em/data/datasets/platynereis.py +++ b/torch_em/data/datasets/platynereis.py @@ -44,6 +44,11 @@ def _check_data(path, prefix, extension, n_files): def get_platynereis_cuticle_dataset(path, patch_shape, sample_ids=None, download=False, **kwargs): + """Dataset for the segmentation of cuticle in EM. + + This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017. + Please cite it if you use this dataset for a publication. 
+ """ cuticle_root = os.path.join(path, "cuticle") ext = ".n5" @@ -64,6 +69,7 @@ def get_platynereis_cuticle_dataset(path, patch_shape, sample_ids=None, download def get_platynereis_cuticle_loader( path, patch_shape, batch_size, sample_ids=None, download=False, **kwargs ): + """Dataloader for the segmentation of cuticle in EM. See 'get_platynereis_cuticle_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) @@ -78,6 +84,11 @@ def get_platynereis_cilia_dataset( offsets=None, boundaries=False, binary=False, download=False, **kwargs ): + """Dataset for the segmentation of cilia in EM. + + This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017. + Please cite it if you use this dataset for a publication. + """ assert split in ("train", "val") cilia_root = os.path.join(path, "cilia") @@ -102,6 +113,7 @@ def get_platynereis_cilia_loader( offsets=None, boundaries=False, binary=False, download=False, **kwargs ): + """Dataloader for the segmentation of cilia in EM. See 'get_platynereis_cilia_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) @@ -119,6 +131,11 @@ def get_platynereis_cell_dataset( offsets=None, boundaries=False, download=False, **kwargs ): + """Dataset for the segmentation of cells in EM. + + This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017. + Please cite it if you use this dataset for a publication. + """ cell_root = os.path.join(path, "membrane") prefix = "train_data_membrane_" @@ -157,6 +174,7 @@ def get_platynereis_cell_loader( offsets=None, boundaries=False, download=False, **kwargs ): + """Dataloader for the segmentation of cells in EM. 
See 'get_platynereis_cell_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) @@ -173,6 +191,11 @@ def get_platynereis_nuclei_dataset( offsets=None, boundaries=False, binary=False, download=False, **kwargs, ): + """Dataset for the segmentation of nuclei in EM. + + This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017. + Please cite it if you use this dataset for a publication. + """ nuc_root = os.path.join(path, "nuclei") prefix = "train_data_nuclei_" ext = ".h5" @@ -211,6 +234,7 @@ def get_platynereis_nuclei_loader( offsets=None, boundaries=False, binary=False, download=False, **kwargs ): + """Dataloader for the segmentation of nuclei in EM. See 'get_platynereis_nuclei_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/snemi.py b/torch_em/data/datasets/snemi.py index 3b9148f6..ca1255f2 100644 --- a/torch_em/data/datasets/snemi.py +++ b/torch_em/data/datasets/snemi.py @@ -13,31 +13,6 @@ } -def get_snemi_loader( - path, - patch_shape, - batch_size, - sample="train", - download=False, - offsets=None, - boundaries=False, - **kwargs, -): - ds_kwargs, loader_kwargs = util.split_kwargs( - torch_em.default_segmentation_dataset, **kwargs - ) - ds = get_snemi_dataset( - path=path, - patch_shape=patch_shape, - sample=sample, - download=download, - offsets=offsets, - boundaries=boundaries, - **ds_kwargs, - ) - return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs) - - def get_snemi_dataset( path, patch_shape, @@ -47,6 +22,11 @@ def get_snemi_dataset( boundaries=False, **kwargs, ): + """Dataset for the segmentation of neurons in EM. + + This dataset is from the publication https://doi.org/10.1016/j.cell.2015.06.054. + Please cite it if you use this dataset for a publication. 
+ """ assert len(patch_shape) == 3 os.makedirs(path, exist_ok=True) @@ -62,3 +42,29 @@ def get_snemi_dataset( raw_key = "volumes/raw" label_key = "volumes/labels/neuron_ids" return torch_em.default_segmentation_dataset(data_path, raw_key, data_path, label_key, patch_shape, **kwargs) + + +def get_snemi_loader( + path, + patch_shape, + batch_size, + sample="train", + download=False, + offsets=None, + boundaries=False, + **kwargs, +): + """Dataloader for the segmentation of neurons in EM. See 'get_snemi_dataset'.""" + ds_kwargs, loader_kwargs = util.split_kwargs( + torch_em.default_segmentation_dataset, **kwargs + ) + ds = get_snemi_dataset( + path=path, + patch_shape=patch_shape, + sample=sample, + download=download, + offsets=offsets, + boundaries=boundaries, + **ds_kwargs, + ) + return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs) diff --git a/torch_em/data/datasets/sponge_em.py b/torch_em/data/datasets/sponge_em.py index 15de1b57..f5553584 100644 --- a/torch_em/data/datasets/sponge_em.py +++ b/torch_em/data/datasets/sponge_em.py @@ -16,6 +16,11 @@ def _require_sponge_em_data(path, download): def get_sponge_em_dataset(path, mode, patch_shape, sample_ids=None, download=False, **kwargs): + """Dataset for the segmentation of sponge cells and organelles in EM. + + This dataset is from the publication https://doi.org/10.1126/science.abj2949. + Please cite it if you use this dataset for a publication. + """ assert mode in ("semantic", "instances") n_files = len(glob(os.path.join(path, "*.h5"))) @@ -34,6 +39,7 @@ def get_sponge_em_dataset(path, mode, patch_shape, sample_ids=None, download=Fal def get_sponge_em_loader(path, mode, patch_shape, batch_size, sample_ids=None, download=False, **kwargs): + """Dataloader for the segmentation of sponge cells and organelles in EM. 
See 'get_sponge_em_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/tissuenet.py b/torch_em/data/datasets/tissuenet.py index 7d41f2f3..f6098eee 100644 --- a/torch_em/data/datasets/tissuenet.py +++ b/torch_em/data/datasets/tissuenet.py @@ -53,6 +53,11 @@ def _create_dataset(path, zip_path): def get_tissuenet_dataset( path, split, patch_shape, raw_channel, label_channel, download=False, **kwargs ): + """Dataset for the segmentation of cells in tissue imaged with light microscopy. + + This dataset is from the publication https://doi.org/10.1038/s41587-021-01094-0. + Please cite it if you use this dataset for a publication. + """ assert raw_channel in ("nucleus", "cell", "rgb") assert label_channel in ("nucleus", "cell") @@ -90,6 +95,9 @@ def get_tissuenet_dataset( def get_tissuenet_loader( path, split, patch_shape, batch_size, raw_channel, label_channel, download=False, **kwargs ): + """Dataloader for the segmentation of cells in tissue imaged with light microscopy. + See 'get_tissuenet_dataset' for details. + """ ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) dataset = get_tissuenet_dataset( path, split, patch_shape, raw_channel, label_channel, download, **ds_kwargs diff --git a/torch_em/data/datasets/uro_cell.py b/torch_em/data/datasets/uro_cell.py index 332c2e07..fb8e92ad 100644 --- a/torch_em/data/datasets/uro_cell.py +++ b/torch_em/data/datasets/uro_cell.py @@ -85,6 +85,11 @@ def get_uro_cell_dataset( binary=False, **kwargs ): + """Dataset for the segmentation of mitochondria and other organelles in EM. + + This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693. + Please cite it if you use this dataset for a publication. 
+ """ assert target in ("fv", "golgi", "lyso", "mito") _require_urocell_data(path, download) paths, label_key = _get_paths(path, target) @@ -132,6 +137,7 @@ def get_uro_cell_loader( binary=False, **kwargs ): + """Dataloader for the segmentation of mitochondria and other organelles in EM. See 'get_uro_cell_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs ) diff --git a/torch_em/data/datasets/vnc.py b/torch_em/data/datasets/vnc.py index be623296..676dc132 100644 --- a/torch_em/data/datasets/vnc.py +++ b/torch_em/data/datasets/vnc.py @@ -59,6 +59,11 @@ def get_vnc_mito_dataset( download=False, **kwargs ): + """Dataset for the segmentation of mitochondria in EM. + + This dataset is from https://doi.org/10.6084/m9.figshare.856713.v1. + Please cite it if you use this dataset for a publication. + """ _get_vnc_data(path, download) data_path = os.path.join(path, "vnc_train.h5") @@ -81,6 +86,7 @@ def get_vnc_mito_loader( download=False, **kwargs ): + """Dataloader for the segmentation of mitochondria in EM. See 'get_vnc_mito_dataset'.""" ds_kwargs, loader_kwargs = util.split_kwargs( torch_em.default_segmentation_dataset, **kwargs )