Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doc strings to datasets to provide the correct dataset references #143

Merged
merged 1 commit into from
Jul 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions torch_em/data/datasets/axondeepseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def _require_axondeepseg_data(path, name, download):
def get_axondeepseg_dataset(
path, name, patch_shape, download=False, one_hot_encoding=False, data_fraction=None, split=None, **kwargs
):
"""Dataset for the segmentation of myelinated axons in EM.

This dataset is from the publication https://doi.org/10.1038/s41598-018-22181-4.
Please cite it if you use this dataset for a publication.
"""
if isinstance(name, str):
name = [name]
assert isinstance(name, (tuple, list))
Expand Down Expand Up @@ -168,6 +173,8 @@ def get_axondeepseg_loader(
download=False, one_hot_encoding=False,
data_fraction=None, split=None, **kwargs
):
"""Dataloader for the segmentation of myelinated axons. See 'get_axondeepseg_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_axondeepseg_dataset(
path, name, patch_shape, download=download, one_hot_encoding=one_hot_encoding,
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/covid_if.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def get_covid_if_dataset(
path, patch_shape, sample_range=None, target="cells", download=False,
offsets=None, boundaries=False, binary=False, **kwargs
):
"""Dataset for the cells and nuclei in immunofluorescence.

This dataset is from the publication https://doi.org/10.1002/bies.202000257.
Please cite it if you use this dataset for a publication.
"""
available_targets = ("cells", "nuclei")
# TODO also support infected_cells
# available_targets = ("cells", "nuclei", "infected_cells")
Expand Down Expand Up @@ -63,6 +68,8 @@ def get_covid_if_loader(
path, patch_shape, batch_size, sample_range=None, target="cells", download=False,
offsets=None, boundaries=False, binary=False, **kwargs
):
"""Dataloader for the segmentation of myelinated axons. See 'get_covid_if_loader' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_covid_if_dataset(
path, patch_shape, sample_range=sample_range, target=target, download=download,
Expand Down
80 changes: 42 additions & 38 deletions torch_em/data/datasets/cremi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,44 +25,6 @@


# TODO add support for realigned volumes
def get_cremi_loader(
path,
patch_shape,
batch_size,
samples=("A", "B", "C"),
use_realigned=False,
download=False,
offsets=None,
boundaries=False,
rois={},
defect_augmentation_kwargs={
"p_drop_slice": 0.025,
"p_low_contrast": 0.025,
"p_deform_slice": 0.0,
"deformation_mode": "compress",
},
**kwargs,
):
"""
"""
dataset_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
ds = get_cremi_dataset(
path=path,
patch_shape=patch_shape,
samples=samples,
use_realigned=use_realigned,
download=download,
offsets=offsets,
boundaries=boundaries,
rois=rois,
defect_augmentation_kwargs=defect_augmentation_kwargs,
**dataset_kwargs,
)
return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)


def get_cremi_dataset(
path,
patch_shape,
Expand All @@ -80,6 +42,10 @@ def get_cremi_dataset(
},
**kwargs,
):
"""Dataset for the segmentation of neurons in EM.

This dataset is from the CREMI challenge: https://cremi.org/.
"""
assert len(patch_shape) == 3
if rois is not None:
assert isinstance(rois, dict)
Expand Down Expand Up @@ -132,3 +98,41 @@ def get_cremi_dataset(
)

return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)


def get_cremi_loader(
    path,
    patch_shape,
    batch_size,
    samples=("A", "B", "C"),
    use_realigned=False,
    download=False,
    offsets=None,
    boundaries=False,
    rois=None,
    defect_augmentation_kwargs=None,
    **kwargs,
):
    """Dataloader for the segmentation of neurons in EM. See 'get_cremi_dataset' for details.
    """
    # Avoid mutable default arguments: materialize the defaults per call so that
    # callers mutating the returned/used dicts cannot leak state across calls.
    if rois is None:
        rois = {}
    if defect_augmentation_kwargs is None:
        defect_augmentation_kwargs = {
            "p_drop_slice": 0.025,
            "p_low_contrast": 0.025,
            "p_deform_slice": 0.0,
            "deformation_mode": "compress",
        }
    dataset_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_cremi_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        use_realigned=use_realigned,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        defect_augmentation_kwargs=defect_augmentation_kwargs,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
7 changes: 7 additions & 0 deletions torch_em/data/datasets/deepbacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def _get_paths(path, bac_type, split):
def get_deepbacs_dataset(
path, split, patch_shape, bac_type="mixed", download=False, **kwargs
):
"""Dataset for the segmentation of bacteria in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s42003-022-03634-z.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
bac_types = list(URLS.keys())
assert bac_type in bac_types, f"{bac_type} is not in expected bacteria types: {bac_types}"
Expand All @@ -60,6 +65,8 @@ def get_deepbacs_dataset(


def get_deepbacs_loader(path, split, patch_shape, batch_size, bac_type="mixed", download=False, **kwargs):
"""Dataloader for the segmentation of bacteria in light microscopy. See 'get_deepbacs_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_deepbacs_dataset(path, split, patch_shape, bac_type=bac_type, download=download, **ds_kwargs)
loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/dsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ def get_dsb_dataset(
offsets=None, boundaries=False, binary=False,
source="reduced", **kwargs
):
"""Dataset for the segmentation of nuclei in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-019-0612-7.
Please cite it if you use this dataset for a publication.
"""
assert split in ("test", "train"), split
_download_dsb(path, source, download)

Expand All @@ -58,6 +63,8 @@ def get_dsb_loader(
offsets=None, boundaries=False, binary=False,
source="reduced", **kwargs
):
"""Dataloader for the segmentation of nuclei in light microscopy. See 'get_dsb_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
7 changes: 7 additions & 0 deletions torch_em/data/datasets/hpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ def get_hpa_segmentation_dataset(
channels=["microtubules", "protein", "nuclei", "er"],
download=False, n_workers_preproc=8, **kwargs
):
"""Dataset for the segmentation of cells in light microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-019-0658-6.
Please cite it if you use this dataset for a publication.
"""
data_is_complete = _check_data(path)
if not data_is_complete:
_download_hpa_data(path, "segmentation", download)
Expand All @@ -336,6 +341,8 @@ def get_hpa_segmentation_loader(
channels=["microtubules", "protein", "nuclei", "er"],
download=False, n_workers_preproc=8, **kwargs
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_hpa_segmentation_dataset' for details.
"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
8 changes: 6 additions & 2 deletions torch_em/data/datasets/isbi2012.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ def get_isbi_dataset(
path, patch_shape, download=False, offsets=None, boundaries=False,
use_original_labels=False, **kwargs
):
"""Dataset for the segmentation of neurons in EM.

This dataset is from the publication https://doi.org/10.3389/fnana.2015.00142.
Please cite it if you use this dataset for a publication.
"""
if not path.endswith(".h5"):
raise ValueError("Isbi path must be a hdf5 file.")
assert len(patch_shape) == 3
Expand All @@ -32,8 +37,7 @@ def get_isbi_loader(
use_original_labels=False,
**kwargs
):
"""
"""
"""Dataloader for the segmentation of neurons in EM. See 'get_isbi_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/kasthuri.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ def _require_kasthuri_data(path, download):


def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
_require_kasthuri_data(path, download)
data_path = os.path.join(path, f"kasthuri_{split}.h5")
Expand All @@ -90,6 +95,7 @@ def get_kasthuri_dataset(path, split, patch_shape, download=False, **kwargs):


def get_kasthuri_loader(path, split, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_kasthuri_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
14 changes: 11 additions & 3 deletions torch_em/data/datasets/livecell.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ def get_livecell_dataset(
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs
):
"""Dataset for the segmentation of cells in phase-contrast microscopy.

This dataset is from the publication https://doi.org/10.1038/s41592-021-01249-6.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "val", "test")
if cell_types is not None:
assert isinstance(cell_types, (list, tuple)),\
Expand All @@ -169,9 +174,12 @@ def get_livecell_dataset(
return dataset


def get_livecell_loader(path, split, patch_shape, batch_size, download=False,
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs):
def get_livecell_loader(
path, split, patch_shape, batch_size, download=False,
offsets=None, boundaries=False, binary=False,
cell_types=None, label_path=None, label_dtype=torch.int64, **kwargs
):
"""Dataloader for the segmentation of cells in phase-contrast microscopy. See 'get_livecell_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
dataset = get_livecell_dataset(
path, split, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary,
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/lizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def _require_lizard_data(path, download):


def get_lizard_dataset(path, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of nuclei in histopathology.

This dataset is from the publication https://doi.org/10.48550/arXiv.2108.11195.
Please cite it if you use this dataset for a publication.
"""
_require_lizard_data(path, download)

data_paths = glob(os.path.join(path, "*.h5"))
Expand All @@ -96,6 +101,7 @@ def get_lizard_dataset(path, patch_shape, download=False, **kwargs):
# TODO implement selecting different tissue types
# TODO implement train / val / test split (is pre-defined in a csv)
def get_lizard_loader(path, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of nuclei in histopathology. See 'get_lizard_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/lucchi.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ def _require_lucchi_data(path, download):


def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.48550/arXiv.1812.06024.
Please cite it if you use this dataset for a publication.
"""
assert split in ("train", "test")
_require_lucchi_data(path, download)
data_path = os.path.join(path, f"lucchi_{split}.h5")
Expand All @@ -87,6 +92,7 @@ def get_lucchi_dataset(path, split, patch_shape, download=False, **kwargs):


def get_lucchi_loader(path, split, patch_shape, batch_size, download=False, **kwargs):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_lucchi_dataset' for details"""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
6 changes: 6 additions & 0 deletions torch_em/data/datasets/mitoem.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def get_mitoem_dataset(
binary=False,
**kwargs,
):
"""Dataset for the segmentation of mitochondria in EM.

This dataset is from the publication https://doi.org/10.1007/978-3-030-59722-1_7.
Please cite it if you use this dataset for a publication.
"""
assert len(patch_shape) == 3
if isinstance(splits, str):
splits = [splits]
Expand Down Expand Up @@ -175,6 +180,7 @@ def get_mitoem_loader(
binary=False,
**kwargs,
):
"""Dataloader for the segmentation of mitochondria in EM. See 'get_mitoem_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
5 changes: 5 additions & 0 deletions torch_em/data/datasets/mouse_embryo.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def get_mouse_embryo_dataset(
binary=False,
**kwargs,
):
"""Dataset for the segmentation of nuclei in confocal microscopy.

This dataset is stored on zenodo: https://zenodo.org/record/6546550.
"""
assert name in ("membrane", "nuclei")
assert split in ("train", "val")
assert len(patch_shape) == 3
Expand Down Expand Up @@ -62,6 +66,7 @@ def get_mouse_embryo_loader(
binary=False,
**kwargs,
):
"""Dataloader for the segmentation of nuclei in confocal microscopy. See 'get_mouse_embryo_dataset' for details."""
ds_kwargs, loader_kwargs = util.split_kwargs(
torch_em.default_segmentation_dataset, **kwargs
)
Expand Down
10 changes: 10 additions & 0 deletions torch_em/data/datasets/neurips_cell_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def get_neurips_cellseg_supervised_dataset(
sampler=None,
val_fraction=0.1,
):
"""Dataset for the segmentation of cells in light microscopy.

    This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/.
"""
assert split in ("train", "val", None), split
image_paths, label_paths = _get_image_and_label_paths(root, split, val_fraction)

Expand Down Expand Up @@ -111,6 +115,7 @@ def get_neurips_cellseg_supervised_loader(
val_fraction=0.1,
**loader_kwargs
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_neurips_cellseg_supervised_dataset'."""
ds = get_neurips_cellseg_supervised_dataset(
root, split, patch_shape, make_rgb=make_rgb, label_transform=label_transform,
label_transform2=label_transform2, raw_transform=raw_transform, transform=transform,
Expand Down Expand Up @@ -157,6 +162,10 @@ def get_neurips_cellseg_unsupervised_dataset(
use_images=True,
use_wholeslide=True,
):
"""Dataset for the segmentation of cells in light microscopy.

    This dataset is part of the NeurIPS Cell Segmentation challenge: https://neurips22-cellseg.grand-challenge.org/.
"""
if raw_transform is None:
trafo = to_rgb if make_rgb else None
raw_transform = torch_em.transform.get_raw_transform(augmentation2=trafo)
Expand Down Expand Up @@ -196,6 +205,7 @@ def get_neurips_cellseg_unsupervised_loader(
use_wholeslide=True,
**loader_kwargs,
):
"""Dataloader for the segmentation of cells in light microscopy. See 'get_neurips_cellseg_unsupervised_dataset'."""
ds = get_neurips_cellseg_unsupervised_dataset(
root, patch_shape, make_rgb=make_rgb, raw_transform=raw_transform, transform=transform,
dtype=dtype, sampler=sampler, use_images=use_images, use_wholeslide=use_wholeslide
Expand Down
Loading
Loading