From 935d6255eaaef27fe10d2a9960392165b29e7351 Mon Sep 17 00:00:00 2001
From: Anwai Archit <52396323+anwai98@users.noreply.github.com>
Date: Wed, 5 Jun 2024 20:31:37 +0200
Subject: [PATCH] Add FeTa dataset (#293)

Add feta 2024 challenge dataset

---------

Co-authored-by: Constantin Pape
---
 scripts/datasets/medical/check_feta24.py   |  21 ++++
 torch_em/data/datasets/medical/__init__.py |   1 +
 torch_em/data/datasets/medical/feta24.py   | 109 +++++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 scripts/datasets/medical/check_feta24.py
 create mode 100644 torch_em/data/datasets/medical/feta24.py

diff --git a/scripts/datasets/medical/check_feta24.py b/scripts/datasets/medical/check_feta24.py
new file mode 100644
index 00000000..83e70645
--- /dev/null
+++ b/scripts/datasets/medical/check_feta24.py
@@ -0,0 +1,21 @@
+from torch_em.util.debug import check_loader
+from torch_em.data.datasets.medical import get_feta24_loader
+
+
+ROOT = "/media/anwai/ANWAI/data/feta24"
+
+
+def check_feta24():
+    loader = get_feta24_loader(
+        path=ROOT,
+        patch_shape=(1, 512, 512),
+        batch_size=2,
+        resize_inputs=True,
+        download=False,
+    )
+
+    check_loader(loader, 8)
+
+
+if __name__ == "__main__":
+    check_feta24()
diff --git a/torch_em/data/datasets/medical/__init__.py b/torch_em/data/datasets/medical/__init__.py
index cb711022..5944f755 100644
--- a/torch_em/data/datasets/medical/__init__.py
+++ b/torch_em/data/datasets/medical/__init__.py
@@ -3,6 +3,7 @@
 from .busi import get_busi_dataset, get_busi_loader
 from .camus import get_camus_dataset, get_camus_loader
 from .drive import get_drive_dataset, get_drive_loader
+from .feta24 import get_feta24_dataset, get_feta24_loader
 from .idrid import get_idrid_dataset, get_idrid_loader
 from .montgomery import get_montgomery_dataset, get_montgomery_loader
 from .msd import get_msd_dataset, get_msd_loader
diff --git a/torch_em/data/datasets/medical/feta24.py b/torch_em/data/datasets/medical/feta24.py
new file mode 100644
index 00000000..be76600a
--- /dev/null
+++ b/torch_em/data/datasets/medical/feta24.py
@@ -0,0 +1,109 @@
+import os
+from glob import glob
+from natsort import natsorted
+from typing import Union, Tuple
+
+import torch_em
+
+from .. import util
+
+
+def get_feta24_data(path, download):
+    """This function describes the download functionality and ensures your data has been downloaded in expected format.
+
+    The dataset is from the FeTa Challenge 2024 - https://fetachallenge.github.io/ (Task 1: Segmentation).
+    A detailed description of the dataset is provided here: https://fetachallenge.github.io/pages/Data_description.
+    To download the dataset, please follow the below mentioned steps:
+    - Go to the section `1. Request access and download the FeTa 2024 data from the University Children's Hospital
+    Zurich` at `https://fetachallenge.github.io/pages/Data_download`, which explains the steps to be a registered user
+    in Synapse platform and expects the user to agree with the mentioned conditions.
+    - During registration, the users are expected to provide some information
+    (see https://fetachallenge.github.io/pages/Data_download for details).
+    - Next, you can proceed with requesting access (by following provided instructions) at
+    https://www.synapse.org/#!Synapse:syn25649159/wiki/610007.
+
+    Once you have access to the dataset, you can use the synapse client or the platform download option to get
+    the zipped files. It contains 80 scans paired with their segmentations (more details in the challenge website).
+
+    Finally, you should provide the path to the parent directory where the zipfile is stored.
+    """
+    if download:
+        print("Download is not supported due to the challenge's setup. See 'get_feta24_data' for details.")
+
+    data_dir = os.path.join(path, "feta_2.3")
+    if os.path.exists(data_dir):
+        return data_dir
+
+    zip_path = os.path.join(path, "feta_2.3.zip")
+    if not os.path.exists(zip_path):
+        raise FileNotFoundError(f"The downloaded zip file was not found. Please download it and place it at '{path}'.")
+
+    util.unzip(zip_path=zip_path, dst=path)
+
+    return data_dir
+
+
+def _get_feta24_paths(path, download):
+    data_dir = get_feta24_data(path=path, download=download)
+
+    base_dir = os.path.join(data_dir, "sub-*", "anat")
+    image_paths = natsorted(glob(os.path.join(base_dir, "sub-*_rec-*_T2w.nii.gz")))
+    gt_paths = natsorted(glob(os.path.join(base_dir, "sub-*_rec-*_dseg.nii.gz")))
+
+    return image_paths, gt_paths
+
+
+def get_feta24_dataset(
+    path: Union[os.PathLike, str],
+    patch_shape: Tuple[int, ...],
+    resize_inputs: bool = False,
+    download: bool = False,
+    **kwargs
+):
+    """Dataset for segmentation of fetal brain tissues in MRI.
+
+    The dataset cannot be automatically downloaded. See `get_feta24_data` for details.
+
+    This dataset is from FeTa 2024 Challenge:
+    - https://doi.org/10.5281/zenodo.11192452
+    - Payette et al. - https://doi.org/10.1038/s41597-021-00946-3
+
+    Please cite it if you use this dataset in your publication.
+    """
+    image_paths, gt_paths = _get_feta24_paths(path=path, download=download)
+
+    if resize_inputs:
+        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
+        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
+            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
+        )
+
+    dataset = torch_em.default_segmentation_dataset(
+        raw_paths=image_paths,
+        raw_key="data",
+        label_paths=gt_paths,
+        label_key="data",
+        patch_shape=patch_shape,
+        **kwargs
+    )
+
+    return dataset
+
+
+def get_feta24_loader(
+    path: Union[os.PathLike, str],
+    patch_shape: Tuple[int, ...],
+    batch_size: int,
+    resize_inputs: bool = False,
+    download: bool = False,
+    **kwargs
+):
+    """Dataloader for segmentation of fetal brain tissues in MRI.
+    See `get_feta24_dataset` for details.
+    """
+    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
+    dataset = get_feta24_dataset(
+        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **ds_kwargs
+    )
+    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
+    return loader