From 935d6255eaaef27fe10d2a9960392165b29e7351 Mon Sep 17 00:00:00 2001
From: Anwai Archit <52396323+anwai98@users.noreply.github.com>
Date: Wed, 5 Jun 2024 20:31:37 +0200
Subject: [PATCH] Add FeTa dataset (#293)

Add feta 2024 challenge dataset

---------

Co-authored-by: Constantin Pape <constantin.pape@embl.de>
---
 scripts/datasets/medical/check_feta24.py   |  21 ++++
 torch_em/data/datasets/medical/__init__.py |   1 +
 torch_em/data/datasets/medical/feta24.py   | 109 +++++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 scripts/datasets/medical/check_feta24.py
 create mode 100644 torch_em/data/datasets/medical/feta24.py

diff --git a/scripts/datasets/medical/check_feta24.py b/scripts/datasets/medical/check_feta24.py
new file mode 100644
index 00000000..83e70645
--- /dev/null
+++ b/scripts/datasets/medical/check_feta24.py
@@ -0,0 +1,21 @@
+from torch_em.util.debug import check_loader
+from torch_em.data.datasets.medical import get_feta24_loader
+
+
+ROOT = "/media/anwai/ANWAI/data/feta24"
+
+
+def check_feta24():
+    loader = get_feta24_loader(
+        path=ROOT,
+        patch_shape=(1, 512, 512),
+        batch_size=2,
+        resize_inputs=True,
+        download=False,
+    )
+
+    check_loader(loader, 8)
+
+
+if __name__ == "__main__":
+    check_feta24()
diff --git a/torch_em/data/datasets/medical/__init__.py b/torch_em/data/datasets/medical/__init__.py
index cb711022..5944f755 100644
--- a/torch_em/data/datasets/medical/__init__.py
+++ b/torch_em/data/datasets/medical/__init__.py
@@ -3,6 +3,7 @@
 from .busi import get_busi_dataset, get_busi_loader
 from .camus import get_camus_dataset, get_camus_loader
 from .drive import get_drive_dataset, get_drive_loader
+from .feta24 import get_feta24_dataset, get_feta24_loader
 from .idrid import get_idrid_dataset, get_idrid_loader
 from .montgomery import get_montgomery_dataset, get_montgomery_loader
 from .msd import get_msd_dataset, get_msd_loader
diff --git a/torch_em/data/datasets/medical/feta24.py b/torch_em/data/datasets/medical/feta24.py
new file mode 100644
index 00000000..be76600a
--- /dev/null
+++ b/torch_em/data/datasets/medical/feta24.py
@@ -0,0 +1,109 @@
+import os
+from glob import glob
+from natsort import natsorted
+from typing import Union, Tuple
+
+import torch_em
+
+from .. import util
+
+
+def get_feta24_data(path, download):
+    """This function describes the download functionality and ensures your data has been downloaded in expected format.
+
+    The dataset is from the FeTa Challenge 2024 - https://fetachallenge.github.io/ (Task 1: Segmentation).
+    A detailed description of the dataset is provided here: https://fetachallenge.github.io/pages/Data_description.
+    To download the dataset, please follow the below mentioned steps:
+    - Go to the section `1. Request access and download the FeTa 2024 data from the University Children's Hospital
+    Zurich` at `https://fetachallenge.github.io/pages/Data_download`, which explains the steps to be a registered user
+    on the Synapse platform and expects the user to agree with the mentioned conditions.
+    - During registration, users are expected to provide some information
+    (see https://fetachallenge.github.io/pages/Data_download for details).
+    - Next, you can proceed with requesting access (by following provided instructions) at
+    https://www.synapse.org/#!Synapse:syn25649159/wiki/610007.
+
+    Once you have access to the dataset, you can use the synapse client or the platform download option to get
+    the zipped file. It contains 80 scans paired with their segmentations (more details on the challenge website).
+
+    Finally, you should provide the path to the parent directory where the zipfile is stored.
+    """
+    if download:
+        print("Download is not supported due to the challenge's setup. See 'get_feta24_data' for details.")
+
+    data_dir = os.path.join(path, "feta_2.3")
+    if os.path.exists(data_dir):
+        return data_dir
+
+    zip_path = os.path.join(path, "feta_2.3.zip")
+    if not os.path.exists(zip_path):
+        raise FileNotFoundError(f"The downloaded zip file was not found. Please download it and place it at '{path}'.")
+
+    util.unzip(zip_path=zip_path, dst=path)
+
+    return data_dir
+
+
+def _get_feta24_paths(path, download):
+    data_dir = get_feta24_data(path=path, download=download)
+
+    base_dir = os.path.join(data_dir, "sub-*", "anat")
+    image_paths = natsorted(glob(os.path.join(base_dir, "sub-*_rec-*_T2w.nii.gz")))
+    gt_paths = natsorted(glob(os.path.join(base_dir, "sub-*_rec-*_dseg.nii.gz")))
+
+    return image_paths, gt_paths
+
+
+def get_feta24_dataset(
+    path: Union[os.PathLike, str],
+    patch_shape: Tuple[int, ...],
+    resize_inputs: bool = False,
+    download: bool = False,
+    **kwargs
+):
+    """Dataset for segmentation of fetal brain tissues in MRI.
+
+    The dataset cannot be automatically downloaded. See `get_feta24_data` for details.
+
+    This dataset is from FeTa 2024 Challenge:
+    - https://doi.org/10.5281/zenodo.11192452
+    - Payette et al. - https://doi.org/10.1038/s41597-021-00946-3
+
+    Please cite it if you use this dataset in your publication.
+    """
+    image_paths, gt_paths = _get_feta24_paths(path=path, download=download)
+
+    if resize_inputs:
+        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
+        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
+            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
+        )
+
+    dataset = torch_em.default_segmentation_dataset(
+        raw_paths=image_paths,
+        raw_key="data",
+        label_paths=gt_paths,
+        label_key="data",
+        patch_shape=patch_shape,
+        **kwargs
+    )
+
+    return dataset
+
+
+def get_feta24_loader(
+    path: Union[os.PathLike, str],
+    patch_shape: Tuple[int, ...],
+    batch_size: int,
+    resize_inputs: bool = False,
+    download: bool = False,
+    **kwargs
+):
+    """Dataloader for segmentation of fetal brain tissues in MRI.
+    See `get_feta24_dataset` for details.
+    """
+    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
+    dataset = get_feta24_dataset(
+        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **ds_kwargs
+    )
+    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
+    return loader