Reorganize package dependencies (#506)

* Update dependencies and imports for MONAI and typing * Refactor MedicalImage and MedicalImageFolder classes * allow init of MedicalImage class; raise error in methods * Refactor import_optional_module function to allow importing module attributes * Refactor MedicalImage optional module imports * Update dependencies in pyproject.toml * Add test for MedicalImage feature without MONAI * Prevent use of txrv_transforms method at runtime if MONAI is not installed * Move report package deps to core installation * Adjust package installation tests * Formatting fix * Remove report package test action * Formatting fix * remove txrv_transforms, add dictionary wrapper for torchvision transforms, and remove monai deps from monitor package * fix repr for transform * fix Dictd call func * fix monitor-api notebook * Update imports for image transforms * Update metadata for cxr_classification.ipynb Signed-off-by: Franklin <41602287+fcogidi@users.noreply.github.com> * fix transforms in notebooks --------- Signed-off-by: Franklin <41602287+fcogidi@users.noreply.github.com> Co-authored-by: Amrit K <amritk@vectorinstitute.ai> Co-authored-by: akore <akore0x5f@gmail.com>
VectorInstitute · Nov 21, 2023 · 8a0ef2f · 8a0ef2f
1 parent b5ecf98
commit 8a0ef2f
Show file tree

Hide file tree

Showing 26 changed files with 1,268 additions and 1,115 deletions.
diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml
@@ -51,17 +51,3 @@ jobs:
           pip install -e ".[models]"
           pip install pytest
           python3 -m pytest tests/package/extras/models.py
-  extra-report-package-install-check:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Install pip
-        run: python3 -m pip install --upgrade pip
-      - uses: actions/setup-python@v4.7.1
-        with:
-          python-version: '3.10'
-      - name: Install package and test import
-        run: |
-          pip install -e ".[report]"
-          pip install pytest
-          python3 -m pytest tests/package/extras/report.py
diff --git a/README.md b/README.md
@@ -13,20 +13,17 @@
 
 ``cyclops`` is a toolkit for facilitating research and deployment of ML models for healthcare. It provides a few high-level APIs namely:
 
-* `data` - Create datasets for training, inference and evaluation. We use the popular 🤗 [datasets](https://github.com/huggingface/datasets) to efficiently load and slice different modalities of data.
-* `models` - Use common model implementations using [scikit-learn](https://scikit-learn.org/stable/) and [PyTorch](https://pytorch.org/).
-* `tasks` - Use canonical Healthcare ML tasks such as
-    * Mortality prediction
-    * Chest X-ray classification
+* `data` - Create datasets for training, inference and evaluation. We use the popular 🤗 [datasets](https://github.com/huggingface/datasets) to efficiently load and slice different modalities of data
+* `models` - Use common model implementations using [scikit-learn](https://scikit-learn.org/stable/) and [PyTorch](https://pytorch.org/)
+* `tasks` - Use common ML task formulations such as binary classification or multi-label classification on tabular, time-series and image data
 * `evaluate` - Evaluate models on clinical prediction tasks
 * `monitor` - Detect dataset shift relevant for clinical use cases
 * `report` - Create [model report cards](https://vectorinstitute.github.io/cyclops/api/tutorials/nihcxr/nihcxr_report_periodic.html) for clinical ML models
 
-``cyclops`` also provides a library of end-to-end use cases on clinical datasets such as
+``cyclops`` also provides example end-to-end use case implementations on clinical datasets such as
 
-* [MIMIC-III](https://physionet.org/content/mimiciii/1.4/)
+* [NIH chest x-ray](https://www.nih.gov/news-events/news-releases/nih-clinical-center-provides-one-largest-publicly-available-chest-x-ray-datasets-scientific-community)
 * [MIMIC-IV](https://physionet.org/content/mimiciv/2.0/)
-* [eICU-CRD](https://eicu-crd.mit.edu/about/eicu/)
 
 
 ## 🐣 Getting Started
@@ -37,31 +34,15 @@
 python3 -m pip install pycyclops
 ```
 
-The base package installation supports the use of the `data` and `process` APIs to load
-and transform clinical data, for downstream tasks.
+The base cyclops installation doesn't include modelling packages.
 
-To install additional functionality from the other APIs, they can be installed as extras.
+To install additional dependencies for using models,
 
 
-To install with `models`, `tasks`, `evaluate` and `monitor` API support,
-
 ```bash
 python3 -m pip install 'pycyclops[models]'
 ```
 
-To install with `report` API support,
-
-```bash
-python3 -m pip install 'pycyclops[report]'
-```
-
-Multiple extras could also be combined, for example to install with both `report` and
-`models` support:
-
-```bash
-python3 -m pip install 'pycyclops[report,models]'
-```
-
 
 ## 🧑🏿‍💻 Developing
 

diff --git a/cyclops/data/features/medical_image.py b/cyclops/data/features/medical_image.py
@@ -1,11 +1,10 @@
 """Medical image feature."""
 
-import logging
 import os
 import tempfile
 from dataclasses import dataclass, field
 from io import BytesIO
-from typing import Any, ClassVar, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Tuple, Union
 
 import numpy as np
 import numpy.typing as npt
@@ -15,18 +14,56 @@
 from datasets.features import Image, features
 from datasets.utils.file_utils import is_local_path
 from datasets.utils.py_utils import string_to_dict
-from monai.data.image_reader import ImageReader
-from monai.data.image_writer import ITKWriter
-from monai.transforms.compose import Compose
-from monai.transforms.io.array import LoadImage
-from monai.transforms.utility.array import ToNumpy
 
-from cyclops.utils.log import setup_logging
+from cyclops.utils.optional import import_optional_module
 
 
-# Logging.
-LOGGER = logging.getLogger(__name__)
-setup_logging(print_level="INFO", logger=LOGGER)
+# MONAI is treated as an optional dependency here. Static type checkers take
+# the real imports in the first branch; at runtime each name is resolved
+# through `import_optional_module` with `error="warn"` instead of a hard import.
+if TYPE_CHECKING:
+    from monai.data.image_reader import ImageReader
+    from monai.data.image_writer import ITKWriter
+    from monai.transforms.compose import Compose
+    from monai.transforms.io.array import LoadImage
+    from monai.transforms.utility.array import ToNumpy
+else:
+    ImageReader = import_optional_module(
+        "monai.data.image_reader",
+        attribute="ImageReader",
+        error="warn",
+    )
+    ITKWriter = import_optional_module(
+        "monai.data.image_writer",
+        attribute="ITKWriter",
+        error="warn",
+    )
+    Compose = import_optional_module(
+        "monai.transforms.compose",
+        attribute="Compose",
+        error="warn",
+    )
+    LoadImage = import_optional_module(
+        "monai.transforms.io.array",
+        attribute="LoadImage",
+        error="warn",
+    )
+    ToNumpy = import_optional_module(
+        "monai.transforms.utility.array",
+        attribute="ToNumpy",
+        error="warn",
+    )
+# True only when every MONAI name above resolved to something other than None.
+# NOTE(review): presumably `import_optional_module` returns None when the
+# import fails under error="warn" -- confirm against cyclops.utils.optional.
+_monai_available = all(
+    module is not None
+    for module in (
+        ImageReader,
+        ITKWriter,
+        Compose,
+        LoadImage,
+        ToNumpy,
+    )
+)
+# Shared error text raised by the code paths that need MONAI at call time.
+_monai_unavailable_message = (
+    "The MONAI library is required to use the `MedicalImage` feature. "
+    "Please install it with `pip install monai`."
+)
 
 
 @dataclass
@@ -35,24 +72,35 @@ class MedicalImage(Image):  # type: ignore
 
     Parameters
     ----------
-    decode : bool, optional, default=True
-        Whether to decode the image. If False, the image will be returned as a
-        dictionary in the format `{"path": image_path, "bytes": image_bytes}`.
     reader : Union[str, ImageReader], optional, default="ITKReader"
         The MONAI image reader to use.
     suffix : str, optional, default=".jpg"
         The suffix to use when decoding bytes to image.
+    decode : bool, optional, default=True
+        Whether to decode the image. If False, the image will be returned as a
+        dictionary in the format `{"path": image_path, "bytes": image_bytes}`.
+    id : str, optional, default=None
+        The id of the feature.
 
     """
 
     reader: Union[str, ImageReader] = "ITKReader"
     suffix: str = ".jpg"  # used when decoding/encoding bytes to image
-    _loader = Compose(
-        [
-            LoadImage(reader=reader, simple_keys=True, dtype=None, image_only=True),
-            ToNumpy(),
-        ],
-    )
+
+    _loader = None
+    if _monai_available:
+        _loader = Compose(
+            [
+                LoadImage(
+                    reader=reader,
+                    simple_keys=True,
+                    dtype=None,
+                    image_only=False,
+                ),
+                ToNumpy(),
+            ],
+        )
+
     # Automatically constructed
     dtype: ClassVar[str] = "dict"
     pa_type: ClassVar[Any] = pa.struct({"bytes": pa.binary(), "path": pa.string()})
@@ -76,12 +124,14 @@ def encode_example(
 
         """
         if isinstance(value, list):
-            value = np.array(value)
+            value = np.asarray(value)
 
         if isinstance(value, str):
             return {"path": value, "bytes": None}
+
         if isinstance(value, np.ndarray):
             return _encode_ndarray(value, image_format=self.suffix)
+
         if "array" in value and "metadata" in value:
             output_ext_ = self.suffix
             metadata_ = value["metadata"]
@@ -132,7 +182,7 @@ def decode_example(
         if not self.decode:
             raise RuntimeError(
                 "Decoding is disabled for this feature. "
-                "Please use MedicalImage(decode=True) instead.",
+                "Please use `MedicalImage(decode=True)` instead.",
             )
 
         if token_per_repo_id is None:
@@ -147,6 +197,8 @@ def decode_example(
                 )
 
             if is_local_path(path):
+                if self._loader is None:
+                    raise RuntimeError(_monai_unavailable_message)
                 image, metadata = self._loader(path)
             else:
                 source_url = path.split("::")[-1]
@@ -188,6 +240,9 @@ def _read_file_from_bytes(
             Image as numpy array and metadata as dictionary.
 
         """
+        if self._loader is None:
+            raise RuntimeError(_monai_unavailable_message)
+
         # XXX: Can we avoid writing to disk?
         with tempfile.NamedTemporaryFile(mode="wb", suffix=self.suffix) as fp:
             fp.write(buffer.getvalue())
@@ -219,6 +274,9 @@ def _encode_ndarray(
         Dictionary containing the image bytes and path.
 
     """
+    if not _monai_available:
+        raise RuntimeError(_monai_unavailable_message)
+
     if not image_format.startswith("."):
         image_format = "." + image_format
 
@@ -240,5 +298,5 @@ def _encode_ndarray(
         return {"path": None, "bytes": temp_file_bytes}
 
 
-# add the `MedicalImage` feature to the `features` module
+# add the `MedicalImage` feature to the `features` module namespace
 features.MedicalImage = MedicalImage
diff --git a/cyclops/data/packaged_loading_scripts/medical_imagefolder/medical_imagefolder.py b/cyclops/data/packaged_loading_scripts/medical_imagefolder/medical_imagefolder.py
@@ -33,7 +33,7 @@ class MedicalImageFolderConfig(
 class MedicalImageFolder(folder_based_builder.FolderBasedBuilder):  # type: ignore
     """MedicalImageFolder."""
 
-    BASE_FEATURE = MedicalImage()
+    BASE_FEATURE = MedicalImage
     BASE_COLUMN_NAME = "image"
     BUILDER_CONFIG_CLASS = MedicalImageFolderConfig
     EXTENSIONS: List[str]  # definition at the bottom of the script

diff --git a/cyclops/data/transforms.py b/cyclops/data/transforms.py
@@ -1,28 +1,84 @@
 """Transforms for the datasets."""
+from typing import Any, Callable, Tuple
 
-from typing import Tuple
-
-from monai.transforms import Lambdad, Resized, ToDeviced  # type: ignore
-from torchvision.transforms import Compose
-
-
-def txrv_transforms(
-    keys: Tuple[str, ...] = ("features",),
-    device: str = "cpu",
-) -> Compose:
-    """Set of transforms for the models in the TXRV library."""
-    return Compose(
-        [
-            Resized(
-                keys=keys,
-                spatial_size=(1, 224, 224),
-                allow_missing_keys=True,
-            ),
-            Lambdad(
-                keys=keys,
-                func=lambda x: ((2 * (x / 255.0)) - 1.0) * 1024,
-                allow_missing_keys=True,
-            ),
-            ToDeviced(keys=keys, device=device, allow_missing_keys=True),
-        ],
-    )
+from torchvision.transforms import Lambda, Resize
+
+
+# generic dictionary-based wrapper for any transform
+class Dictd:
+    """Generic dictionary-based wrapper for any transform.
+
+    Applies one callable to the values stored under the selected keys of a
+    mapping-like object and writes each result back under the same key.
+
+    Parameters
+    ----------
+    transform : Callable[..., Any]
+        Callable invoked with the value stored under each selected key.
+    keys : Tuple[str, ...]
+        Keys whose values are transformed, processed in the given order.
+    allow_missing_keys : bool, default=False
+        If True, keys that are not present in the data are skipped. If False,
+        no membership check is made and ``data[key]`` is looked up directly.
+
+    """
+
+    def __init__(
+        self,
+        transform: Callable[..., Any],
+        keys: Tuple[str, ...],
+        allow_missing_keys: bool = False,
+    ):
+        self.transform = transform
+        self.keys = keys
+        self.allow_missing_keys = allow_missing_keys
+
+    def __call__(self, data: Any) -> Any:
+        """Apply the transform to the data.
+
+        Parameters
+        ----------
+        data : Any
+            Object supporting ``in``, item lookup and item assignment by key.
+
+        Returns
+        -------
+        Any
+            The same ``data`` object that was passed in; the transformed
+            values are assigned into it in place rather than into a copy.
+
+        """
+        for key in self.keys:
+            # Skip absent keys only when the caller opted in to that behaviour.
+            if self.allow_missing_keys and key not in data:
+                continue
+            data[key] = self.transform(data[key])
+        return data
+
+    def __repr__(self) -> str:
+        """Return a string representation of the transform.
+
+        The representation includes the wrapped transform, the keys and the
+        ``allow_missing_keys`` flag.
+        """
+        return (
+            f"{self.__class__.__name__}(transform={self.transform}, "
+            f"keys={self.keys}, allow_missing_keys={self.allow_missing_keys})"
+        )
+
+
+# dictionary-based wrapper of Lambda transform using Dictd
+class Lambdad:
+    """Dictionary-based wrapper of Lambda transform using Dictd.
+
+    Wraps ``func`` in a ``Lambda`` transform and delegates the per-key
+    application to a ``Dictd`` instance stored as ``self.transform``.
+
+    Parameters
+    ----------
+    func : Callable[..., Any]
+        Function applied to the value stored under each selected key.
+    keys : Tuple[str, ...]
+        Keys whose values are passed to ``func``.
+    allow_missing_keys : bool, default=False
+        Forwarded unchanged to ``Dictd``.
+
+    """
+
+    def __init__(
+        self,
+        func: Callable[..., Any],
+        keys: Tuple[str, ...],
+        allow_missing_keys: bool = False,
+    ):
+        self.transform = Dictd(
+            transform=Lambda(func),
+            keys=keys,
+            allow_missing_keys=allow_missing_keys,
+        )
+
+    def __call__(self, data: Any) -> Any:
+        """Apply the transform to the data.
+
+        Returns whatever the underlying ``Dictd`` call returns for ``data``.
+        """
+        return self.transform(data)
+
+    def __repr__(self) -> str:
+        """Return a string representation of the transform.
+
+        Only the keys and the ``allow_missing_keys`` flag are reported; the
+        wrapped function is not part of the representation.
+        """
+        return f"{self.__class__.__name__}(keys={self.transform.keys}, allow_missing_keys={self.transform.allow_missing_keys})"
+
+
+# dictionary-based wrapper of Resize transform using Dictd
+class Resized:
+    """Dictionary-based wrapper of Resize transform using Dictd.
+
+    Builds a ``Resize`` transform from ``spatial_size`` and delegates the
+    per-key application to a ``Dictd`` instance stored as ``self.transform``.
+
+    Parameters
+    ----------
+    spatial_size : Tuple[int, int]
+        Target size, passed to ``Resize`` as its ``size`` argument.
+    keys : Tuple[str, ...]
+        Keys whose values are resized.
+    allow_missing_keys : bool, default=False
+        Forwarded unchanged to ``Dictd``.
+
+    """
+
+    def __init__(
+        self,
+        spatial_size: Tuple[int, int],
+        keys: Tuple[str, ...],
+        allow_missing_keys: bool = False,
+    ):
+        self.transform = Dictd(
+            transform=Resize(size=spatial_size),
+            keys=keys,
+            allow_missing_keys=allow_missing_keys,
+        )
+
+    def __call__(self, data: Any) -> Any:
+        """Apply the transform to the data.
+
+        Returns whatever the underlying ``Dictd`` call returns for ``data``.
+        """
+        return self.transform(data)
+
+    def __repr__(self) -> str:
+        """Return a string representation of the transform.
+
+        Only the keys and the ``allow_missing_keys`` flag are reported; the
+        target size is not part of the representation.
+        """
+        return f"{self.__class__.__name__}(keys={self.transform.keys}, allow_missing_keys={self.transform.allow_missing_keys})"
diff --git a/cyclops/evaluate/metrics/experimental/distributed_backends/mpi4py.py b/cyclops/evaluate/metrics/experimental/distributed_backends/mpi4py.py
@@ -15,7 +15,7 @@
 if TYPE_CHECKING:
     from mpi4py import MPI
 else:
-    MPI = import_optional_module("mpi4py.MPI", error="ignore")
+    MPI = import_optional_module("mpi4py.MPI", error="warn")
 # mypy: disable-error-code="no-any-return"