
Commit 1f6d203

Merge remote-tracking branch 'origin/master' into typehint_bids_dataframe

dyt811 committed Nov 20, 2022
2 parents 5ebbf19 + f7e70a1
Showing 5 changed files with 166 additions and 83 deletions.
5 changes: 5 additions & 0 deletions docs/source/configuration_file.rst
@@ -911,6 +911,11 @@ Split Dataset
}
}
.. note::
    .. line-block::
        The fraction of the dataset used as validation set will correspond to ``1 - train_fraction - test_fraction``.
        For example: ``1 - 0.6 - 0.2 = 0.2``.
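A hedged illustration in Python (only ``train_fraction`` and ``test_fraction`` come from this note; the fragment shape is an assumption, not the full config schema):

# Hypothetical split_dataset fragment, written as a Python dict.
split_dataset = {"train_fraction": 0.6, "test_fraction": 0.2}
val_fraction = 1 - split_dataset["train_fraction"] - split_dataset["test_fraction"]
assert abs(val_fraction - 0.2) < 1e-9  # 20% of the data goes to validation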


Training Parameters
-------------------
147 changes: 98 additions & 49 deletions ivadomed/config_manager.py
@@ -1,58 +1,61 @@
import json
import collections.abc
from typing import Dict, List, Any, KeysView, Union

from loguru import logger
from pathlib import Path
from ivadomed import utils as imed_utils
from ivadomed.keywords import ConfigKW, LoaderParamsKW, SplitDatasetKW, DataTestingKW
import copy


def update(d, u):
def update(source_dict: dict, destination_dict: dict) -> dict:
"""Update dictionary and nested dictionaries.
Args:
d (dict): Source dictionary that is updated by destination dictionary.
u (dict): Destination dictionary.
source_dict (dict): Source dictionary that is updated by destination dictionary.
destination_dict (dict): Destination dictionary.
Returns:
dict: updated dictionary
"""
for k, v in u.items():
if isinstance(v, collections.abc.Mapping):
d[k] = update(d.get(k, {}), v)
for key, value in destination_dict.items():
if isinstance(value, collections.abc.Mapping):
source_dict[key] = update(source_dict.get(key, {}), value)
else:
# If the source has a nested dict at this key but the destination provides a non-dict value, keep the source's nested dict
if k in d and isinstance(d[k], collections.abc.Mapping) and not isinstance(v, collections.abc.Mapping):
if key in source_dict and isinstance(source_dict[key], collections.abc.Mapping) and not isinstance(value,
collections.abc.Mapping):
pass
else:
d[k] = v
return d
source_dict[key] = value
return source_dict
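A minimal usage sketch of the merge behaviour (values are made up):

defaults = {"training": {"lr": 0.001, "epochs": 10}, "debugging": False}
overrides = {"training": {"epochs": 50}, "debugging": True}
merged = update(defaults, overrides)
# merged == {"training": {"lr": 0.001, "epochs": 50}, "debugging": True}
# Nested dicts are merged key by key rather than replaced wholesale.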


def deep_dict_compare(source_dict, dest_dict, keyname=None):
def deep_dict_compare(source_dict: dict, destination_dict: dict, keyname: str = None):
"""Compare and display differences between dictionaries (and nested dictionaries).
Args:
source_dict (dict): Source dictionary.
dest_dict (dict): Destination dictionary.
destination_dict (dict): Destination dictionary.
keyname (str): Key name to indicate the path to nested parameter.
"""
for key in dest_dict:
for key in destination_dict:
if key not in source_dict:
key_str = key if keyname is None else keyname + key
logger.info(f' {key_str}: {dest_dict[key]}')
logger.info(f' {key_str}: {destination_dict[key]}')

else:
if isinstance(dest_dict[key], collections.abc.Mapping):
if isinstance(destination_dict[key], collections.abc.Mapping):
if isinstance(source_dict[key], collections.abc.Mapping):
deep_dict_compare(source_dict[key], dest_dict[key], key + ": ")
deep_dict_compare(source_dict[key], destination_dict[key], key + ": ")
# In case a new dictionary appears in updated file
else:
deep_dict_compare(source_dict, dest_dict[key], key + ": ")
deep_dict_compare(source_dict, destination_dict[key], key + ": ")
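A usage sketch (values made up; output goes through loguru):

old = {"training": {"lr": 0.001}}
new = {"training": {"lr": 0.001, "epochs": 50}}
deep_dict_compare(old, new)
# logs "    training: epochs: 50", i.e. only keys present in the updated dict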


def load_json(config_path):
def load_json(config_path: str) -> dict:
"""Load json file content
Args:
@@ -67,7 +70,7 @@ def load_json(config_path):
return default_config


# To ensure retrocompatibility for parameter changes in configuration file
# To ensure retro-compatibility for parameter changes in configuration file
KEY_CHANGE_DICT = {'UNet3D': ConfigKW.MODIFIED_3D_UNET, 'bids_path': LoaderParamsKW.PATH_DATA,
'log_directory': ConfigKW.PATH_OUTPUT}
KEY_SPLIT_DATASET_CHANGE_LST = ['method', 'center_test']
@@ -84,68 +87,114 @@ class ConfigurationManager(object):
context_original (dict): Provided configuration file.
config_updated (dict): Updated configuration file.
"""
def __init__(self, path_context):
self.path_context = path_context
self.key_change_dict = KEY_CHANGE_DICT
self.key_split_dataset_change_lst = KEY_SPLIT_DATASET_CHANGE_LST

def __init__(self, path_context: str):
"""
Initialize the ConfigurationManager by validating the given path and loading the file.
Also load the default configuration file.
Args:
path_context (str): Path to configuration file.
"""
self.path_context: str = path_context
self.key_change_dict: Dict[str, str] = KEY_CHANGE_DICT
self.key_split_dataset_change_lst: List[str] = KEY_SPLIT_DATASET_CHANGE_LST
self._validate_path()
default_config_path = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
self.config_default = load_json(default_config_path)
self.context_original = load_json(path_context)
self.config_updated = {}
default_config_path: str = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
self.config_default: dict = load_json(default_config_path)
self.context_original: dict = load_json(path_context)
self.config_updated: dict = {}

@property
def config_updated(self):
def config_updated(self) -> dict:
"""
This function simply returns the attribute `_config_updated`.
Returns:
dict: `_config_updated` attribute
"""
return self._config_updated

@config_updated.setter
def config_updated(self, config_updated):
def config_updated(self, config_updated: dict):
"""
If `config_updated` is empty, copy the loaded configuration into it and apply some changes (renaming keys,
changing values, deleting key-value pairs) to ensure retro-compatibility.
Sets the new config_updated to the attribute `_config_updated`.
Args:
config_updated (dict): The new configuration to set.
"""
if config_updated == {}:
context = copy.deepcopy(self.context_original)
context: dict = copy.deepcopy(self.context_original)
self.change_keys(context, list(context.keys()))
config_updated = update(self.config_default, context)
self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET], config_updated[ConfigKW.SPLIT_DATASET].keys())
config_updated: dict = update(self.config_default, context)
self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET],
config_updated[ConfigKW.SPLIT_DATASET].keys())

self._config_updated = config_updated
self._config_updated: dict = config_updated
if config_updated['debugging']:
self._display_differing_keys()

def get_config(self):
def get_config(self) -> dict:
"""Get updated configuration file with all parameters from the default config file.
Returns:
dict: Updated configuration dict.
"""
return self.config_updated
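A hedged end-to-end sketch (the path is hypothetical):

manager = ConfigurationManager("config/my_experiment.json")
config = manager.get_config()  # user settings merged over config_default.json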

def change_keys(self, context, keys):
for k in keys:
def change_keys(self, context: Union[dict, collections.abc.Mapping], keys: List[str]):
"""
Renames the keys of the context dictionary that appear in the `key_change_dict` attribute, using the
values associated with them in `key_change_dict`.
Args:
context (Union[dict, collections.abc.Mapping]): The dictionary to change.
keys (List[str]): The keys in context to consider.
"""
for key_to_change in keys:
# Verify if key is still in the dict
if k in context:
if k == "NumpyToTensor":
del context[k]
if key_to_change in context:
# If the key_to_change is "NumpyToTensor", remove it from the context.
if key_to_change == "NumpyToTensor":
del context[key_to_change]
continue
v = context[k]
value_to_change: Any = context[key_to_change]
# Verify if value is a dictionary
if isinstance(v, collections.abc.Mapping):
self.change_keys(v, list(v.keys()))
if isinstance(value_to_change, collections.abc.Mapping):
self.change_keys(value_to_change, list(value_to_change.keys()))
else:
# Change keys from the key_change_dict
for key in self.key_change_dict:
if key in context:
context[self.key_change_dict[key]] = context[key]
del context[key]
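A hedged illustration of the renaming, reusing the `manager` instance from the sketch above and assuming LoaderParamsKW.PATH_DATA resolves to "path_data":

legacy = {"bids_path": "./data", "training": {"epochs": 10}}
manager.change_keys(legacy, list(legacy.keys()))
# legacy == {"path_data": "./data", "training": {"epochs": 10}}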

def change_keys_values(self, config_updated, keys):
for k in self.key_split_dataset_change_lst:
if k in keys:
value = config_updated[k]
if k == 'method' and value == "per_center":
def change_keys_values(self, config_updated: dict, keys: List[str]):
"""
This function sets DATA_TESTING->DATA_TYPE to "institution_id" when the method value is "per_center",
and DATA_TESTING->DATA_VALUE to the value of center_test.
In short, it checks a few retro-compatibility conditions and sets the corresponding values in `config_updated`.
Args:
config_updated (dict): Configuration dictionary to update.
keys (List[str]): The keys to consider.
"""
for key_to_change in self.key_split_dataset_change_lst:
if key_to_change in keys:
value: Any = config_updated[key_to_change]
# If the method is per_center, the data_testing->data_type value becomes "institution_id".
if key_to_change == 'method' and value == "per_center":
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] = "institution_id"
if k == 'center_test' and \
# If the key is center_test, data_testing->data_type == "institution_id" and the value is not None,
# data_testing->data_value becomes the value of center_test
if key_to_change == 'center_test' and \
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] == "institution_id" and \
value is not None:
value is not None:
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_VALUE] = value
del config_updated[k]
# Remove the current key from the updated config
del config_updated[key_to_change]
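A hedged before/after sketch (keyword constants written as their assumed string values):

split = {"method": "per_center", "center_test": ["03"],
         "data_testing": {"data_type": None, "data_value": []}}
manager.change_keys_values(split, list(split.keys()))
# split == {"data_testing": {"data_type": "institution_id", "data_value": ["03"]}}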

def _display_differing_keys(self):
"""Display differences between dictionaries.
12 changes: 10 additions & 2 deletions ivadomed/loader/balanced_sampler.py
@@ -1,5 +1,13 @@
from __future__ import annotations
import torch
import numpy as np
import typing

from typing import Union

if typing.TYPE_CHECKING:
from ivadomed.loader.bids_dataset import BidsDataset
from ivadomed.loader.bids3d_dataset import Bids3DDataset
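The typing.TYPE_CHECKING guard is a standard pattern for annotation-only imports that would otherwise create import cycles; combined with `from __future__ import annotations`, the guarded names are never needed at runtime. A generic sketch (module names hypothetical):

from __future__ import annotations
import typing

if typing.TYPE_CHECKING:
    # Seen by static type checkers only, never imported at runtime.
    from my_package.heavy_module import HeavyClass

def process(item: HeavyClass) -> None:
    ...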


class BalancedSampler(torch.utils.data.sampler.Sampler):
@@ -19,7 +27,7 @@ class distributions from an imbalanced dataset.
label_idx (int): Keeps track of the label indices already used for the metadata_dict.
"""

def __init__(self, dataset, metadata='gt'):
def __init__(self, dataset: Union[BidsDataset, Bids3DDataset], metadata: str = 'gt') -> None:
self.indices = list(range(len(dataset)))

self.nb_samples = len(self.indices)
@@ -39,7 +47,7 @@ def __init__(self, dataset, metadata='gt'):

self.weights = torch.DoubleTensor(weights)

def _get_label(self, dataset, idx, metadata):
def _get_label(self, dataset: Union[BidsDataset, Bids3DDataset], idx: int, metadata: str) -> int:
"""Returns 1 if sample is not empty, 0 if it is empty (only zeros).
Args:
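A hedged usage sketch (`dataset` is assumed to be a BidsDataset built elsewhere, with ground-truth metadata):

from torch.utils.data import DataLoader

sampler = BalancedSampler(dataset, metadata='gt')
loader = DataLoader(dataset, batch_size=4, sampler=sampler)
# Batches now draw under-represented classes more often, following self.weights.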
39 changes: 27 additions & 12 deletions ivadomed/loader/film.py
@@ -1,6 +1,8 @@
from __future__ import annotations
import json
from pathlib import Path
from copy import deepcopy
from typing import List, Union

import numpy as np
from loguru import logger
@@ -9,6 +11,14 @@
from sklearn.neighbors import KernelDensity
from sklearn.preprocessing import OneHotEncoder
from ivadomed.keywords import MetadataKW
import typing

if typing.TYPE_CHECKING:
from ivadomed.loader.bids_dataset import BidsDataset
from ivadomed.loader.bids3d_dataset import Bids3DDataset
from ivadomed.loader.mri2d_segmentation_dataset import MRI2DSegmentationDataset

import torch.nn as nn

from ivadomed import __path__

@@ -19,12 +29,16 @@
"acq-MToff_MTS": 3, "acq-MTon_MTS": 4, "acq-T1w_MTS": 5}


def normalize_metadata(ds_in, clustering_models, debugging, metadata_type, train_set=False):
def normalize_metadata(ds_in: Union[BidsDataset, Bids3DDataset, MRI2DSegmentationDataset],
clustering_models: dict,
debugging: bool,
metadata_type: str,
train_set: bool = False) -> tuple[list, OneHotEncoder] | list:
"""Categorize each metadata value using a KDE clustering method, then apply a one-hot-encoding.
Args:
ds_in (BidsDataset): Dataset with metadata.
clustering_models: Pre-trained clustering model that has been trained on metadata of the training set.
ds_in (Union[BidsDataset, Bids3DDataset, MRI2DSegmentationDataset]): Dataset with metadata.
clustering_models (dict): Pre-trained clustering model that has been trained on metadata of the training set.
debugging (bool): If True, extended verbosity and intermediate outputs.
metadata_type (str): Choice between 'mri_params', 'contrasts' or the name of a column from the
participants.tsv file.
@@ -105,11 +119,11 @@ class Kde_model():
kde (sklearn.neighbors.KernelDensity):
minima (float): Local minima.
"""
def __init__(self):
def __init__(self) -> None:
self.kde = KernelDensity()
self.minima = None

def train(self, data, value_range, gridsearch_bandwidth_range):
def train(self, data: list, value_range: np.ndarray, gridsearch_bandwidth_range: np.ndarray) -> None:
# reshape data to fit sklearn
data = np.array(data).reshape(-1, 1)

@@ -130,18 +144,18 @@ def train(self, data, value_range, gridsearch_bandwidth_range):
# find local minima
self.minima = s[argrelextrema(e, np.less)[0]]

def predict(self, data):
def predict(self, data: float) -> int:
x = [i for i, m in enumerate(self.minima) if data < m]
pred = min(x) if len(x) else len(self.minima)
return pred
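A hedged usage sketch (data, range and bandwidth grid are made up; the hidden middle of train() is assumed to score value_range with the fitted KDE, so argument shapes are assumptions):

import numpy as np

model = Kde_model()
model.train(data=[0.1, 0.12, 0.5, 0.52],
            value_range=np.linspace(0, 1, 100),
            gridsearch_bandwidth_range=np.logspace(-2, 0, 10))
cluster = model.predict(0.51)  # index of the density lobe containing 0.51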


def clustering_fit(dataset, key_lst):
def clustering_fit(dataset: list, key_lst: List[str]) -> dict:
"""This function creates clustering models for each metadata type,
using Kernel Density Estimation algorithm.
Args:
datasets (list): data
dataset (list): data
key_lst (list of str): names of metadata to cluster
Returns:
@@ -192,7 +206,7 @@ def check_isMRIparam(mri_param_type: str, mri_param: dict, subject: str, metadat
return True


def get_film_metadata_models(ds_train, metadata_type, debugging=False):
def get_film_metadata_models(ds_train: MRI2DSegmentationDataset, metadata_type: str, debugging: bool = False):
"""Get FiLM models.
This function pulls the clustering and one-hot encoder models that are used by FiLMedUnet.
@@ -221,7 +235,8 @@ def get_film_metadata_models(ds_train, metadata_type, debugging=False):
return ds_train, train_onehotencoder, metadata_clustering_models


def store_film_params(gammas, betas, metadata_values, metadata, model, film_layers, depth, film_metadata):
def store_film_params(gammas: dict, betas: dict, metadata_values: list, metadata: list, model: nn.Module,
film_layers: list, depth: int, film_metadata: str) -> tuple[dict, dict, list]:
"""Store FiLM params.
Args:
Expand All @@ -235,7 +250,7 @@ def store_film_params(gammas, betas, metadata_values, metadata, model, film_laye
film_metadata (str): Metadata of interest used to modulate the network (e.g., contrast, tumor_type).
Returns:
dict, dict: gammas, betas
dict, dict, list: gammas, betas, metadata_values
"""
new_input = [metadata[k][0][film_metadata] for k in range(len(metadata))]
metadata_values.append(new_input)
@@ -255,7 +270,7 @@ def store_film_params(gammas, betas, metadata_values, metadata, model, film_laye
return gammas, betas, metadata_values


def save_film_params(gammas, betas, metadata_values, depth, ofolder):
def save_film_params(gammas: dict, betas: dict, metadata_values: list, depth: int, ofolder: str) -> None:
"""Save FiLM params as npy files.
These parameters can be further used for visualisation purposes. They are saved in `ofolder` in `.npy` format.
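A hedged sketch of the accumulate-then-save flow (the loop, the batch key, and the model/film_layers objects are assumptions, not shown in this diff):

gammas, betas, metadata_values = {}, {}, []
for batch in eval_loader:  # hypothetical inference loop
    gammas, betas, metadata_values = store_film_params(
        gammas, betas, metadata_values, batch["input_metadata"],
        model, film_layers, depth, film_metadata="contrast")
save_film_params(gammas, betas, metadata_values, depth, ofolder="film_params")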