Commit

Merge branch 'master' into typehint_sample_meta_data
dyt811 committed Nov 20, 2022
2 parents f3ea68f + 0adcbbf commit 1af22bb
Showing 7 changed files with 177 additions and 94 deletions.
5 changes: 5 additions & 0 deletions docs/source/configuration_file.rst
@@ -911,6 +911,11 @@ Split Dataset
    }
}
+
+.. note::
+    .. line-block::
+        The fraction of the dataset used as validation set will correspond to ``1 - train_fraction - test_fraction``.
+        For example: ``1 - 0.6 - 0.2 = 0.2``.
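The arithmetic in this note is easy to sanity-check. A minimal sketch (illustrative helper, not ivadomed code):

def validation_fraction(train_fraction: float, test_fraction: float) -> float:
    # Whatever is not used for training or testing is left for validation.
    val = 1.0 - train_fraction - test_fraction
    if val < 0:
        raise ValueError("train_fraction + test_fraction must not exceed 1.0")
    return val

assert abs(validation_fraction(0.6, 0.2) - 0.2) < 1e-9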


Training Parameters
-------------------
147 changes: 98 additions & 49 deletions ivadomed/config_manager.py
@@ -1,58 +1,61 @@
import json
import collections.abc
+from typing import Dict, List, Any, KeysView, Union
+
from loguru import logger
from pathlib import Path
from ivadomed import utils as imed_utils
from ivadomed.keywords import ConfigKW, LoaderParamsKW, SplitDatasetKW, DataTestingKW
import copy


-def update(d, u):
+def update(source_dict: dict, destination_dict: dict) -> dict:
"""Update dictionary and nested dictionaries.
Args:
-        d (dict): Source dictionary that is updated by destination dictionary.
-        u (dict): Destination dictionary.
+        source_dict (dict): Source dictionary that is updated by destination dictionary.
+        destination_dict (dict): Destination dictionary.
Returns:
dict: updated dictionary
"""
-    for k, v in u.items():
-        if isinstance(v, collections.abc.Mapping):
-            d[k] = update(d.get(k, {}), v)
+    for key, value in destination_dict.items():
+        if isinstance(value, collections.abc.Mapping):
+            source_dict[key] = update(source_dict.get(key, {}), value)
else:
# If source dictionary has keys that the destination dict doesn't have, keep these keys
-            if k in d and isinstance(d[k], collections.abc.Mapping) and not isinstance(v, collections.abc.Mapping):
+            if key in source_dict and isinstance(source_dict[key], collections.abc.Mapping) and not isinstance(value, collections.abc.Mapping):
pass
else:
-                d[k] = v
-    return d
+                source_dict[key] = value
+    return source_dict
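A quick illustration of the recursive merge this function performs (a sketch, assuming the behaviour described in its docstring; note that source_dict is modified in place):

from ivadomed.config_manager import update

defaults = {"training": {"lr": 0.001, "epochs": 100}, "debugging": False}
overrides = {"training": {"lr": 0.01}}

merged = update(defaults, overrides)
# Nested keys absent from the overrides survive the merge:
# merged == {"training": {"lr": 0.01, "epochs": 100}, "debugging": False}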


-def deep_dict_compare(source_dict, dest_dict, keyname=None):
+def deep_dict_compare(source_dict: dict, destination_dict: dict, keyname: str = None):
"""Compare and display differences between dictionaries (and nested dictionaries).
Args:
source_dict (dict): Source dictionary.
-        dest_dict (dict): Destination dictionary.
+        destination_dict (dict): Destination dictionary.
keyname (str): Key name to indicate the path to nested parameter.
"""
-    for key in dest_dict:
+    for key in destination_dict:
if key not in source_dict:
key_str = key if keyname is None else keyname + key
-            logger.info(f'  {key_str}: {dest_dict[key]}')
+            logger.info(f'  {key_str}: {destination_dict[key]}')

else:
-            if isinstance(dest_dict[key], collections.abc.Mapping):
+            if isinstance(destination_dict[key], collections.abc.Mapping):
if isinstance(source_dict[key], collections.abc.Mapping):
-                    deep_dict_compare(source_dict[key], dest_dict[key], key + ": ")
+                    deep_dict_compare(source_dict[key], destination_dict[key], key + ": ")
# In case a new dictionary appears in updated file
else:
-                    deep_dict_compare(source_dict, dest_dict[key], key + ": ")
+                    deep_dict_compare(source_dict, destination_dict[key], key + ": ")
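deep_dict_compare only logs differences, it returns nothing. A sketch of what it would report (log lines are illustrative):

from ivadomed.config_manager import deep_dict_compare

user_config = {"training": {"lr": 0.01}}
full_config = {"training": {"lr": 0.01, "epochs": 100}, "debugging": False}

# Logs the keys present in full_config but missing from user_config, e.g.:
#   training: epochs: 100
#   debugging: False
deep_dict_compare(user_config, full_config)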


-def load_json(config_path):
+def load_json(config_path: str) -> dict:
"""Load json file content
Args:
@@ -67,7 +70,7 @@ def load_json(config_path):
return default_config
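The body of load_json is collapsed in this view; a minimal equivalent of what such a loader typically does (a sketch, not the elided code itself):

import json

def load_json_sketch(config_path: str) -> dict:
    # Read and parse the JSON configuration file.
    with open(config_path, encoding="utf-8") as fp:
        return json.load(fp)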


-# To ensure retrocompatibility for parameter changes in configuration file
+# To ensure retro-compatibility for parameter changes in configuration file
KEY_CHANGE_DICT = {'UNet3D': ConfigKW.MODIFIED_3D_UNET, 'bids_path': LoaderParamsKW.PATH_DATA,
'log_directory': ConfigKW.PATH_OUTPUT}
KEY_SPLIT_DATASET_CHANGE_LST = ['method', 'center_test']
@@ -84,68 +87,114 @@ class ConfigurationManager(object):
context_original (dict): Provided configuration file.
config_updated (dict): Updated configuration file.
"""
-    def __init__(self, path_context):
-        self.path_context = path_context
-        self.key_change_dict = KEY_CHANGE_DICT
-        self.key_split_dataset_change_lst = KEY_SPLIT_DATASET_CHANGE_LST

+    def __init__(self, path_context: str):
+        """
+        Initialize the ConfigurationManager by validating the given path and loading the file.
+        Also load the default configuration file.
+        Args:
+            path_context (str): Path to configuration file.
+        """
+        self.path_context: str = path_context
+        self.key_change_dict: Dict[str, str] = KEY_CHANGE_DICT
+        self.key_split_dataset_change_lst: List[str] = KEY_SPLIT_DATASET_CHANGE_LST
self._validate_path()
-        default_config_path = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
-        self.config_default = load_json(default_config_path)
-        self.context_original = load_json(path_context)
-        self.config_updated = {}
+        default_config_path: str = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
+        self.config_default: dict = load_json(default_config_path)
+        self.context_original: dict = load_json(path_context)
+        self.config_updated: dict = {}

@property
-    def config_updated(self):
+    def config_updated(self) -> dict:
+        """
+        This function simply returns the attribute `_config_updated`.
+        Returns:
+            dict: `_config_updated` attribute
+        """
return self._config_updated

@config_updated.setter
-    def config_updated(self, config_updated):
+    def config_updated(self, config_updated: dict):
+        """
+        If config_updated is empty, we copy the loaded configuration into it and apply some changes (renaming keys,
+        changing values, deleting key-value pairs) to ensure retro-compatibility.
+        Sets the new config_updated to the attribute `_config_updated`.
+        Args:
+            config_updated (dict): The new configuration to set.
+        """
if config_updated == {}:
-            context = copy.deepcopy(self.context_original)
+            context: dict = copy.deepcopy(self.context_original)
self.change_keys(context, list(context.keys()))
-            config_updated = update(self.config_default, context)
-            self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET], config_updated[ConfigKW.SPLIT_DATASET].keys())
+            config_updated: dict = update(self.config_default, context)
+            self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET],
+                                    config_updated[ConfigKW.SPLIT_DATASET].keys())

-        self._config_updated = config_updated
+        self._config_updated: dict = config_updated
if config_updated['debugging']:
self._display_differing_keys()

-    def get_config(self):
+    def get_config(self) -> dict:
"""Get updated configuration file with all parameters from the default config file.
Returns:
dict: Updated configuration dict.
"""
return self.config_updated
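Typical use of the manager end to end (a sketch; the path and the "path_output" key are illustrative):

from ivadomed.config_manager import ConfigurationManager

manager = ConfigurationManager("path/to/my_config.json")
config = manager.get_config()  # user settings merged on top of config_default.json
print(config["path_output"])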

-    def change_keys(self, context, keys):
-        for k in keys:
+    def change_keys(self, context: Union[dict, collections.abc.Mapping], keys: List[str]):
+        """
+        This function renames the keys of the context dictionary that also appear in the `key_change_dict`
+        attribute to the values associated with them in `key_change_dict`.
+        Args:
+            context (Union[dict, collections.abc.Mapping]): The dictionary to change.
+            keys (List[str]): The keys in context to consider.
+        """
+        for key_to_change in keys:
# Verify if key is still in the dict
-            if k in context:
-                if k == "NumpyToTensor":
-                    del context[k]
+            if key_to_change in context:
+                # If the key_to_change is "NumpyToTensor", remove it from the context.
+                if key_to_change == "NumpyToTensor":
+                    del context[key_to_change]
continue
-                v = context[k]
+                value_to_change: Any = context[key_to_change]
# Verify if value is a dictionary
-            if isinstance(v, collections.abc.Mapping):
-                self.change_keys(v, list(v.keys()))
+            if isinstance(value_to_change, collections.abc.Mapping):
+                self.change_keys(value_to_change, list(value_to_change.keys()))
else:
# Change keys from the key_change_dict
for key in self.key_change_dict:
if key in context:
context[self.key_change_dict[key]] = context[key]
del context[key]
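For example, the retro-compatibility renaming maps legacy keys to their current names. Reusing the manager from the sketch above (the renamed key strings are the assumed values of the keyword constants):

legacy = {"bids_path": "data/", "log_directory": "out/"}
manager.change_keys(legacy, list(legacy.keys()))
# Assuming LoaderParamsKW.PATH_DATA == "path_data" and ConfigKW.PATH_OUTPUT == "path_output":
# legacy == {"path_data": "data/", "path_output": "out/"}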

-    def change_keys_values(self, config_updated, keys):
-        for k in self.key_split_dataset_change_lst:
-            if k in keys:
-                value = config_updated[k]
-                if k == 'method' and value == "per_center":
+    def change_keys_values(self, config_updated: dict, keys: List[str]):
+        """
+        This function sets DATA_TESTING->DATA_TYPE to "institution_id" if the method value is per_center,
+        and DATA_TESTING->DATA_VALUE to the value of center_test.
+        It verifies these conditions and sets the corresponding values in `config_updated`.
+        Args:
+            config_updated (dict): Configuration dictionary to update.
+            keys (List[str]): The keys to consider.
+        """
+        for key_to_change in self.key_split_dataset_change_lst:
+            if key_to_change in keys:
+                value: Any = config_updated[key_to_change]
+                # If the method is per_center, the data_testing->data_type value becomes "institution_id".
+                if key_to_change == 'method' and value == "per_center":
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] = "institution_id"
-            if k == 'center_test' and \
+                # If the key is center_test, data_testing->data_type is "institution_id", and the value is not None,
+                # then data_testing->data_value becomes this value.
+                if key_to_change == 'center_test' and \
                    config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] == "institution_id" and \
-                value is not None:
+                        value is not None:
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_VALUE] = value
-                del config_updated[k]
+                # Remove the current key-value pair
+                del config_updated[key_to_change]
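A before/after sketch of this split-dataset migration (keyword string values are assumed, and manager is the instance from the earlier sketch):

split = {
    "method": "per_center",
    "center_test": ["03"],
    "data_testing": {"data_type": None, "data_value": []},
}
manager.change_keys_values(split, list(split.keys()))
# 'method' and 'center_test' are consumed and deleted; assuming the keyword
# constants map to the strings used above, split becomes:
# {"data_testing": {"data_type": "institution_id", "data_value": ["03"]}}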

def _display_differing_keys(self):
"""Display differences between dictionaries.
12 changes: 10 additions & 2 deletions ivadomed/loader/balanced_sampler.py
@@ -1,5 +1,13 @@
+from __future__ import annotations
import torch
import numpy as np
+import typing
+
+from typing import Union
+
+if typing.TYPE_CHECKING:
+    from ivadomed.loader.bids_dataset import BidsDataset
+    from ivadomed.loader.bids3d_dataset import Bids3DDataset


class BalancedSampler(torch.utils.data.sampler.Sampler):
@@ -19,7 +27,7 @@ class distributions from an imbalanced dataset.
label_idx (int): Keeps track of the label indices already used for the metadata_dict.
"""

-    def __init__(self, dataset, metadata='gt'):
+    def __init__(self, dataset: Union[BidsDataset, Bids3DDataset], metadata: str = 'gt') -> None:
self.indices = list(range(len(dataset)))

self.nb_samples = len(self.indices)
Expand All @@ -39,7 +47,7 @@ def __init__(self, dataset, metadata='gt'):

self.weights = torch.DoubleTensor(weights)

-    def _get_label(self, dataset, idx, metadata):
+    def _get_label(self, dataset: Union[BidsDataset, Bids3DDataset], idx: int, metadata: str) -> int:
"""Returns 1 if sample is not empty, 0 if it is empty (only zeros).
Args:
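A sketch of how such a sampler is typically plugged into a loader (assuming BalancedSampler follows the standard torch Sampler protocol, which its per-sample weights suggest):

from torch.utils.data import DataLoader
from ivadomed.loader.balanced_sampler import BalancedSampler

# dataset would be a BidsDataset or Bids3DDataset built elsewhere
sampler = BalancedSampler(dataset, metadata='gt')
loader = DataLoader(dataset, batch_size=8, sampler=sampler)
# Batches are drawn with probabilities proportional to the computed weights,
# so under-represented classes are over-sampled.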
18 changes: 9 additions & 9 deletions ivadomed/loader/bids_dataframe.py
@@ -31,7 +31,7 @@ class BidsDataframe:
df (pd.DataFrame): Dataframe containing dataset information
"""

-    def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None):
+    def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None) -> None:

# paths_data from loader parameters
self.paths_data = loader_params['path_data']
@@ -74,7 +74,7 @@ def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None):
# Save dataframe as csv file
self.save(str(Path(path_output, "bids_dataframe.csv")))

-    def create_bids_dataframe(self):
+    def create_bids_dataframe(self) -> None:
"""Generate the dataframe."""

for path_data in self.paths_data:
@@ -194,7 +194,7 @@ def create_bids_dataframe(self):
# Drop columns with all null values
self.df.dropna(axis=1, inplace=True, how='all')

-    def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout):
+    def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout) -> pd.DataFrame:
"""Add tsv files metadata to dataframe.
Args:
@@ -253,7 +253,7 @@ def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout):

return df

-    def get_subjects_with_derivatives(self):
+    def get_subjects_with_derivatives(self) -> (list, list):
"""Get lists of subject filenames with available derivatives.
Returns:
@@ -284,23 +284,23 @@ def get_subjects_with_derivatives(self):

return has_deriv, deriv

-    def get_subject_fnames(self):
+    def get_subject_fnames(self) -> list:
"""Get the list of subject filenames in dataframe.
Returns:
list: subject filenames.
"""
return self.df[~self.df['path'].str.contains('derivatives')]['filename'].to_list()

-    def get_deriv_fnames(self):
+    def get_deriv_fnames(self) -> list:
"""Get the list of derivative filenames in dataframe.
Returns:
list: derivative filenames.
"""
return self.df[self.df['path'].str.contains('derivatives')]['filename'].tolist()
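These two getters split the dataframe on whether a file lives under derivatives/. The idea with a toy frame (paths illustrative):

import pandas as pd

df = pd.DataFrame({
    "path": ["data/sub-01/anat/sub-01_T1w.nii.gz",
             "data/derivatives/labels/sub-01/anat/sub-01_T1w_seg-manual.nii.gz"],
    "filename": ["sub-01_T1w.nii.gz", "sub-01_T1w_seg-manual.nii.gz"],
})
subjects = df[~df["path"].str.contains("derivatives")]["filename"].to_list()
derivs = df[df["path"].str.contains("derivatives")]["filename"].tolist()
# subjects == ["sub-01_T1w.nii.gz"]; derivs == ["sub-01_T1w_seg-manual.nii.gz"]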

-    def get_derivatives(self, subject_fname: str, deriv_fnames: list):
+    def get_derivatives(self, subject_fname: str, deriv_fnames: list) -> list:
"""Return list of available derivative filenames for a subject filename.
Args:
@@ -313,7 +313,7 @@ def get_derivatives(self, subject_fname: str, deriv_fnames: list):
prefix_fname = subject_fname.split('.')[0]
return [d for d in deriv_fnames if prefix_fname in d]
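The prefix match at work (filenames illustrative):

deriv_fnames = ["sub-01_T1w_seg-manual.nii.gz", "sub-02_T1w_seg-manual.nii.gz"]
prefix_fname = "sub-01_T1w.nii.gz".split('.')[0]  # "sub-01_T1w"
matches = [d for d in deriv_fnames if prefix_fname in d]
# matches == ["sub-01_T1w_seg-manual.nii.gz"]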

-    def save(self, path: str):
+    def save(self, path: str) -> None:
"""Save the dataframe into a csv file.
Args:
@@ -325,7 +325,7 @@ def save(self, path: str):
except FileNotFoundError:
logger.error(f"Wrong path, bids_dataframe.csv could not be saved in {path}.")

-    def write_derivatives_dataset_description(self, path_data: str):
+    def write_derivatives_dataset_description(self, path_data: str) -> None:
"""Writes default dataset_description.json file if not found in path_data/derivatives folder
Args:
(The diffs for the remaining 3 changed files did not load in this view.)
