Commit

Merge branch 'master' into typehint_sample_meta_data
dyt811 committed Nov 20, 2022
2 parents f3ea68f + 0adcbbf commit 1af22bb
Showing 7 changed files with 177 additions and 94 deletions.
5 changes: 5 additions & 0 deletions docs/source/configuration_file.rst
@@ -911,6 +911,11 @@ Split Dataset
    }
}
+
+.. note::
+    .. line-block::
+        The fraction of the dataset used as validation set will correspond to ``1 - train_fraction - test_fraction``.
+        For example: ``1 - 0.6 - 0.2 = 0.2``.
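The arithmetic in this note is easy to sanity-check. A minimal sketch (illustrative helper, not ivadomed code):

def validation_fraction(train_fraction: float, test_fraction: float) -> float:
    # Whatever is not used for training or testing is left for validation.
    val = 1.0 - train_fraction - test_fraction
    if val < 0:
        raise ValueError("train_fraction + test_fraction must not exceed 1.0")
    return val

assert abs(validation_fraction(0.6, 0.2) - 0.2) < 1e-9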


Training Parameters
-------------------
147 changes: 98 additions & 49 deletions ivadomed/config_manager.py
@@ -1,58 +1,61 @@
import json
import collections.abc
+from typing import Dict, List, Any, KeysView, Union
+
from loguru import logger
from pathlib import Path
from ivadomed import utils as imed_utils
from ivadomed.keywords import ConfigKW, LoaderParamsKW, SplitDatasetKW, DataTestingKW
import copy


-def update(d, u):
+def update(source_dict: dict, destination_dict: dict) -> dict:
"""Update dictionary and nested dictionaries.
Args:
-        d (dict): Source dictionary that is updated by destination dictionary.
-        u (dict): Destination dictionary.
+        source_dict (dict): Source dictionary that is updated by destination dictionary.
+        destination_dict (dict): Destination dictionary.
Returns:
dict: updated dictionary
"""
-    for k, v in u.items():
-        if isinstance(v, collections.abc.Mapping):
-            d[k] = update(d.get(k, {}), v)
+    for key, value in destination_dict.items():
+        if isinstance(value, collections.abc.Mapping):
+            source_dict[key] = update(source_dict.get(key, {}), value)
else:
# If source dictionary has keys that the destination dict doesn't have, keep these keys
-            if k in d and isinstance(d[k], collections.abc.Mapping) and not isinstance(v, collections.abc.Mapping):
+            if key in source_dict and isinstance(source_dict[key], collections.abc.Mapping) and not isinstance(value, collections.abc.Mapping):
pass
else:
-                d[k] = v
-    return d
+                source_dict[key] = value
+    return source_dict
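A quick illustration of the recursive merge this function performs (a sketch, assuming the behaviour described in its docstring; note that source_dict is modified in place):

from ivadomed.config_manager import update

defaults = {"training": {"lr": 0.001, "epochs": 100}, "debugging": False}
overrides = {"training": {"lr": 0.01}}

merged = update(defaults, overrides)
# Nested keys absent from the overrides survive the merge:
# merged == {"training": {"lr": 0.01, "epochs": 100}, "debugging": False}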


-def deep_dict_compare(source_dict, dest_dict, keyname=None):
+def deep_dict_compare(source_dict: dict, destination_dict: dict, keyname: str = None):
"""Compare and display differences between dictionaries (and nested dictionaries).
Args:
source_dict (dict): Source dictionary.
-        dest_dict (dict): Destination dictionary.
+        destination_dict (dict): Destination dictionary.
keyname (str): Key name to indicate the path to nested parameter.
"""
-    for key in dest_dict:
+    for key in destination_dict:
if key not in source_dict:
key_str = key if keyname is None else keyname + key
-            logger.info(f'  {key_str}: {dest_dict[key]}')
+            logger.info(f'  {key_str}: {destination_dict[key]}')

else:
-            if isinstance(dest_dict[key], collections.abc.Mapping):
+            if isinstance(destination_dict[key], collections.abc.Mapping):
if isinstance(source_dict[key], collections.abc.Mapping):
-                    deep_dict_compare(source_dict[key], dest_dict[key], key + ": ")
+                    deep_dict_compare(source_dict[key], destination_dict[key], key + ": ")
# In case a new dictionary appears in updated file
else:
-                    deep_dict_compare(source_dict, dest_dict[key], key + ": ")
+                    deep_dict_compare(source_dict, destination_dict[key], key + ": ")
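deep_dict_compare only logs differences, it returns nothing. A sketch of what it would report (log lines are illustrative):

from ivadomed.config_manager import deep_dict_compare

user_config = {"training": {"lr": 0.01}}
full_config = {"training": {"lr": 0.01, "epochs": 100}, "debugging": False}

# Logs the keys present in full_config but missing from user_config, e.g.:
#   training: epochs: 100
#   debugging: False
deep_dict_compare(user_config, full_config)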


-def load_json(config_path):
+def load_json(config_path: str) -> dict:
"""Load json file content
Args:
@@ -67,7 +70,7 @@ def load_json(config_path):
return default_config
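The body of load_json is collapsed in this view; a minimal equivalent of what such a loader typically does (a sketch, not the elided code itself):

import json

def load_json_sketch(config_path: str) -> dict:
    # Read and parse the JSON configuration file.
    with open(config_path, encoding="utf-8") as fp:
        return json.load(fp)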


-# To ensure retrocompatibility for parameter changes in configuration file
+# To ensure retro-compatibility for parameter changes in configuration file
KEY_CHANGE_DICT = {'UNet3D': ConfigKW.MODIFIED_3D_UNET, 'bids_path': LoaderParamsKW.PATH_DATA,
'log_directory': ConfigKW.PATH_OUTPUT}
KEY_SPLIT_DATASET_CHANGE_LST = ['method', 'center_test']
@@ -84,68 +87,114 @@ class ConfigurationManager(object):
context_original (dict): Provided configuration file.
config_updated (dict): Updated configuration file.
"""
-    def __init__(self, path_context):
-        self.path_context = path_context
-        self.key_change_dict = KEY_CHANGE_DICT
-        self.key_split_dataset_change_lst = KEY_SPLIT_DATASET_CHANGE_LST

+    def __init__(self, path_context: str):
+        """
+        Initialize the ConfigurationManager by validating the given path and loading the file.
+        Also load the default configuration file.
+        Args:
+            path_context (str): Path to configuration file.
+        """
+        self.path_context: str = path_context
+        self.key_change_dict: Dict[str, str] = KEY_CHANGE_DICT
+        self.key_split_dataset_change_lst: List[str] = KEY_SPLIT_DATASET_CHANGE_LST
self._validate_path()
-        default_config_path = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
-        self.config_default = load_json(default_config_path)
-        self.context_original = load_json(path_context)
-        self.config_updated = {}
+        default_config_path: str = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json"))
+        self.config_default: dict = load_json(default_config_path)
+        self.context_original: dict = load_json(path_context)
+        self.config_updated: dict = {}

@property
-    def config_updated(self):
+    def config_updated(self) -> dict:
+        """
+        This function simply returns the attribute `_config_updated`.
+        Returns:
+            dict: `_config_updated` attribute
+        """
return self._config_updated

@config_updated.setter
-    def config_updated(self, config_updated):
+    def config_updated(self, config_updated: dict):
+        """
+        If config_updated is empty, we copy the loaded configuration into it and apply some changes (renaming keys,
+        changing values, deleting key-value pairs) to ensure retro-compatibility.
+        Sets the new config_updated to the attribute `_config_updated`.
+        Args:
+            config_updated (dict): The new configuration to set.
+        """
if config_updated == {}:
-            context = copy.deepcopy(self.context_original)
+            context: dict = copy.deepcopy(self.context_original)
self.change_keys(context, list(context.keys()))
-            config_updated = update(self.config_default, context)
-            self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET], config_updated[ConfigKW.SPLIT_DATASET].keys())
+            config_updated: dict = update(self.config_default, context)
+            self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET],
+                                    config_updated[ConfigKW.SPLIT_DATASET].keys())

-        self._config_updated = config_updated
+        self._config_updated: dict = config_updated
if config_updated['debugging']:
self._display_differing_keys()

-    def get_config(self):
+    def get_config(self) -> dict:
"""Get updated configuration file with all parameters from the default config file.
Returns:
dict: Updated configuration dict.
"""
return self.config_updated
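Typical use of the manager end to end (a sketch; the path and the "path_output" key are illustrative):

from ivadomed.config_manager import ConfigurationManager

manager = ConfigurationManager("path/to/my_config.json")
config = manager.get_config()  # user settings merged on top of config_default.json
print(config["path_output"])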

-    def change_keys(self, context, keys):
-        for k in keys:
+    def change_keys(self, context: Union[dict, collections.abc.Mapping], keys: List[str]):
+        """
+        This function renames the keys of the context dictionary that also appear in the `key_change_dict`
+        attribute to the values associated with them in `key_change_dict`.
+        Args:
+            context (Union[dict, collections.abc.Mapping]): The dictionary to change.
+            keys (List[str]): The keys in context to consider.
+        """
+        for key_to_change in keys:
# Verify if key is still in the dict
-            if k in context:
-                if k == "NumpyToTensor":
-                    del context[k]
+            if key_to_change in context:
+                # If the key_to_change is "NumpyToTensor", remove it from the context.
+                if key_to_change == "NumpyToTensor":
+                    del context[key_to_change]
continue
-                v = context[k]
+                value_to_change: Any = context[key_to_change]
# Verify if value is a dictionary
-            if isinstance(v, collections.abc.Mapping):
-                self.change_keys(v, list(v.keys()))
+            if isinstance(value_to_change, collections.abc.Mapping):
+                self.change_keys(value_to_change, list(value_to_change.keys()))
else:
# Change keys from the key_change_dict
for key in self.key_change_dict:
if key in context:
context[self.key_change_dict[key]] = context[key]
del context[key]
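For example, the retro-compatibility renaming maps legacy keys to their current names. Reusing the manager from the sketch above (the renamed key strings are the assumed values of the keyword constants):

legacy = {"bids_path": "data/", "log_directory": "out/"}
manager.change_keys(legacy, list(legacy.keys()))
# Assuming LoaderParamsKW.PATH_DATA == "path_data" and ConfigKW.PATH_OUTPUT == "path_output":
# legacy == {"path_data": "data/", "path_output": "out/"}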

-    def change_keys_values(self, config_updated, keys):
-        for k in self.key_split_dataset_change_lst:
-            if k in keys:
-                value = config_updated[k]
-                if k == 'method' and value == "per_center":
+    def change_keys_values(self, config_updated: dict, keys: List[str]):
+        """
+        This function sets DATA_TESTING->DATA_TYPE to "institution_id" if the method value is per_center,
+        and DATA_TESTING->DATA_VALUE to the value of center_test.
+        It verifies these conditions and sets the corresponding values in `config_updated`.
+        Args:
+            config_updated (dict): Configuration dictionary to update.
+            keys (List[str]): The keys to consider.
+        """
+        for key_to_change in self.key_split_dataset_change_lst:
+            if key_to_change in keys:
+                value: Any = config_updated[key_to_change]
+                # If the method is per_center, the data_testing->data_type value becomes "institution_id".
+                if key_to_change == 'method' and value == "per_center":
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] = "institution_id"
-            if k == 'center_test' and \
+                # If the key is center_test, data_testing->data_type is "institution_id", and the value is not None,
+                # then data_testing->data_value becomes this value.
+                if key_to_change == 'center_test' and \
                    config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] == "institution_id" and \
-                value is not None:
+                        value is not None:
config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_VALUE] = value
-                del config_updated[k]
+                # Remove the current key-value pair
+                del config_updated[key_to_change]
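A before/after sketch of this split-dataset migration (keyword string values are assumed, and manager is the instance from the earlier sketch):

split = {
    "method": "per_center",
    "center_test": ["03"],
    "data_testing": {"data_type": None, "data_value": []},
}
manager.change_keys_values(split, list(split.keys()))
# 'method' and 'center_test' are consumed and deleted; assuming the keyword
# constants map to the strings used above, split becomes:
# {"data_testing": {"data_type": "institution_id", "data_value": ["03"]}}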

def _display_differing_keys(self):
"""Display differences between dictionaries.
12 changes: 10 additions & 2 deletions ivadomed/loader/balanced_sampler.py
@@ -1,5 +1,13 @@
+from __future__ import annotations
import torch
import numpy as np
+import typing
+
+from typing import Union
+
+if typing.TYPE_CHECKING:
+    from ivadomed.loader.bids_dataset import BidsDataset
+    from ivadomed.loader.bids3d_dataset import Bids3DDataset


class BalancedSampler(torch.utils.data.sampler.Sampler):
@@ -19,7 +27,7 @@ class distributions from an imbalanced dataset.
label_idx (int): Keeps track of the label indices already used for the metadata_dict.
"""

-    def __init__(self, dataset, metadata='gt'):
+    def __init__(self, dataset: Union[BidsDataset, Bids3DDataset], metadata: str = 'gt') -> None:
self.indices = list(range(len(dataset)))

self.nb_samples = len(self.indices)
Expand All @@ -39,7 +47,7 @@ def __init__(self, dataset, metadata='gt'):

self.weights = torch.DoubleTensor(weights)

-    def _get_label(self, dataset, idx, metadata):
+    def _get_label(self, dataset: Union[BidsDataset, Bids3DDataset], idx: int, metadata: str) -> int:
"""Returns 1 if sample is not empty, 0 if it is empty (only zeros).
Args:
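A sketch of how such a sampler is typically plugged into a loader (assuming BalancedSampler follows the standard torch Sampler protocol, which its per-sample weights suggest):

from torch.utils.data import DataLoader
from ivadomed.loader.balanced_sampler import BalancedSampler

# dataset would be a BidsDataset or Bids3DDataset built elsewhere
sampler = BalancedSampler(dataset, metadata='gt')
loader = DataLoader(dataset, batch_size=8, sampler=sampler)
# Batches are drawn with probabilities proportional to the computed weights,
# so under-represented classes are over-sampled.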
18 changes: 9 additions & 9 deletions ivadomed/loader/bids_dataframe.py
@@ -31,7 +31,7 @@ class BidsDataframe:
df (pd.DataFrame): Dataframe containing dataset information
"""

-    def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None):
+    def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None) -> None:

# paths_data from loader parameters
self.paths_data = loader_params['path_data']
@@ -74,7 +74,7 @@ def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None):
# Save dataframe as csv file
self.save(str(Path(path_output, "bids_dataframe.csv")))

-    def create_bids_dataframe(self):
+    def create_bids_dataframe(self) -> None:
"""Generate the dataframe."""

for path_data in self.paths_data:
@@ -194,7 +194,7 @@ def create_bids_dataframe(self):
# Drop columns with all null values
self.df.dropna(axis=1, inplace=True, how='all')

-    def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout):
+    def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout) -> pd.DataFrame:
"""Add tsv files metadata to dataframe.
Args:
@@ -253,7 +253,7 @@ def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout):

return df

-    def get_subjects_with_derivatives(self):
+    def get_subjects_with_derivatives(self) -> (list, list):
"""Get lists of subject filenames with available derivatives.
Returns:
@@ -284,23 +284,23 @@ def get_subjects_with_derivatives(self):

return has_deriv, deriv

-    def get_subject_fnames(self):
+    def get_subject_fnames(self) -> list:
"""Get the list of subject filenames in dataframe.
Returns:
list: subject filenames.
"""
return self.df[~self.df['path'].str.contains('derivatives')]['filename'].to_list()

-    def get_deriv_fnames(self):
+    def get_deriv_fnames(self) -> list:
"""Get the list of derivative filenames in dataframe.
Returns:
list: derivative filenames.
"""
return self.df[self.df['path'].str.contains('derivatives')]['filename'].tolist()
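These two getters split the dataframe on whether a file lives under derivatives/. The idea with a toy frame (paths illustrative):

import pandas as pd

df = pd.DataFrame({
    "path": ["data/sub-01/anat/sub-01_T1w.nii.gz",
             "data/derivatives/labels/sub-01/anat/sub-01_T1w_seg-manual.nii.gz"],
    "filename": ["sub-01_T1w.nii.gz", "sub-01_T1w_seg-manual.nii.gz"],
})
subjects = df[~df["path"].str.contains("derivatives")]["filename"].to_list()
derivs = df[df["path"].str.contains("derivatives")]["filename"].tolist()
# subjects == ["sub-01_T1w.nii.gz"]; derivs == ["sub-01_T1w_seg-manual.nii.gz"]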

-    def get_derivatives(self, subject_fname: str, deriv_fnames: list):
+    def get_derivatives(self, subject_fname: str, deriv_fnames: list) -> list:
"""Return list of available derivative filenames for a subject filename.
Args:
@@ -313,7 +313,7 @@ def get_derivatives(self, subject_fname: str, deriv_fnames: list):
prefix_fname = subject_fname.split('.')[0]
return [d for d in deriv_fnames if prefix_fname in d]
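The prefix match at work (filenames illustrative):

deriv_fnames = ["sub-01_T1w_seg-manual.nii.gz", "sub-02_T1w_seg-manual.nii.gz"]
prefix_fname = "sub-01_T1w.nii.gz".split('.')[0]  # "sub-01_T1w"
matches = [d for d in deriv_fnames if prefix_fname in d]
# matches == ["sub-01_T1w_seg-manual.nii.gz"]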

-    def save(self, path: str):
+    def save(self, path: str) -> None:
"""Save the dataframe into a csv file.
Args:
@@ -325,7 +325,7 @@ def save(self, path: str):
except FileNotFoundError:
logger.error(f"Wrong path, bids_dataframe.csv could not be saved in {path}.")

-    def write_derivatives_dataset_description(self, path_data: str):
+    def write_derivatives_dataset_description(self, path_data: str) -> None:
"""Writes default dataset_description.json file if not found in path_data/derivatives folder
Args:
(The diffs for the remaining 3 changed files did not load in this view.)
