Skip to content

Commit

Permalink
add docs (#233)
Browse files (browse the repository at this point in the history)
* add docs

* update docs

* update docs

* remove links

* update docs

* update docs

* update docs

* update docs

* update docs

Co-authored-by: Matan Perlmutter <matan@deepchecks.com>
  • Loading branch information
matanper and Matan Perlmutter committed Dec 9, 2021
1 parent db287cf commit c9aadef
Show file tree
Hide file tree
Showing 4 changed files with 896 additions and 22 deletions.
5 changes: 3 additions & 2 deletions deepchecks/base/check.py
Expand Up @@ -41,7 +41,7 @@ class Condition:
def __init__(self, name: str, function: Callable, params: Dict):
if not isinstance(function, Callable):
raise DeepchecksValueError(f'Condition must be a function `(Any) -> Union[ConditionResult, bool]`, '
f'but got: {type(function).__name__}')
f'but got: {type(function).__name__}')
if not isinstance(name, str):
raise DeepchecksValueError(f'Condition name must be of type str but got: {type(name).__name__}')
self.name = name
Expand Down Expand Up @@ -279,7 +279,8 @@ def __repr__(self, tabs=0, prefix=''):

def params(self) -> Dict:
"""Return parameters to show when printing the check."""
return {k: v for k, v in vars(self).items() if not k.startswith('_') and v is not None}
return {k: v for k, v in vars(self).items()
if not k.startswith('_') and v is not None and not callable(v)}

def clean_conditions(self):
"""Remove all conditions from this check instance."""
Expand Down
39 changes: 19 additions & 20 deletions deepchecks/checks/methodology/datasets_size_comparison.py
Expand Up @@ -14,14 +14,14 @@
from deepchecks import Dataset, CheckResult, ConditionResult, TrainTestBaseCheck


__all__ = ['DatasetsSizeComparison',]
__all__ = ['DatasetsSizeComparison']


T = t.TypeVar('T', bound='DatasetsSizeComparison')


class DatasetsSizeComparison(TrainTestBaseCheck):
"""Verify Test dataset size comparing it to the Train dataset size."""
"""Verify test dataset size comparing it to the train dataset size."""

def run(self, train_dataset: Dataset, test_dataset: Dataset, model: object = None) -> CheckResult:
"""Run check instance.
Expand All @@ -44,13 +44,11 @@ def run(self, train_dataset: Dataset, test_dataset: Dataset, model: object = Non
check_name = type(self).__name__
Dataset.validate_dataset(train_dataset, check_name)
Dataset.validate_dataset(test_dataset, check_name)
result = pd.DataFrame.from_dict({
'train': {'size': train_dataset.n_samples},
'test': {'size': test_dataset.n_samples},
})
sizes = {'Train': train_dataset.n_samples, 'Test': test_dataset.n_samples}
display = pd.DataFrame(sizes, index=['Size'])
return CheckResult(
value=result,
display=result
value=sizes,
display=display
)

def add_condition_test_size_not_smaller_than(self: T, value: int = 100) -> T:
Expand All @@ -62,15 +60,15 @@ def add_condition_test_size_not_smaller_than(self: T, value: int = 100) -> T:
Returns:
Self: current instance of the DatasetsSizeComparison check.
"""
def condition(check_result: pd.DataFrame) -> ConditionResult:
def condition(check_result: dict) -> ConditionResult:
return (
ConditionResult(False, f'Test dataset is smaller than {value}.')
if check_result['test']['size'] <= value # type: ignore
ConditionResult(False, f'Test dataset is {check_result["Test"]}')
if check_result['Test'] <= value
else ConditionResult(True)
)

return self.add_condition(
name=f'Test dataset size is not smaller than {value}.',
name=f'Test dataset size is not smaller than {value}',
condition_func=condition
)

Expand All @@ -84,14 +82,15 @@ def add_condition_test_train_size_ratio_not_smaller_than(self: T, ratio: float =
Self: current instance of the DatasetsSizeComparison check.
"""

def condition(check_result: pd.DataFrame) -> ConditionResult:
if (check_result['test']['size'] / check_result['train']['size']) <= ratio: # type: ignore
return ConditionResult(False, f'Test-Train size ratio is smaller than {ratio}.')
def condition(check_result: dict) -> ConditionResult:
test_train_ratio = check_result['Test'] / check_result['Train']
if test_train_ratio <= ratio:
return ConditionResult(False, f'Test-Train size ratio is {test_train_ratio}')
else:
return ConditionResult(True)

return self.add_condition(
name=f'Test-Train size ratio is not smaller than {ratio}.',
name=f'Test-Train size ratio is not smaller than {ratio}',
condition_func=condition
)

Expand All @@ -102,13 +101,13 @@ def add_condition_train_dataset_not_smaller_than_test(self: T) -> T:
Self: current instance of the DatasetsSizeComparison check.
"""

def condition(check_result: pd.DataFrame) -> ConditionResult:
if check_result['train']['size'] < check_result['test']['size']: # type: ignore
return ConditionResult(False, 'Train dataset is smaller than test dataset.')
def condition(check_result: dict) -> ConditionResult:
if check_result['Train'] < check_result['Test']:
return ConditionResult(False, 'Train dataset is smaller than test dataset')
else:
return ConditionResult(True)

return self.add_condition(
name='Train dataset is not smaller than test dataset.',
name='Train dataset is not smaller than test dataset',
condition_func=condition
)

0 comments on commit c9aadef

Please sign in to comment.