Commit

Add to suite output the checks that haven't run because of missing parameters (#436)

* Add to suite output the checks that haven't run because of missing parameters

* Update suite tests

* Add CheckFailure repr
matanper committed Jan 3, 2022
1 parent b0c3576 commit 6c61816
Showing 3 changed files with 65 additions and 99 deletions.
10 changes: 9 additions & 1 deletion deepchecks/base/check.py
@@ -14,6 +14,7 @@
import enum
import inspect
import re
import traceback
from collections import OrderedDict
from functools import wraps
from typing import Any, Callable, List, Union, Dict, cast, Mapping
@@ -498,6 +499,13 @@ def run_logic(self, context: ModelComparisonContext):
class CheckFailure:
"""Class which holds a run exception of a check."""

def __init__(self, check: Any, exception: Exception):
def __init__(self, check: BaseCheck, exception: Exception):
self.check = check
self.exception = exception
self.header = check.name()

def __repr__(self):
"""Return string representation."""
tb_str = traceback.format_exception(etype=type(self.exception), value=self.exception,
tb=self.exception.__traceback__)
return ''.join(tb_str)
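
The new __repr__ simply joins the formatted traceback of the exception the check raised. A minimal stand-alone sketch of the same idea (the class name and messages below are invented for illustration; this is not the deepchecks class itself):

import traceback

class MiniCheckFailure:
    """Illustrative stand-in for the CheckFailure class shown above."""

    def __init__(self, header: str, exception: Exception):
        self.header = header
        self.exception = exception

    def __repr__(self):
        # Same idea as the commit: format the stored exception's traceback
        # and join the lines into a single string.
        tb_lines = traceback.format_exception(
            type(self.exception), self.exception, self.exception.__traceback__
        )
        return ''.join(tb_lines)


try:
    raise ValueError('missing parameter: model')
except ValueError as exp:
    print(repr(MiniCheckFailure('MyCheck', exp)))  # prints the full traceback

Passing the arguments positionally rather than via etype= keeps the sketch working on newer Python versions, where that keyword was renamed.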
22 changes: 19 additions & 3 deletions deepchecks/base/suite.py
@@ -15,7 +15,7 @@
from typing import Union, List, Optional, Tuple, Any, Container, Mapping

from deepchecks.base.display_suite import display_suite_result, ProgressBar
from deepchecks.errors import DeepchecksValueError
from deepchecks.errors import DeepchecksValueError, DeepchecksNotSupportedError
from deepchecks.base import Dataset
from deepchecks.base.check import (CheckResult, TrainTestBaseCheck, SingleDatasetBaseCheck, ModelOnlyBaseCheck,
CheckFailure, ModelComparisonBaseCheck, ModelComparisonContext)
@@ -163,19 +163,30 @@ def run(
check_result = check.run(train_dataset=train_dataset, test_dataset=test_dataset,
model=model)
results.append(check_result)
else:
results.append(Suite._get_unsupported_failure(check))
elif isinstance(check, SingleDatasetBaseCheck):
if train_dataset is not None:
check_result = check.run(dataset=train_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Train Dataset'
# In case of train & test, doesn't want to skip test if train fails. so have to explicitly
# wrap it in try/except
try:
check_result = check.run(dataset=train_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Train Dataset'
except Exception as exp:
check_result = CheckFailure(check.__class__, exp)
results.append(check_result)
if test_dataset is not None:
check_result = check.run(dataset=test_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Test Dataset'
results.append(check_result)
if train_dataset is None and test_dataset is None:
results.append(Suite._get_unsupported_failure(check))
elif isinstance(check, ModelOnlyBaseCheck):
if model is not None:
check_result = check.run(model=model)
results.append(check_result)
else:
results.append(Suite._get_unsupported_failure(check))
else:
raise TypeError(f'Don\'t know how to handle type {check.__class__.__name__} in suite.')
except Exception as exp:
@@ -185,6 +196,11 @@ def run(
progress_bar.close()
return SuiteResult(self.name, results)

@classmethod
def _get_unsupported_failure(cls, check):
msg = 'Check is not supported for parameters given to suite'
return CheckFailure(check.__class__, DeepchecksNotSupportedError(msg))


class ModelComparisonSuite(BaseSuite):
"""Suite to run checks of types: CompareModelsBaseCheck."""
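
In practice, _get_unsupported_failure means a suite run with missing inputs now reports the skipped checks instead of silently dropping them. A hedged usage sketch, assuming a full_suite factory exists in your deepchecks version (the helper name report_skipped_checks is made up):

from deepchecks import suites, CheckFailure

def report_skipped_checks(model):
    """Run a suite with only a model and list the checks that could not run."""
    suite = suites.full_suite()        # assumed factory name; adjust to your deepchecks version
    result = suite.run(model=model)    # no datasets, so dataset-based checks cannot run
    for item in result.results:
        if isinstance(item, CheckFailure):
            # Skipped checks now appear as failures wrapped in
            # DeepchecksNotSupportedError instead of vanishing from the output.
            print(item.header, '->', repr(item.exception))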
132 changes: 37 additions & 95 deletions tests/suites/test_suites.py
@@ -18,9 +18,9 @@
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from hamcrest.core.matcher import Matcher
from hamcrest import assert_that, instance_of, only_contains, any_of
from hamcrest import assert_that, instance_of, only_contains, any_of, has_length

from deepchecks import suites, Dataset, SuiteResult, CheckResult, CheckFailure
from deepchecks import suites, Dataset, SuiteResult, CheckResult, CheckFailure, Suite, SingleDatasetBaseCheck
from deepchecks.errors import DeepchecksBaseError


@@ -48,61 +48,6 @@ def iris(iris_clean) -> t.Tuple[Dataset, Dataset, AdaBoostClassifier]:
return train, test, model


# @pytest.fixture()
# def iris_with_non_textual_columns(iris_clean) -> t.Tuple[Dataset, Dataset, AdaBoostClassifier]:
# df = t.cast(pd.DataFrame, iris_clean.frame.copy())
#
# # TODO: generate non textual columns automaticly, use not only integer
# df[5] = range(len(df))
# df[6] = datetime.now()
#
# # NOTE:
# # if you try to use some random integers as column names
# # then with big probability test will fall
# #
# # it looks like sklearn requires column names with dtype int to be in range [0, n_columns]
# #
# # in my case UnusedFeatures check was the reason why test failed
# # more precisly it failed at the next line in unused_features.py module:
#
# # >>> ... pre_pca_transformer.fit_transform(
# # ... dataset.features_columns().sample(n_samples, random_state=self.random_state)
# # ... ) ...
# #
# # with next exception: ValueError('all features must be in [0, 3] or [-4, 0]')
#
# renamer = {
# 'sepal length (cm)': 0,
# 'sepal width (cm)': 1,
# 'petal length (cm)': 2,
# 'petal width (cm)': 3,
# 'target': 4
# }
#
# train, test = t.cast(
# t.Tuple[pd.DataFrame, pd.DataFrame],
# train_test_split(df, test_size=0.33, random_state=42)
# )
#
# train, test = (
# Dataset(
# train.rename(columns=renamer),
# features=list(renamer.values())[:-1],
# label_name=4, datetime_name=6, index_name=5
# ),
# Dataset(
# test.rename(columns=renamer),
# features=list(renamer.values())[:-1],
# label_name=4, datetime_name=6, index_name=5
# )
# )
#
# model = AdaBoostClassifier(random_state=0)
# model.fit(train.features_columns, train.label_col)
#
# return train, test, model


def test_generic_suite(
iris: t.Tuple[Dataset, Dataset, AdaBoostClassifier],
diabetes_split_dataset_and_model: t.Tuple[Dataset, Dataset, object],
@@ -122,56 +67,53 @@ def test_generic_suite(
test_dataset=diabetes_test,
model=diabetes_model
),
dict(model=diabetes_model)
)

for args in arguments:
result = suite.run(**args)
validate_suite_result(result, expected_results='mixed')
# Calculate number of expected results
length = get_expected_results_length(suite, args)
validate_suite_result(result, length)


def validate_suite_result(
result: SuiteResult,
*,
expected_results: str = 'only successful',
length: int,
exception_matcher: t.Optional[Matcher] = None
):
"""
Args:
expected_results (Literal['only successful'] | Literal['only failed'] | Literal['mixed'])
"""
assert_that(result, instance_of(SuiteResult))
assert_that(result.results, instance_of(list))
assert_that(result.results, has_length(length))

exception_matcher = exception_matcher or only_contains(instance_of(DeepchecksBaseError))

if expected_results == 'only successful':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckResult)
)))

elif expected_results == 'only failed':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure)
)))
assert_that(
actual=[it.exception for it in result.results], # type: ignore
matcher=exception_matcher, # type: ignore
)

elif expected_results == 'mixed':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure),
instance_of(CheckResult),
)))

failures = [
it.exception
for it in result.results
if isinstance(it, CheckFailure)
]

if len(failures) != 0:
assert_that(actual=failures, matcher=exception_matcher) # type: ignore

else:
raise ValueError(f'Unknown value of "expected_results" - {expected_results}')
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure),
instance_of(CheckResult),
)))

failures = [
it.exception
for it in result.results
if isinstance(it, CheckFailure)
]

if len(failures) != 0:
assert_that(actual=failures, matcher=exception_matcher) # type: ignore


def get_expected_results_length(suite: Suite, args: t.Dict):
num_single = len([c for c in suite.checks.values() if isinstance(c, SingleDatasetBaseCheck)])
num_others = len(suite.checks.values()) - num_single
multiply = 0
if 'train_dataset' in args:
multiply += 1
if 'test_dataset' in args:
multiply += 1
# If no train and no test (only model) there will be single result of check failure
if multiply == 0:
multiply = 1

return num_single * multiply + num_others
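
To make the helper's arithmetic concrete, a worked example with invented counts: single-dataset checks run once per dataset supplied, every other check runs once, and with no datasets each single-dataset check still contributes a single entry, namely a CheckFailure.

num_single, num_others = 3, 4  # hypothetical suite composition

# train + test supplied: every single-dataset check runs twice
assert num_single * 2 + num_others == 10

# only a model supplied: multiply falls back to 1, and each single-dataset
# check still yields one result -- a CheckFailure for the missing datasets
assert num_single * 1 + num_others == 7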
