Commit

Add to suite output the checks that haven't run because of missing parameters (#436)

* Add to suite output the checks that haven't run because of missing parameters

* Update suite tests

* Add CheckFailure repr
matanper committed Jan 3, 2022
1 parent b0c3576 commit 6c61816
Showing 3 changed files with 65 additions and 99 deletions.
10 changes: 9 additions & 1 deletion deepchecks/base/check.py
@@ -14,6 +14,7 @@
import enum
import inspect
import re
import traceback
from collections import OrderedDict
from functools import wraps
from typing import Any, Callable, List, Union, Dict, cast, Mapping
@@ -498,6 +499,13 @@ def run_logic(self, context: ModelComparisonContext):
class CheckFailure:
"""Class which holds a run exception of a check."""

def __init__(self, check: Any, exception: Exception):
def __init__(self, check: BaseCheck, exception: Exception):
self.check = check
self.exception = exception
self.header = check.name()

def __repr__(self):
"""Return string representation."""
tb_str = traceback.format_exception(etype=type(self.exception), value=self.exception,
tb=self.exception.__traceback__)
return ''.join(tb_str)
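
The new __repr__ simply joins the formatted traceback of the exception the check raised. A minimal stand-alone sketch of the same idea (the class name and messages below are invented for illustration; this is not the deepchecks class itself):

import traceback

class MiniCheckFailure:
    """Illustrative stand-in for the CheckFailure class shown above."""

    def __init__(self, header: str, exception: Exception):
        self.header = header
        self.exception = exception

    def __repr__(self):
        # Same idea as the commit: format the stored exception's traceback
        # and join the lines into a single string.
        tb_lines = traceback.format_exception(
            type(self.exception), self.exception, self.exception.__traceback__
        )
        return ''.join(tb_lines)


try:
    raise ValueError('missing parameter: model')
except ValueError as exp:
    print(repr(MiniCheckFailure('MyCheck', exp)))  # prints the full traceback

Passing the arguments positionally rather than via etype= keeps the sketch working on newer Python versions, where that keyword was renamed.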
22 changes: 19 additions & 3 deletions deepchecks/base/suite.py
@@ -15,7 +15,7 @@
from typing import Union, List, Optional, Tuple, Any, Container, Mapping

from deepchecks.base.display_suite import display_suite_result, ProgressBar
from deepchecks.errors import DeepchecksValueError
from deepchecks.errors import DeepchecksValueError, DeepchecksNotSupportedError
from deepchecks.base import Dataset
from deepchecks.base.check import (CheckResult, TrainTestBaseCheck, SingleDatasetBaseCheck, ModelOnlyBaseCheck,
CheckFailure, ModelComparisonBaseCheck, ModelComparisonContext)
@@ -163,19 +163,30 @@ def run(
check_result = check.run(train_dataset=train_dataset, test_dataset=test_dataset,
model=model)
results.append(check_result)
else:
results.append(Suite._get_unsupported_failure(check))
elif isinstance(check, SingleDatasetBaseCheck):
if train_dataset is not None:
check_result = check.run(dataset=train_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Train Dataset'
# In case of train & test, doesn't want to skip test if train fails. so have to explicitly
# wrap it in try/except
try:
check_result = check.run(dataset=train_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Train Dataset'
except Exception as exp:
check_result = CheckFailure(check.__class__, exp)
results.append(check_result)
if test_dataset is not None:
check_result = check.run(dataset=test_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Test Dataset'
results.append(check_result)
if train_dataset is None and test_dataset is None:
results.append(Suite._get_unsupported_failure(check))
elif isinstance(check, ModelOnlyBaseCheck):
if model is not None:
check_result = check.run(model=model)
results.append(check_result)
else:
results.append(Suite._get_unsupported_failure(check))
else:
raise TypeError(f'Don\'t know how to handle type {check.__class__.__name__} in suite.')
except Exception as exp:
@@ -185,6 +196,11 @@ def run(
progress_bar.close()
return SuiteResult(self.name, results)

@classmethod
def _get_unsupported_failure(cls, check):
msg = 'Check is not supported for parameters given to suite'
return CheckFailure(check.__class__, DeepchecksNotSupportedError(msg))


class ModelComparisonSuite(BaseSuite):
"""Suite to run checks of types: CompareModelsBaseCheck."""
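
In practice, _get_unsupported_failure means a suite run with missing inputs now reports the skipped checks instead of silently dropping them. A hedged usage sketch, assuming a full_suite factory exists in your deepchecks version (the helper name report_skipped_checks is made up):

from deepchecks import suites, CheckFailure

def report_skipped_checks(model):
    """Run a suite with only a model and list the checks that could not run."""
    suite = suites.full_suite()        # assumed factory name; adjust to your deepchecks version
    result = suite.run(model=model)    # no datasets, so dataset-based checks cannot run
    for item in result.results:
        if isinstance(item, CheckFailure):
            # Skipped checks now appear as failures wrapped in
            # DeepchecksNotSupportedError instead of vanishing from the output.
            print(item.header, '->', repr(item.exception))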
132 changes: 37 additions & 95 deletions tests/suites/test_suites.py
@@ -18,9 +18,9 @@
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from hamcrest.core.matcher import Matcher
from hamcrest import assert_that, instance_of, only_contains, any_of
from hamcrest import assert_that, instance_of, only_contains, any_of, has_length

from deepchecks import suites, Dataset, SuiteResult, CheckResult, CheckFailure
from deepchecks import suites, Dataset, SuiteResult, CheckResult, CheckFailure, Suite, SingleDatasetBaseCheck
from deepchecks.errors import DeepchecksBaseError


@@ -48,61 +48,6 @@ def iris(iris_clean) -> t.Tuple[Dataset, Dataset, AdaBoostClassifier]:
return train, test, model


# @pytest.fixture()
# def iris_with_non_textual_columns(iris_clean) -> t.Tuple[Dataset, Dataset, AdaBoostClassifier]:
# df = t.cast(pd.DataFrame, iris_clean.frame.copy())
#
# # TODO: generate non textual columns automaticly, use not only integer
# df[5] = range(len(df))
# df[6] = datetime.now()
#
# # NOTE:
# # if you try to use some random integers as column names
# # then with big probability test will fall
# #
# # it looks like sklearn requires column names with dtype int to be in range [0, n_columns]
# #
# # in my case UnusedFeatures check was the reason why test failed
# # more precisly it failed at the next line in unused_features.py module:
#
# # >>> ... pre_pca_transformer.fit_transform(
# # ... dataset.features_columns().sample(n_samples, random_state=self.random_state)
# # ... ) ...
# #
# # with next exception: ValueError('all features must be in [0, 3] or [-4, 0]')
#
# renamer = {
# 'sepal length (cm)': 0,
# 'sepal width (cm)': 1,
# 'petal length (cm)': 2,
# 'petal width (cm)': 3,
# 'target': 4
# }
#
# train, test = t.cast(
# t.Tuple[pd.DataFrame, pd.DataFrame],
# train_test_split(df, test_size=0.33, random_state=42)
# )
#
# train, test = (
# Dataset(
# train.rename(columns=renamer),
# features=list(renamer.values())[:-1],
# label_name=4, datetime_name=6, index_name=5
# ),
# Dataset(
# test.rename(columns=renamer),
# features=list(renamer.values())[:-1],
# label_name=4, datetime_name=6, index_name=5
# )
# )
#
# model = AdaBoostClassifier(random_state=0)
# model.fit(train.features_columns, train.label_col)
#
# return train, test, model


def test_generic_suite(
iris: t.Tuple[Dataset, Dataset, AdaBoostClassifier],
diabetes_split_dataset_and_model: t.Tuple[Dataset, Dataset, object],
@@ -122,56 +67,53 @@ def test_generic_suite(
test_dataset=diabetes_test,
model=diabetes_model
),
dict(model=diabetes_model)
)

for args in arguments:
result = suite.run(**args)
validate_suite_result(result, expected_results='mixed')
# Calculate number of expected results
length = get_expected_results_length(suite, args)
validate_suite_result(result, length)


def validate_suite_result(
result: SuiteResult,
*,
expected_results: str = 'only successful',
length: int,
exception_matcher: t.Optional[Matcher] = None
):
"""
Args:
expected_results (Literal['only successful'] | Literal['only failed'] | Literal['mixed'])
"""
assert_that(result, instance_of(SuiteResult))
assert_that(result.results, instance_of(list))
assert_that(result.results, has_length(length))

exception_matcher = exception_matcher or only_contains(instance_of(DeepchecksBaseError))

if expected_results == 'only successful':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckResult)
)))

elif expected_results == 'only failed':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure)
)))
assert_that(
actual=[it.exception for it in result.results], # type: ignore
matcher=exception_matcher, # type: ignore
)

elif expected_results == 'mixed':
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure),
instance_of(CheckResult),
)))

failures = [
it.exception
for it in result.results
if isinstance(it, CheckFailure)
]

if len(failures) != 0:
assert_that(actual=failures, matcher=exception_matcher) # type: ignore

else:
raise ValueError(f'Unknown value of "expected_results" - {expected_results}')
assert_that(result.results, only_contains(any_of( # type: ignore
instance_of(CheckFailure),
instance_of(CheckResult),
)))

failures = [
it.exception
for it in result.results
if isinstance(it, CheckFailure)
]

if len(failures) != 0:
assert_that(actual=failures, matcher=exception_matcher) # type: ignore


def get_expected_results_length(suite: Suite, args: t.Dict):
num_single = len([c for c in suite.checks.values() if isinstance(c, SingleDatasetBaseCheck)])
num_others = len(suite.checks.values()) - num_single
multiply = 0
if 'train_dataset' in args:
multiply += 1
if 'test_dataset' in args:
multiply += 1
# If no train and no test (only model) there will be single result of check failure
if multiply == 0:
multiply = 1

return num_single * multiply + num_others
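
To make the helper's arithmetic concrete, a worked example with invented counts: single-dataset checks run once per dataset supplied, every other check runs once, and with no datasets each single-dataset check still contributes a single entry, namely a CheckFailure.

num_single, num_others = 3, 4  # hypothetical suite composition

# train + test supplied: every single-dataset check runs twice
assert num_single * 2 + num_others == 10

# only a model supplied: multiply falls back to 1, and each single-dataset
# check still yields one result -- a CheckFailure for the missing datasets
assert num_single * 1 + num_others == 7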
