Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for filtering check results by name #2695

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 27 additions & 6 deletions deepchecks/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,36 @@ def __init__(
else:
raise TypeError(f'Unknown type of result - {type(result).__name__}')

def select_results(self, idx: Set[int] = None, names: Set[str] = None) -> List[Union[
    'check_types.CheckResult',
    'check_types.CheckFailure'
]]:
    """Select results either by indexes or by result header names.

    Parameters
    ----------
    idx : Set[int], default None
        Positions (within ``self.results``) of the results to select.
        Required when ``names`` is not passed.
    names : Set[str], default None
        Header names of the results to select. Matching is case-insensitive;
        underscores in a name are treated as spaces and surrounding
        whitespace is ignored. Required when ``idx`` is not passed.
        ``idx`` and ``names`` must not both be passed.

    Returns
    -------
    List[Union['check_types.CheckResult', 'check_types.CheckFailure']] :
        When selecting by ``idx``, the matching results in the order they
        appear in ``self.results``. When selecting by ``names``, the matching
        results grouped in the iteration order of ``names`` (and, within one
        name, in the order they appear in ``self.results``). An empty
        selector yields an empty list.

    Raises
    ------
    DeepchecksNotSupportedError
        If neither ``idx`` nor ``names`` is passed, or if both are passed
        with at least one element each.
    """
    if idx is None and names is None:
        raise DeepchecksNotSupportedError('Either idx or names should be passed')
    if idx and names:
        raise DeepchecksNotSupportedError('Only one of idx or names should be passed')

    if names:
        # Index the results by lowercased header once, instead of rescanning
        # every result for every requested name.
        results_by_header = {}
        for result in self.results:
            results_by_header.setdefault(result.get_header().lower(), []).append(result)

        output = []
        for name in names:
            normalized = name.lower().replace('_', ' ').strip()
            output.extend(results_by_header.get(normalized, ()))
        return output

    if idx is None:
        # `names` was passed but is empty: nothing can match. Without this
        # guard the index filter below would evaluate `index in None`.
        return []

    return [result for index, result in enumerate(self.results) if index in idx]

def __repr__(self):
Expand Down
50 changes: 45 additions & 5 deletions tests/base/check_suite_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
#
"""suites tests"""
import random
from typing import List

from hamcrest import all_of, assert_that, calling, equal_to, has_entry, has_items, has_length, instance_of, is_, raises
from hamcrest import all_of, assert_that, calling, equal_to, has_entry, has_length, instance_of, is_, raises

from deepchecks import __version__
from deepchecks.core import CheckFailure, CheckResult, ConditionCategory, ConditionResult, SuiteResult
from deepchecks.core.errors import DeepchecksValueError
from deepchecks.core.errors import DeepchecksNotSupportedError, DeepchecksValueError
from deepchecks.core.suite import BaseSuite
from deepchecks.tabular import SingleDatasetCheck, Suite, TrainTestCheck
from deepchecks.tabular import Dataset, SingleDatasetCheck, Suite, TrainTestCheck
from deepchecks.tabular import checks as tabular_checks
from deepchecks.tabular.suites import model_evaluation
from deepchecks.tabular import datasets
from deepchecks.tabular.suites import data_integrity, model_evaluation


class SimpleDatasetCheck(SingleDatasetCheck):
Expand Down Expand Up @@ -51,6 +51,46 @@ def test_run_suite_with_incorrect_args():
raises(DeepchecksValueError, r"At least one dataset \(or model\) must be passed to the method!")
)

def test_select_results_with_and_without_args_from_suite_result():
    """select_results rejects a call with no selector and a call with both selectors."""
    # Arrange
    passing_result = CheckResult(0, 'check1')
    passing_result.conditions_results = [ConditionResult(ConditionCategory.PASS)]
    failing_result = CheckResult(0, 'check2')
    failing_result.conditions_results = [ConditionResult(ConditionCategory.FAIL)]
    both_selectors = dict(idx=[1, 2], names=['check1', 'check2'])

    # Act & Assert: neither idx nor names supplied
    select_without_args = calling(SuiteResult('test', [passing_result]).select_results).with_args()
    assert_that(
        select_without_args,
        raises(DeepchecksNotSupportedError, r"Either idx or names should be passed"),
    )

    # Act & Assert: idx and names supplied together
    select_with_both = calling(
        SuiteResult('test', [passing_result, failing_result]).select_results
    ).with_args(**both_selectors)
    assert_that(
        select_with_both,
        raises(DeepchecksNotSupportedError, r"Only one of idx or names should be passed"),
    )

def test_select_results_with_indexes_and_names_from_suite_result():
    """select_results returns the expected checks when filtering by index or by header name."""
    # Arrange
    avocado_frame = datasets.regression.avocado.load_data(data_format='DataFrame', as_train_test=False)
    dataset = Dataset(avocado_frame, cat_features=['type'], datetime_name='Date', label='AveragePrice')
    suite_result = data_integrity().run(dataset)

    requested_names = [
        'Conflicting Labels - Train Dataset',
        'Outlier Sample Detection',
        'mixed_Nulls',
    ]
    expected_headers_by_index = ['Feature-Feature Correlation', 'Single Value in Column']
    # Headers come back in their canonical form, whatever spelling was requested.
    expected_headers_by_name = [
        'Conflicting Labels - Train Dataset',
        'Outlier Sample Detection',
        'Mixed Nulls',
    ]

    # Act
    selected_by_index = suite_result.select_results(idx=[0, 2])
    selected_by_name = suite_result.select_results(names=requested_names)

    # Assert
    assert_that(len(selected_by_index), equal_to(2))
    assert_that(len(selected_by_name), equal_to(3))

    for position, header in enumerate(expected_headers_by_index):
        assert_that(selected_by_index[position].get_header(), equal_to(header))

    for position, header in enumerate(expected_headers_by_name):
        assert_that(selected_by_name[position].get_header(), equal_to(header))

def test_add_check_to_the_suite():
number_of_checks = random.randint(0, 50)
Expand Down
6 changes: 3 additions & 3 deletions tests/nlp/utils/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
import os
import pathlib
import timeit
import uuid

import numpy as np
import pytest
import uuid
from hamcrest import *

from deepchecks.core.errors import DeepchecksValueError
from deepchecks.nlp.utils.text_properties import (_sample_for_property, calculate_builtin_properties,
english_text, TOXICITY_MODEL_NAME_ONNX)
from deepchecks.nlp.utils.text_properties import (TOXICITY_MODEL_NAME_ONNX, _sample_for_property,
calculate_builtin_properties, english_text)
from deepchecks.nlp.utils.text_properties_models import MODELS_STORAGE, _get_transformer_model_and_tokenizer


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
"""Contains unit tests for the confusion_matrix_report check."""
import numpy as np
from hamcrest import assert_that, calling, greater_than, has_length, raises, equal_to
from hamcrest import assert_that, calling, equal_to, greater_than, has_length, raises

from deepchecks.core.condition import ConditionCategory
from deepchecks.core.errors import DeepchecksNotSupportedError, DeepchecksValueError, ModelValidationError
Expand Down