Skip to content

Commit

Permalink
Added kwargs for tabular suite + fixed missing kwargs in checks (#1515)
Browse files: browse the repository at this point in the history
  • Loading branch information
JKL98ISR committed May 26, 2022
1 parent ac6412d commit 9659b7f
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 54 deletions.
96 changes: 48 additions & 48 deletions deepchecks/tabular/suites/default_suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
'model_evaluation', 'full_suite']


def single_dataset_integrity() -> Suite:
def single_dataset_integrity(**kwargs) -> Suite:
"""
Create a suite that is meant to detect integrity issues within a single dataset (Deprecated) .
Expand All @@ -44,27 +44,27 @@ def single_dataset_integrity() -> Suite:
'the single_dataset_integrity suite is deprecated, use the data_integrity suite instead',
DeprecationWarning
)
return data_integrity()
return data_integrity(**kwargs)


def data_integrity() -> Suite:
def data_integrity(**kwargs) -> Suite:
"""Create a suite that is meant to detect integrity issues within a single dataset."""
return Suite(
'Data Integrity Suite',
IsSingleValue().add_condition_not_single_value(),
SpecialCharacters().add_condition_ratio_of_special_characters_not_grater_than(),
MixedNulls().add_condition_different_nulls_not_more_than(),
MixedDataTypes().add_condition_rare_type_ratio_not_in_range(),
StringMismatch().add_condition_no_variants(),
DataDuplicates().add_condition_ratio_not_greater_than(),
StringLengthOutOfBounds().add_condition_ratio_of_outliers_not_greater_than(),
ConflictingLabels().add_condition_ratio_of_conflicting_labels_not_greater_than(),
OutlierSampleDetection(),
FeatureLabelCorrelation().add_condition_feature_pps_not_greater_than()
IsSingleValue(**kwargs).add_condition_not_single_value(),
SpecialCharacters(**kwargs).add_condition_ratio_of_special_characters_not_grater_than(),
MixedNulls(**kwargs).add_condition_different_nulls_not_more_than(),
MixedDataTypes(**kwargs).add_condition_rare_type_ratio_not_in_range(),
StringMismatch(**kwargs).add_condition_no_variants(),
DataDuplicates(**kwargs).add_condition_ratio_not_greater_than(),
StringLengthOutOfBounds(**kwargs).add_condition_ratio_of_outliers_not_greater_than(),
ConflictingLabels(**kwargs).add_condition_ratio_of_conflicting_labels_not_greater_than(),
OutlierSampleDetection(**kwargs),
FeatureLabelCorrelation(**kwargs).add_condition_feature_pps_not_greater_than()
)


def train_test_leakage() -> Suite:
def train_test_leakage(**kwargs) -> Suite:
"""
Create a suite that is meant to detect data leakage between the training dataset and the test dataset (Deprecated).
Expand All @@ -76,57 +76,57 @@ def train_test_leakage() -> Suite:
'the train_test_leakage suite is deprecated, use the train_test_validation suite instead',
DeprecationWarning
)
return train_test_validation()
return train_test_validation(**kwargs)


def train_test_validation() -> Suite:
def train_test_validation(**kwargs) -> Suite:
"""Create a suite that is meant to validate correctness of train-test split, including integrity, \
distribution and leakage checks."""
return Suite(
'Train Test Validation Suite',
DatasetsSizeComparison().add_condition_test_train_size_ratio_not_smaller_than(),
NewLabelTrainTest().add_condition_new_labels_not_greater_than(),
DominantFrequencyChange().add_condition_ratio_of_change_not_greater_than(),
CategoryMismatchTrainTest().add_condition_new_category_ratio_not_greater_than(),
StringMismatchComparison().add_condition_no_new_variants(),
DateTrainTestLeakageDuplicates().add_condition_leakage_ratio_not_greater_than(),
DateTrainTestLeakageOverlap().add_condition_leakage_ratio_not_greater_than(),
IndexTrainTestLeakage().add_condition_ratio_not_greater_than(),
IdentifierLeakage().add_condition_pps_not_greater_than(),
TrainTestSamplesMix().add_condition_duplicates_ratio_not_greater_than(),
FeatureLabelCorrelationChange().add_condition_feature_pps_difference_not_greater_than()
DatasetsSizeComparison(**kwargs).add_condition_test_train_size_ratio_not_smaller_than(),
NewLabelTrainTest(**kwargs).add_condition_new_labels_not_greater_than(),
DominantFrequencyChange(**kwargs).add_condition_ratio_of_change_not_greater_than(),
CategoryMismatchTrainTest(**kwargs).add_condition_new_category_ratio_not_greater_than(),
StringMismatchComparison(**kwargs).add_condition_no_new_variants(),
DateTrainTestLeakageDuplicates(**kwargs).add_condition_leakage_ratio_not_greater_than(),
DateTrainTestLeakageOverlap(**kwargs).add_condition_leakage_ratio_not_greater_than(),
IndexTrainTestLeakage(**kwargs).add_condition_ratio_not_greater_than(),
IdentifierLeakage(**kwargs).add_condition_pps_not_greater_than(),
TrainTestSamplesMix(**kwargs).add_condition_duplicates_ratio_not_greater_than(),
FeatureLabelCorrelationChange(**kwargs).add_condition_feature_pps_difference_not_greater_than()
.add_condition_feature_pps_in_train_not_greater_than(),
TrainTestFeatureDrift().add_condition_drift_score_not_greater_than(),
TrainTestLabelDrift().add_condition_drift_score_not_greater_than(),
WholeDatasetDrift().add_condition_overall_drift_value_not_greater_than(),
TrainTestFeatureDrift(**kwargs).add_condition_drift_score_not_greater_than(),
TrainTestLabelDrift(**kwargs).add_condition_drift_score_not_greater_than(),
WholeDatasetDrift(**kwargs).add_condition_overall_drift_value_not_greater_than(),
)


def model_evaluation() -> Suite:
def model_evaluation(**kwargs) -> Suite:
"""Create a suite that is meant to test model performance and overfit."""
return Suite(
'Model Evaluation Suite',
PerformanceReport().add_condition_train_test_relative_degradation_not_greater_than(),
RocReport().add_condition_auc_not_less_than(),
ConfusionMatrixReport(),
SegmentPerformance(),
TrainTestPredictionDrift().add_condition_drift_score_not_greater_than(),
SimpleModelComparison().add_condition_gain_not_less_than(),
ModelErrorAnalysis().add_condition_segments_performance_relative_difference_not_greater_than(),
CalibrationScore(),
RegressionSystematicError().add_condition_systematic_error_ratio_to_rmse_not_greater_than(),
RegressionErrorDistribution().add_condition_kurtosis_not_less_than(),
UnusedFeatures().add_condition_number_of_high_variance_unused_features_not_greater_than(),
BoostingOverfit().add_condition_test_score_percent_decline_not_greater_than(),
ModelInferenceTime().add_condition_inference_time_is_not_greater_than(),
PerformanceReport(**kwargs).add_condition_train_test_relative_degradation_not_greater_than(),
RocReport(**kwargs).add_condition_auc_not_less_than(),
ConfusionMatrixReport(**kwargs),
SegmentPerformance(**kwargs),
TrainTestPredictionDrift(**kwargs).add_condition_drift_score_not_greater_than(),
SimpleModelComparison(**kwargs).add_condition_gain_not_less_than(),
ModelErrorAnalysis(**kwargs).add_condition_segments_performance_relative_difference_not_greater_than(),
CalibrationScore(**kwargs),
RegressionSystematicError(**kwargs).add_condition_systematic_error_ratio_to_rmse_not_greater_than(),
RegressionErrorDistribution(**kwargs).add_condition_kurtosis_not_less_than(),
UnusedFeatures(**kwargs).add_condition_number_of_high_variance_unused_features_not_greater_than(),
BoostingOverfit(**kwargs).add_condition_test_score_percent_decline_not_greater_than(),
ModelInferenceTime(**kwargs).add_condition_inference_time_is_not_greater_than(),
)


def full_suite() -> Suite:
def full_suite(**kwargs) -> Suite:
"""Create a suite that includes many of the implemented checks, for a quick overview of your model and data."""
return Suite(
'Full Suite',
model_evaluation(),
train_test_validation(),
data_integrity(),
model_evaluation(**kwargs),
train_test_validation(**kwargs),
data_integrity(**kwargs),
)
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def __init__(
self,
n_top_show: int = 10,
hash_size: int = 8,
similarity_threshold: float = 0.1
):
super().__init__()
similarity_threshold: float = 0.1,
**kwargs):
super().__init__(**kwargs)
if not (isinstance(n_top_show, int) and (n_top_show >= 0)):
raise DeepchecksValueError('n_top_show must be a positive integer')
self.n_top_show = n_top_show
Expand Down
2 changes: 1 addition & 1 deletion tests/tabular/suites/test_suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_generic_suite(
diabetes_train, diabetes_test, diabetes_model = diabetes_split_dataset_and_model
city_train, city_test, city_model = city_arrogance_split_dataset_and_model
iris_train_single, iris_test_single, iris_model_single= iris_split_dataset_and_model_single_feature
suite = suites.full_suite()
suite = suites.full_suite(imaginery_kwarg='just to make sure all checks have kwargs in the init')

arguments = (
dict(train_dataset=iris_train_single, test_dataset=iris_test_single, model=iris_model_single),
Expand Down
4 changes: 2 additions & 2 deletions tests/vision/base/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def compute(self, context) -> CheckResult:


def test_full_suite_execution_mnist(mnist_dataset_train, mnist_dataset_test, mock_trained_mnist, device):
suite = full_suite()
suite = full_suite(imaginery_kwarg='just to make sure all checks have kwargs in the init')
arguments = (
dict(train_dataset=mnist_dataset_train, test_dataset=mnist_dataset_test,
model=mock_trained_mnist, device=device),
Expand All @@ -250,7 +250,7 @@ def test_full_suite_execution_mnist(mnist_dataset_train, mnist_dataset_test, moc

def test_full_suite_execution_coco(coco_train_visiondata, coco_test_visiondata,
mock_trained_yolov5_object_detection, device):
suite = full_suite()
suite = full_suite(imaginery_kwarg='just to make sure all checks have kwargs in the init')
args = {'train_dataset': coco_train_visiondata, 'test_dataset': coco_test_visiondata,
'model':mock_trained_yolov5_object_detection, 'device': device}
arguments = (
Expand Down

0 comments on commit 9659b7f

Please sign in to comment.