Skip to content

Commit

Permalink
Mlc 292 model error analysis (#378)
Browse files Browse the repository at this point in the history
Add a check that partitions single features into segments of high and low model error.
  • Loading branch information
noamzbr committed Jan 2, 2022
1 parent c3995e7 commit 700598c
Show file tree
Hide file tree
Showing 12 changed files with 48,014 additions and 705 deletions.
18 changes: 16 additions & 2 deletions deepchecks/base/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,8 @@ def data(self) -> pd.DataFrame:
def copy(self: TDataset, new_data) -> TDataset:
"""Create a copy of this Dataset with new data."""
# Filter out if columns were dropped
features = list(set(self._features).intersection(new_data.columns))
cat_features = list(set(self.cat_features).intersection(new_data.columns))
features = [feat for feat in self._features if feat in new_data.columns]
cat_features = [feat for feat in self.cat_features if feat in new_data.columns]
label_name = self._label_name if self._label_name in new_data.columns else None
index = self._index_name if self._index_name in new_data.columns else None
date = self._datetime_name if self._datetime_name in new_data.columns else None
Expand All @@ -388,6 +388,20 @@ def copy(self: TDataset, new_data) -> TDataset:
convert_datetime=False, max_categorical_ratio=self._max_categorical_ratio,
max_categories=self._max_categories, label_type=self.label_type)

def sample(self, n_samples: int, replace: bool = False, random_state: t.Optional[int] = None) -> TDataset:
    """Return a new Dataset whose internal dataframe is a random sample of this one's rows.

    Args:
        n_samples (int): Requested number of rows; capped at the length of this dataset.
        replace (bool, default False): Whether to sample with replacement.
        random_state (int, default None): Random state forwarded to the dataframe sampler.
    Returns:
        Dataset: copy of this Dataset built from the sampled dataframe.
    """
    # Never request more rows than this dataset holds.
    sample_size = min(n_samples, len(self))
    sampled_frame = self._data.sample(sample_size, replace=replace, random_state=random_state)
    return self.copy(sampled_frame)

@property
def n_samples(self) -> int:
"""Return number of samples in dataframe.
Expand Down
6 changes: 4 additions & 2 deletions deepchecks/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
RegressionSystematicError,
RegressionErrorDistribution,
ClassPerformance,
MultiModelPerformanceReport
MultiModelPerformanceReport,
ModelErrorAnalysis
)


Expand Down Expand Up @@ -114,5 +115,6 @@
'RegressionSystematicError',
'RegressionErrorDistribution',
'ClassPerformance',
'MultiModelPerformanceReport'
'MultiModelPerformanceReport',
'ModelErrorAnalysis'
]
4 changes: 3 additions & 1 deletion deepchecks/checks/performance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from .regression_systematic_error import RegressionSystematicError
from .regression_error_distribution import RegressionErrorDistribution
from .class_performance import ClassPerformance
from .model_error_analysis import ModelErrorAnalysis


__all__ = [
Expand All @@ -30,5 +31,6 @@
'RegressionSystematicError',
'RegressionErrorDistribution',
'ClassPerformance',
'MultiModelPerformanceReport'
'MultiModelPerformanceReport',
'ModelErrorAnalysis'
]
349 changes: 349 additions & 0 deletions deepchecks/checks/performance/model_error_analysis.py

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions deepchecks/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
"""Module with all deepchecks error types."""


# Public error types exported by this module. The earlier duplicate assignment
# (without DeepchecksProcessError) was immediately overwritten and is dropped.
__all__ = ['DeepchecksValueError', 'DeepchecksNotSupportedError', 'DeepchecksProcessError',
           'NumberOfFeaturesLimitError']


class DeepchecksBaseError(Exception):
Expand All @@ -29,7 +30,13 @@ class DeepchecksValueError(DeepchecksBaseError):


class DeepchecksNotSupportedError(DeepchecksBaseError):
    """Exception class that represents an unsupported action in Deepchecks."""

    # Only the first string literal in a class body becomes ``__doc__``; the stale,
    # ungrammatical duplicate that preceded the corrected docstring is removed.
    pass


class DeepchecksProcessError(DeepchecksBaseError):
    """Error type signalling that a process ran into an issue."""

Expand Down
3 changes: 2 additions & 1 deletion deepchecks/suites/default_suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
ConfusionMatrixReport, RocReport, CalibrationScore, TrustScoreComparison, ClassPerformance,
RegressionErrorDistribution, RegressionSystematicError, PerformanceReport, SimpleModelComparison, BoostingOverfit,
TrainTestDifferenceOverfit, ModelInfo, ColumnsInfo, DataDuplicates, IsSingleValue, LabelAmbiguity,
DatasetsSizeComparison, UnusedFeatures, ModelInferenceTimeCheck, TrainTestLabelDrift
DatasetsSizeComparison, UnusedFeatures, ModelInferenceTimeCheck, ModelErrorAnalysis, TrainTestLabelDrift
)
from deepchecks import Suite

Expand Down Expand Up @@ -93,6 +93,7 @@ def model_evaluation() -> Suite:
TrainTestDifferenceOverfit().add_condition_degradation_ratio_not_greater_than(),
RocReport().add_condition_auc_not_less_than(),
SimpleModelComparison().add_condition_ratio_not_less_than(),
ModelErrorAnalysis().add_condition_segments_performance_relative_difference_not_greater_than(),
CalibrationScore(),
TrustScoreComparison().add_condition_mean_score_percent_decline_not_greater_than(),
RegressionSystematicError().add_condition_systematic_error_ratio_to_rmse_not_greater_than(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../../../../examples/checks/performance/model_error_analysis.ipynb"
}

0 comments on commit 700598c

Please sign in to comment.