added exception or drift_mode!=prediction and balance_classes=True fo…

…r prediction drift (#2331) * initial drift commits * temp changes * added imbalanced cramer's v removed emd by partition (experimental) added ks (partially) * Cleaned, fixed pylint * Added comment where missing * Fixes * Changed defaults * Fixes * Update deepchecks/utils/distribution/drift.py Co-authored-by: Noam Bressler <noamzbr@gmail.com> * temp * Fixed error * Fixed pylint * Fixed test import * Fixed image property drift condition * Changed default to KS * fix * Higher coverage * Updated tests * Changed documentation * Changed documentation * Merged with main * Fixed conflict issue + pylint * Fixed CR comments + fixed test (for now) * Changed thresholds + tests * added exception or drift_mode!=prediction and balance_classes=True for TrainTestPredictionDrift in tabular * Added change when drift_mode = auto * Fixed CR + pylint * Fixed CR --------- Co-authored-by: Noam Bressler <noamzbr@gmail.com>
deepchecks · Feb 15, 2023 · 025c162 · 025c162
1 parent 8daa466
commit 025c162
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 12 deletions.
diff --git a/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py b/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py
@@ -53,18 +53,20 @@ class TrainTestPredictionDrift(TrainTestCheck, ReduceMixin):
     However, in cases of a variable with many categories with few samples, it is still recommended to use Cramer's V.
 
     **Note:** In case of highly imbalanced classes, it is recommended to use Cramer's V, together with setting
-    the ``balance_classes`` parameter to ``True``.
+    the ``balance_classes`` parameter to ``True``. This also requires setting the ``drift_mode`` parameter to
+    ``'auto'`` (default) or ``'prediction'``.
 
 
     Parameters
     ----------
     drift_mode: str, default: 'auto'
         For classification task, controls whether to compute drift on the predicted probabilities or the predicted
         classes. For regression task this parameter may be ignored.
-        If  set to 'auto', compute drift on the predicted class if the task is multiclass, and on
+        If set to 'auto', compute drift on the predicted class if the task is multiclass, and on
         the predicted probability of the positive class if binary. Set to 'proba' to force drift on the predicted
         probabilities, and 'prediction' to force drift on the predicted classes. If set to 'proba', on a multiclass
         task, drift would be calculated on each class independently.
+        If balance_classes=True, then 'auto' will calculate drift on the predicted class even if the label is binary.
     margin_quantile_filter: float, default: 0.025
         float in range [0,0.5), representing which margins (high and low quantiles) of the distribution will be filtered
         out of the EMD calculation. This is done in order for extreme values not to affect the calculation
@@ -93,7 +95,8 @@ class TrainTestPredictionDrift(TrainTestCheck, ReduceMixin):
     balance_classes: bool, default: False
         If True, all categories will have an equal weight in the Cramer's V score. This is useful when the categorical
         variable is highly imbalanced, and we want to be alerted on changes in proportion to the category size,
-        and not only to the entire dataset. Must have categorical_drift_method = "cramers_v".
+        and not only to the entire dataset. Must have categorical_drift_method = "cramers_v" and
+        drift_mode = "auto" or "prediction".
         If True, the variable frequency plot will be created with a log scale in the y-axis.
     ignore_na: bool, default True
         For categorical columns only. If True, ignores nones for categorical drift. If False, considers none as a
@@ -147,6 +150,9 @@ def __init__(
         self.numerical_drift_method = numerical_drift_method
         self.categorical_drift_method = categorical_drift_method
         self.balance_classes = balance_classes
+        if self.balance_classes is True and self.drift_mode == 'proba':
+            raise DeepchecksValueError('balance_classes=True is not supported for drift_mode=\'proba\'. '
+                                       'Change drift_mode to \'prediction\' or \'auto\' in order to use this parameter')
         self.ignore_na = ignore_na
         self.max_classes_to_display = max_classes_to_display
         self.aggregation_method = aggregation_method
@@ -175,8 +181,10 @@ def run_logic(self, context: Context) -> CheckResult:
         method, classes = None, train_dataset.classes_in_label_col
 
         # Flag for computing drift on the probabilities rather than the predicted labels
-        proba_drift = ((context.task_type == TaskType.BINARY) and (self.drift_mode == 'auto')) or \
-                      (self.drift_mode == 'proba')
+        proba_drift = \
+            ((context.task_type == TaskType.BINARY and self.drift_mode == 'auto')
+             or (self.drift_mode == 'proba')) \
+            and not (self.balance_classes is True and self.drift_mode == 'auto')
 
         if proba_drift:
             train_prediction = np.array(model.predict_proba(train_dataset.features_columns))
@@ -276,6 +284,7 @@ def add_condition_drift_score_less_than(self, max_allowed_categorical_score: flo
         ConditionResult
             False if any column has passed the max threshold, True otherwise
         """
+
         def condition(result: t.Dict) -> ConditionResult:
             drift_score_dict = result['Drift score']
             # Move to dict for easier looping

diff --git a/deepchecks/utils/distribution/drift.py b/deepchecks/utils/distribution/drift.py
@@ -80,7 +80,7 @@ def rebalance_distributions(dist1_counts: np.array, dist2_counts: np.array):
         200% in the second category. The new dist2_counts should be [4450, 10000].
         # When re-adjusting to the original total num_samples of dist2, the new dist2_counts should be [3103, 6896]
     """
-    new_dist1_counts = [int(np.sum(dist1_counts)/len(dist1_counts))] * len(dist1_counts)
+    new_dist1_counts = [int(np.sum(dist1_counts) / len(dist1_counts))] * len(dist1_counts)
     multipliers = [nu / de if de != 0 else 0 for nu, de in zip(new_dist1_counts, dist1_counts)]
     new_dist2_counts = np.array([int(x) for x in dist2_counts * multipliers])
 
@@ -450,8 +450,9 @@ def calc_drift_and_plot(train_column: pd.Series,
 
     elif column_type == 'categorical':
         if balance_classes is True and categorical_drift_method.lower() not in ['cramer_v', 'cramers_v']:
-            raise ValueError('balance_classes is only supported for Cramer\'s V. please set balance_classes=False '
-                             'or use \'cramers_v\' as categorical_drift_method')
+            raise DeepchecksValueError(
+                'balance_classes is only supported for Cramer\'s V. please set balance_classes=False '
+                'or use \'cramers_v\' as categorical_drift_method')
 
         sort_by = 'difference' if show_categories_by == 'largest_difference' else \
             ('dist1' if show_categories_by == 'train_largest' else 'dist2')
@@ -465,8 +466,8 @@ def calc_drift_and_plot(train_column: pd.Series,
             score = psi(dist1=train_dist, dist2=test_dist, min_category_size_ratio=min_category_size_ratio,
                         max_num_categories=max_num_categories_for_drift, sort_by=sort_by)
         else:
-            raise ValueError('Expected categorical_drift_method to be one '
-                             f'of ["cramers_v", "PSI"], received: {categorical_drift_method}')
+            raise DeepchecksValueError('Expected categorical_drift_method to be one '
+                                       f'of ["cramers_v", "PSI"], received: {categorical_drift_method}')
 
         if not with_display:
             return score, scorer_name, None

diff --git a/tests/tabular/checks/model_evaluation/train_test_prediction_drift_test.py b/tests/tabular/checks/model_evaluation/train_test_prediction_drift_test.py
@@ -11,6 +11,7 @@
 """Test functions of the train test label drift."""
 from hamcrest import assert_that, calling, close_to, equal_to, greater_than, has_entries, has_length, raises
 
+import pandas as pd
 from deepchecks.core.errors import DeepchecksValueError
 from deepchecks.tabular.checks import TrainTestPredictionDrift
 from tests.base.utils import equal_condition_result
@@ -30,6 +31,7 @@ def test_no_drift_regression_label_emd(diabetes, diabetes_model):
          'Method': equal_to('Earth Mover\'s Distance')}
     ))
 
+
 def test_no_drift_regression_label_ks(diabetes, diabetes_model):
     # Arrange
     train, test = diabetes
@@ -45,7 +47,6 @@ def test_no_drift_regression_label_ks(diabetes, diabetes_model):
     ))
 
 
-
 def test_reduce_no_drift_regression_label(diabetes, diabetes_model):
     # Arrange
     train, test = diabetes
@@ -136,6 +137,45 @@ def test_drift_max_drift_score_condition_fail_psi(drifted_data_and_model):
     ))
 
 
+def test_balance_classes_without_cramers_v(drifted_data_and_model):
+    # Arrange
+    train, test, model = drifted_data_and_model
+    check = TrainTestPredictionDrift(categorical_drift_method='PSI', drift_mode='prediction', balance_classes=True)
+
+    assert_that(calling(check.run).with_args(train, test, model),
+                raises(DeepchecksValueError,
+                       'balance_classes is only supported for Cramer\'s V. please set balance_classes=False '
+                       'or use \'cramers_v\' as categorical_drift_method'))
+
+
+def test_balance_classes_without_correct_drift_mode():
+    # Arrange
+    assert_that(calling(TrainTestPredictionDrift).with_args(balance_classes=True, drift_mode='proba'),
+                raises(DeepchecksValueError,
+                       'balance_classes=True is not supported for drift_mode=\'proba\'. '
+                       'Change drift_mode to \'prediction\' or \'auto\' in order to use this parameter'))
+
+def test_balance_classes_with_drift_mode_auto(drifted_data):
+    # Arrange
+    train, test = drifted_data
+
+    n_train = train.n_samples
+    n_test = test.n_samples
+
+    predictions_train = pd.Series([0] * int(n_train * 0.95) + [1] * int(n_train * 0.05))
+    predictions_test = pd.Series([0] * int(n_test * 0.96) + [1] * int(n_test * 0.04))
+    check = TrainTestPredictionDrift(balance_classes=True)
+
+    # Act
+    result = check.run(train, test, y_pred_train=predictions_train, y_pred_test=predictions_test)
+
+    # Assert
+    assert_that(result.value, has_entries(
+        {'Drift score': close_to(0.05, 0.01),
+         'Method': equal_to('Cramer\'s V')} # Cramer's V as the method proves that 'auto' switched to prediction mode
+    ))
+
+
 def test_drift_max_drift_score_condition_pass_threshold(drifted_data_and_model):
     # Arrange
     train, test, model = drifted_data_and_model
@@ -227,7 +267,7 @@ def test_multiclass_proba_reduce_aggregations(iris_split_dataset_and_model_rf):
     assert_that(result.reduce_output(), has_entries(
         {'Drift Score class 0': close_to(0.06, 0.01), 'Drift Score class 1': close_to(0.06, 0.01),
          'Drift Score class 2': close_to(0.03, 0.01)})
-    )
+                )
 
     # Test condition
     condition_result, *_ = check.conditions_decision(result)