Commit

Improve sfc 1399 (#1478)
* Added a parameter to the condition in single_feature_contribution_train_test.py

* Added the parameter to the diff condition in the simple feature contribution check, along with tests that were missing

* Changed the plots for single feature contribution train-test and simple feature contribution:
removed the purple line and added the percentage of change in parentheses (made bold).
Also added a parameter to format_percent to optionally show '+' as a prefix

* Fixed embarrassing mistake

* isort

* Fixed missing minus signs
nirhutnik committed May 22, 2022
1 parent 0a7d474 commit 3942d0e
Showing 7 changed files with 263 additions and 47 deletions.
52 changes: 30 additions & 22 deletions deepchecks/core/check_utils/single_feature_contribution_utils.py
@@ -17,6 +17,7 @@

import deepchecks.ppscore as pps
from deepchecks.utils.plot import colors
from deepchecks.utils.strings import format_percent
from deepchecks.utils.typing import Hashable


@@ -40,14 +41,28 @@ def get_pps_figure(per_class: bool):
return fig


def pps_df_to_trace(s_pps: pd.Series, name: str):
"""If name is train/test use our defined colors, else will use plotly defaults."""
def pd_series_to_trace(s_pps: pd.Series, name: str):
"""Create bar plotly bar trace out of pandas Series."""
name = name.capitalize() if name else None
return go.Bar(x=s_pps.index,
y=s_pps,
name=name,
marker_color=colors.get(name),
text=s_pps.round(2),
text='<b>' + s_pps.round(2).astype(str) + '</b>',
textposition='outside'
)


def pd_series_to_trace_with_diff(s_pps: pd.Series, name: str, diffs: pd.Series):
"""Create bar plotly bar trace out of pandas Series, with difference shown in percentages."""
diffs_text = '(' + diffs.apply(format_percent, floating_point=0, add_positive_prefix=True) + ')'
text = diffs_text + '<br>' + s_pps.round(2).astype(str)
name = name.capitalize() if name else None
return go.Bar(x=s_pps.index,
y=s_pps,
name=name,
marker_color=colors.get(name),
text='<b>' + text + '</b>',
textposition='outside'
)
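
For illustration, a minimal sketch of how the new trace text is assembled. The format_percent below is a hypothetical stand-in that only mimics the floating_point=0, add_positive_prefix=True behavior used above, not the real deepchecks implementation:

import pandas as pd

def format_percent(ratio, floating_point=0, add_positive_prefix=True):
    # hypothetical stand-in for deepchecks.utils.strings.format_percent
    prefix = '+' if add_positive_prefix and ratio > 0 else ''
    return f'{prefix}{ratio * 100:.{floating_point}f}%'

s_pps = pd.Series([0.84, 0.12], index=['x2', 'x3'])   # made-up test PPS per feature
diffs = pd.Series([-0.31, 0.05], index=['x2', 'x3'])  # made-up test-minus-train PPS

diffs_text = '(' + diffs.apply(format_percent) + ')'
text = diffs_text + '<br>' + s_pps.round(2).astype(str)
print(text.tolist())  # ['(-31%)<br>0.84', '(+5%)<br>0.12']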

@@ -100,21 +115,14 @@ def get_single_feature_contribution(train_df: pd.DataFrame, train_label_name: Op
s_pps_test = df_pps_test.set_index('x', drop=True)['ppscore']
s_difference = s_pps_train - s_pps_test

s_difference_to_display = np.abs(s_difference).sort_values(ascending=False).head(n_show_top)

s_pps_train_to_display = s_pps_train[s_difference_to_display.index]
s_pps_test_to_display = s_pps_test[s_difference_to_display.index]
sorted_order_for_display = np.abs(s_difference).sort_values(ascending=False).head(n_show_top).index
s_pps_train_to_display = s_pps_train[sorted_order_for_display]
s_pps_test_to_display = s_pps_test[sorted_order_for_display]
s_difference_to_display = s_difference[sorted_order_for_display]

fig = get_pps_figure(per_class=False)
fig.add_trace(pps_df_to_trace(s_pps_train_to_display, 'train'))
fig.add_trace(pps_df_to_trace(s_pps_test_to_display, 'test'))
fig.add_trace(go.Scatter(x=s_difference_to_display.index,
y=s_difference_to_display,
name='Train-Test Difference (abs)',
marker=dict(symbol='circle', size=15),
line=dict(color='#aa57b5', width=5),
text=s_difference_to_display.round(2)
))
fig.add_trace(pd_series_to_trace(s_pps_train_to_display, 'train'))
fig.add_trace(pd_series_to_trace_with_diff(s_pps_test_to_display, 'test', -s_difference_to_display))

ret_value = {'train': s_pps_train.to_dict(), 'test': s_pps_test.to_dict(),
'train-test difference': s_difference.to_dict()}
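
Note the sign convention in the calls above: s_difference is train PPS minus test PPS, so the test trace receives -s_difference_to_display (test minus train) and each annotated percentage reads as the change from train to test. For example, a train PPS of 0.80 and a test PPS of 0.49 would be annotated '(-31%)' on the test bar.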
@@ -203,16 +211,16 @@ def get_single_feature_contribution_per_class(train_df: pd.DataFrame, train_labe

# display only if not all scores are above min_pps_to_show
if any(s_train > min_pps_to_show) or any(s_test > min_pps_to_show):
s_difference_to_display = np.abs(s_difference).apply(lambda x: 0 if x < 0 else x)
s_difference_to_display = s_difference_to_display.sort_values(ascending=False).head(n_show_top)
sorted_order_for_display = np.abs(s_difference).sort_values(ascending=False).head(n_show_top).index

s_train_to_display = s_train[s_difference_to_display.index]
s_test_to_display = s_test[s_difference_to_display.index]
s_train_to_display = s_train[sorted_order_for_display]
s_test_to_display = s_test[sorted_order_for_display]
s_difference_to_display = s_difference[sorted_order_for_display]

fig = get_pps_figure(per_class=True)
fig.update_layout(title=f'{feature}: Predictive Power Score (PPS) Per Class')
fig.add_trace(pps_df_to_trace(s_train_to_display, 'train'))
fig.add_trace(pps_df_to_trace(s_test_to_display, 'test'))
fig.add_trace(pd_series_to_trace(s_train_to_display, 'train'))
fig.add_trace(pd_series_to_trace_with_diff(s_test_to_display, 'test', -s_difference_to_display))
display.append(fig)

return ret_value, display
@@ -14,7 +14,7 @@
import deepchecks.ppscore as pps
from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
from deepchecks.core.check_utils.single_feature_contribution_utils import (
get_pps_figure, pps_df_to_trace)
get_pps_figure, pd_series_to_trace)
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.utils.strings import format_number
from deepchecks.utils.typing import Hashable
@@ -91,7 +91,7 @@ def run_logic(self, context: Context, dataset_type: str = 'train') -> CheckResul
top_to_show = s_ppscore.head(self.n_top_features)

fig = get_pps_figure(per_class=False)
fig.add_trace(pps_df_to_trace(top_to_show, dataset_type))
fig.add_trace(pd_series_to_trace(top_to_show, dataset_type))

text = [
'The Predictive Power Score (PPS) is used to estimate the ability of a feature to predict the '
@@ -10,6 +10,9 @@
#
"""The single_feature_contribution check module."""
import typing as t
from copy import copy

import numpy as np

from deepchecks.core import CheckResult, ConditionResult
from deepchecks.core.check_utils.single_feature_contribution_utils import \
@@ -119,22 +122,34 @@ def run_logic(self, context: Context) -> CheckResult:

return CheckResult(value=ret_value, display=display, header='Single Feature Contribution Train-Test')

def add_condition_feature_pps_difference_not_greater_than(self: FC, threshold: float = 0.2) -> FC:
def add_condition_feature_pps_difference_not_greater_than(self: FC, threshold: float = 0.2,
include_negative_diff: bool = True) -> FC:
"""Add new condition.
Add condition that will check that difference between train
dataset feature pps and test dataset feature pps is not greater than X.
Parameters
----------
threshold : float , default: 0.2
train test ps difference upper bound.
threshold: float, default: 0.2
train test pps difference upper bound.
include_negative_diff: bool, default True
This parameter decides whether the condition checks the absolute value of the difference, or just the
positive value.
The difference is calculated as train PPS minus test PPS. This is because we're interested in the case
where the test dataset is less predictive of the label than the train dataset, as this could indicate
leakage of labels into the train dataset.
"""

def condition(value: t.Dict[Hashable, t.Dict[Hashable, float]]) -> ConditionResult:

diff_dict = copy(value['train-test difference'])
if include_negative_diff is True:
diff_dict = {k: np.abs(v) for k, v in diff_dict.items()}

failed_features = {
feature_name: format_number(pps_diff)
for feature_name, pps_diff in value['train-test difference'].items()
for feature_name, pps_diff in diff_dict.items()
if pps_diff > threshold
}
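
A small sketch of what the condition compares under each setting, using hypothetical difference values:

diffs = {'x1': 0.25, 'x2': -0.30}  # hypothetical train-minus-test PPS per feature
threshold = 0.2

# include_negative_diff=True (the default here): absolute differences are compared
abs_diffs = {f: abs(d) for f, d in diffs.items()}
print({f: d for f, d in abs_diffs.items() if d > threshold})  # {'x1': 0.25, 'x2': 0.3}

# include_negative_diff=False: only a drop from train to test can fail the condition
print({f: d for f, d in diffs.items() if d > threshold})      # {'x1': 0.25}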

12 changes: 9 additions & 3 deletions deepchecks/utils/strings.py
@@ -412,15 +412,21 @@ def truncate_zero_percent(ratio: float, floating_point: int):
return f'{ratio * 100:.{floating_point}f}'.rstrip('0').rstrip('.') + '%'


def format_percent(ratio: float, floating_point: int = 2, scientific_notation_threshold: int = 4) -> str:
def format_percent(ratio: float, floating_point: int = 2, scientific_notation_threshold: int = 4,
add_positive_prefix: bool = False) -> str:
"""Format percent for elegant display.
Parameters
----------
ratio : float
Ratio to be displayed as percent
floating_point : int , default: 2
floating_point: int , default: 2
Number of floating points to display
scientific_notation_threshold: int, default: 4
Max number of floating points for which to show number as float. If number of floating points is larger than
this parameter, scientific notation (e.g. "10E-5%") will be shown.
add_positive_prefix: bool, default: False
Add a plus sign before positive percentages (a minus sign is always added for negative percentages).
Returns
-------
str
Expand All @@ -431,7 +437,7 @@ def format_percent(ratio: float, floating_point: int = 2, scientific_notation_th
ratio = -ratio
prefix = '-'
else:
prefix = ''
prefix = '+' if add_positive_prefix and ratio != 0 else ''

if int(ratio) == ratio:
result = f'{int(ratio) * 100}%'
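
Given the prefix logic above, the expected outputs would be roughly as follows (a sketch; the non-integer formatting path falls outside the visible hunk and is assumed):

format_percent(0.05, add_positive_prefix=True)   # '+5%'
format_percent(-0.05, add_positive_prefix=True)  # '-5%'
format_percent(0, add_positive_prefix=True)      # '0%' (zero gets no prefix)
format_percent(0.05)                             # '5%' (default keeps the old behavior)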
@@ -10,6 +10,7 @@
#
"""Module contains the simple feature distribution check."""
from collections import defaultdict
from copy import copy
from typing import Callable, Dict, Hashable, TypeVar, Union

import numpy as np
@@ -232,7 +233,8 @@ def is_float_column(col: pd.Series) -> bool:

return (col.round() != col).any()

def add_condition_feature_pps_difference_not_greater_than(self: SFC, threshold: float = 0.2) -> SFC:
def add_condition_feature_pps_difference_not_greater_than(self: SFC, threshold: float = 0.2,
include_negative_diff: bool = False) -> SFC:
"""Add new condition.
Add condition that will check that difference between train
@@ -244,27 +246,38 @@ def add_condition_feature_pps_difference_not_greater_than(self: SFC, threshold:
----------
threshold : float , default: 0.2
train test pps difference upper bound.
include_negative_diff: bool, default: False
This parameter decides whether the condition checks the absolute value of the difference, or just the
positive value.
The difference is calculated as train PPS minus test PPS. This is because we're interested in the case
where the test dataset is less predictive of the label than the train dataset, as this could indicate
leakage of labels into the train dataset.
Returns
-------
SFC
"""

def condition(value: Dict[Hashable, Dict[Hashable, float]]) -> ConditionResult:
def condition(value: Union[Dict[Hashable, Dict[Hashable, float]],
Dict[Hashable, Dict[Hashable, Dict[Hashable, float]]]],
) -> ConditionResult:

if self.per_class is True:
failed_features = {
feature_name: format_number(pps_value)
for feature_name, pps_value in
zip(value.keys(), [max(value[f]['train-test difference'].values()) for f in value.keys()])
if np.abs(pps_value) > threshold
}
diff_dict = {f: max(value[f]['train-test difference'].values()) for f in value.keys()}
if include_negative_diff is True:
diff_dict = {f: max(np.abs(value[f]['train-test difference'].values())) for f in value.keys()}

else:
failed_features = {
feature_name: format_number(pps_value)
for feature_name, pps_value in value['train-test difference'].items()
if np.abs(pps_value) > threshold
}
diff_dict = copy(value['train-test difference'])
if include_negative_diff is True:
diff_dict = {k: np.abs(v) for k, v in diff_dict.items()}

failed_features = {
feature_name: format_number(pps_value)
for feature_name, pps_value in diff_dict.items()
if pps_value > threshold
}

if failed_features:
message = f'Features with PPS difference above threshold: {failed_features}'
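
In the per_class branch above, each feature's per-class differences are reduced to a single number before the threshold comparison; a sketch with hypothetical values:

value = {'age': {'train-test difference': {'0': 0.05, '1': -0.40}}}  # hypothetical per-class diffs
per_class_diffs = value['age']['train-test difference']

max(per_class_diffs.values())                  # 0.05 -> used when include_negative_diff=False
max(abs(d) for d in per_class_diffs.values())  # 0.4  -> absolute-value variant

Note that np.abs expects an array-like, so applying it to dict values may require materializing them first (e.g. np.abs(list(...))).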
@@ -177,10 +177,49 @@ def test_all_features_pps_upper_bound_condition_that_should_pass():
))


def test_train_test_condition_pps_difference_pass():
def test_train_test_condition_pps_positive_difference_pass():
# Arrange
df, df2, expected = util_generate_second_similar_dataframe_and_expected()
condition_value = 0.4
check = SingleFeatureContributionTrainTest(random_state=42).\
add_condition_feature_pps_difference_not_greater_than(threshold=condition_value, include_negative_diff=False)

# Act
result = SingleFeatureContributionTrainTest(random_state=42).run(
train_dataset=Dataset(df, label='label'), test_dataset=Dataset(df2, label='label'))
condition_result, *_ = check.conditions_decision(result)

# Assert
assert_that(condition_result, equal_condition_result(
is_pass=True,
name=f'Train-Test features\' Predictive Power Score difference is not greater than {condition_value}'
))


def test_train_test_condition_pps_positive_difference_fail():
# Arrange
df, df2, expected = util_generate_second_similar_dataframe_and_expected()
condition_value = 0.01
check = SingleFeatureContributionTrainTest(random_state=42).\
add_condition_feature_pps_difference_not_greater_than(condition_value, include_negative_diff=False)

# Act
result = SingleFeatureContributionTrainTest(random_state=42).run(train_dataset=Dataset(df, label='label'),
test_dataset=Dataset(df2, label='label'))
condition_result, *_ = check.conditions_decision(result)

# Assert
assert_that(condition_result, equal_condition_result(
is_pass=False,
name=f'Train-Test features\' Predictive Power Score difference is not greater than {condition_value}',
details='Features with PPS difference above threshold: {\'x2\': \'0.31\'}'
))


def test_train_test_condition_pps_difference_pass():
# Arrange
df, df2, expected = util_generate_second_similar_dataframe_and_expected()
condition_value = 0.6
check = SingleFeatureContributionTrainTest(random_state=42
).add_condition_feature_pps_difference_not_greater_than(condition_value)

@@ -199,7 +238,7 @@ def test_train_test_condition_pps_difference_pass():
def test_train_test_condition_pps_difference_fail():
# Arrange
df, df2, expected = util_generate_second_similar_dataframe_and_expected()
condition_value = 0.01
condition_value = 0.4
check = SingleFeatureContributionTrainTest(random_state=42
).add_condition_feature_pps_difference_not_greater_than(condition_value)

@@ -212,7 +251,7 @@ assert_that(condition_result, equal_condition_result(
assert_that(condition_result, equal_condition_result(
is_pass=False,
name=f'Train-Test features\' Predictive Power Score difference is not greater than {condition_value}',
details='Features with PPS difference above threshold: {\'x2\': \'0.31\'}'
details='Features with PPS difference above threshold: {\'x3\': \'0.54\'}'
))

