Nb/feat/confusion matrix nlp (#2426)
Nadav-Barak committed Mar 28, 2023
1 parent 180b455 commit 73cb2d9
Showing 17 changed files with 260 additions and 116 deletions.
3 changes: 2 additions & 1 deletion deepchecks/nlp/checks/__init__.py
@@ -11,7 +11,7 @@
"""Module importing all nlp checks."""

from deepchecks.nlp.checks.data_integrity import PropertyLabelCorrelation, TextPropertyOutliers
from deepchecks.nlp.checks.model_evaluation import (MetadataSegmentsPerformance, PredictionDrift,
from deepchecks.nlp.checks.model_evaluation import (ConfusionMatrixReport, MetadataSegmentsPerformance, PredictionDrift,
PropertySegmentsPerformance, SingleDatasetPerformance)
from deepchecks.nlp.checks.train_test_validation import LabelDrift

@@ -24,6 +24,7 @@
'SingleDatasetPerformance',
'MetadataSegmentsPerformance',
'PropertySegmentsPerformance',
'ConfusionMatrixReport',

# Train Test Validation
'PredictionDrift',
@@ -16,6 +16,7 @@
import deepchecks.ppscore as pps
from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
from deepchecks.core.check_utils.feature_label_correlation_utils import get_pps_figure, pd_series_to_trace
from deepchecks.core.errors import DatasetValidationError
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.nlp.task_type import TaskType
from deepchecks.tabular.utils.messages import get_condition_passed_message
@@ -45,6 +46,10 @@ class PropertyLabelCorrelation(SingleDatasetCheck):
Parameters
----------
properties_to_ignore: Optional[List[str]], default: None
List of properties to ignore in the check.
properties_to_include: Optional[List[str]], default: None
List of properties to include in the check. If None, all properties will be included.
ppscore_params : dict , default: None
dictionary of additional parameters for the ppscore.predictors function
n_top_properties : int , default: 5
@@ -57,12 +62,18 @@ class PropertyLabelCorrelation(SingleDatasetCheck):

def __init__(
self,
properties_to_ignore: t.Optional[t.List[str]] = None,
properties_to_include: t.Optional[t.List[str]] = None,
ppscore_params: t.Optional[t.Dict[t.Any, t.Any]] = None,
n_top_properties: int = 5,
n_samples: int = 100_000,
**kwargs
):
super().__init__(**kwargs)
if properties_to_ignore is not None and properties_to_include is not None:
raise DatasetValidationError('Cannot use both properties_to_ignore and properties_to_include arguments.')
self.properties_to_ignore = properties_to_ignore
self.properties_to_include = properties_to_include
self.ppscore_params = ppscore_params or {}
self.n_top_properties = n_top_properties
self.n_samples = n_samples
@@ -89,7 +100,12 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
if context.task_type in [TaskType.TEXT_CLASSIFICATION, TaskType.TOKEN_CLASSIFICATION]:
label = label.astype('object')

df = text_data.properties.join(label)
properties_df = text_data.properties
if self.properties_to_ignore is not None:
properties_df = properties_df.drop(columns=self.properties_to_ignore)
elif self.properties_to_include is not None:
properties_df = properties_df[self.properties_to_include]
df = properties_df.join(label)

df_pps = pps.predictors(df=df, y='label', random_seed=context.random_state,
**self.ppscore_params)
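A minimal usage sketch of the new filtering arguments (assumes an existing TextData object named text_data with calculated properties; the property name below is illustrative):

from deepchecks.nlp.checks import PropertyLabelCorrelation

# properties_to_ignore and properties_to_include are mutually exclusive;
# passing both raises DatasetValidationError.
check = PropertyLabelCorrelation(properties_to_ignore=['Text Length'])
# result = check.run(text_data)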
8 changes: 3 additions & 5 deletions deepchecks/nlp/checks/model_evaluation/__init__.py
@@ -9,13 +9,11 @@
# ----------------------------------------------------------------------------
#
"""Module containing the model evaluation checks in the nlp package."""

from deepchecks.nlp.checks.model_evaluation.confusion_matrix_report import ConfusionMatrixReport
from deepchecks.nlp.checks.model_evaluation.prediction_drift import PredictionDrift
from deepchecks.nlp.checks.model_evaluation.single_dataset_performance import SingleDatasetPerformance
from deepchecks.nlp.checks.model_evaluation.weak_segments_performance import (MetadataSegmentsPerformance,
PropertySegmentsPerformance)

__all__ = [
'SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift'
]
__all__ = ['SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift', 'ConfusionMatrixReport']
61 changes: 61 additions & 0 deletions deepchecks/nlp/checks/model_evaluation/confusion_matrix_report.py
@@ -0,0 +1,61 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
import numpy as np

from deepchecks.core import CheckResult
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']


class ConfusionMatrixReport(SingleDatasetCheck):
"""Calculate the confusion matrix of the model on the given dataset.
Parameters
----------
normalize_display : bool , default: True:
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""Run check.
Returns
-------
CheckResult
value is numpy array of the confusion matrix, displays the confusion matrix
Raises
------
DeepchecksValueError
If the data is not a Dataset instance with a label
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
y_true = np.asarray(dataset.label)
y_pred = np.array(context.model.predict(dataset)).reshape(len(y_true), )

return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
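A hedged usage sketch of the new NLP check (text_data and predictions stand in for an existing TextData object and precomputed model predictions; the exact run arguments may differ):

from deepchecks.nlp.checks import ConfusionMatrixReport

check = ConfusionMatrixReport(normalize_display=True)
# result = check.run(text_data, predictions=predictions)
# result.value holds the confusion matrix as a numpy array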
@@ -22,8 +22,8 @@
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.tabular import Dataset
from deepchecks.tabular.context import _DummyModel
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.dataframes import select_from_dataframe
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.typing import Hashable

__all__ = ['MetadataSegmentsPerformance', 'PropertySegmentsPerformance']
4 changes: 4 additions & 0 deletions deepchecks/nlp/text_data.py
@@ -421,6 +421,10 @@ def set_properties(self, properties: pd.DataFrame, properties_types: t.Optional[
@property
def properties(self) -> pd.DataFrame:
"""Return the properties of the dataset."""
if self._properties is None:
raise DeepchecksNotSupportedError(
'TextData does not contain properties, add them by using calculate_default_properties or '
'set_properties functions')
return self._properties

@property
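A sketch of handling the new guard (assumes an existing TextData instance named text_data whose properties were never set):

from deepchecks.core.errors import DeepchecksNotSupportedError

try:
    props = text_data.properties
except DeepchecksNotSupportedError:
    # Populate properties first, as the new error message suggests
    text_data.calculate_default_properties()
    props = text_data.properties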
2 changes: 1 addition & 1 deletion deepchecks/nlp/utils/nlp_plot.py
@@ -86,7 +86,7 @@ def get_text_outliers_graph(dist: Sequence, data: Sequence[str], lower_limit: fl
y=cat_df[dist_name],
marker=dict(color=color_discrete_sequence),
name='Common',
text=[f'{x:.2%}' for x in cat_df[dist_name]],
# text=[f'{x:.2%}' for x in cat_data[dist_name]],
customdata=hover_data,
hovertemplate=hover_template

@@ -10,12 +10,10 @@
#
"""The confusion_matrix_report check module."""
import numpy as np
import pandas as pd
from sklearn import metrics

from deepchecks.core import CheckResult
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.utils.plot import create_confusion_matrix_figure
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']

@@ -25,21 +23,21 @@ class ConfusionMatrixReport(SingleDatasetCheck):
Parameters
----------
normalized (bool, default True):
boolean that determines whether to normalize the true values of the matrix.
normalize_display : bool , default: True:
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalized: bool = True,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalized = normalized
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

@@ -58,18 +56,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
context.assert_classification_task()
ds_y = dataset.label_col
ds_x = dataset.features_columns
model = context.model
y_true = dataset.label_col
y_pred = np.array(context.model.predict(dataset.features_columns)).reshape(len(y_true), )

y_pred = np.array(model.predict(ds_x)).reshape(len(ds_y), )
total_classes = sorted(list(set(pd.concat([ds_y, pd.Series(y_pred)]).to_list())))
confusion_matrix = metrics.confusion_matrix(ds_y, y_pred)

if context.with_display:
fig = create_confusion_matrix_figure(confusion_matrix, total_classes,
total_classes, self.normalized)
else:
fig = None

return CheckResult(confusion_matrix, display=fig)
return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
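The tabular check now delegates to the shared helper, and its normalized argument is replaced by normalize_display. A hypothetical call with the new name (dataset and model stand in for existing tabular objects):

from deepchecks.tabular.checks import ConfusionMatrixReport

check = ConfusionMatrixReport(normalize_display=False)
# result = check.run(dataset, model)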
@@ -21,8 +21,8 @@
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.tabular.context import _DummyModel
from deepchecks.tabular.utils.task_type import TaskType
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.docref import doclink
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.single_sample_metrics import calculate_per_sample_loss
from deepchecks.utils.typing import Hashable

79 changes: 79 additions & 0 deletions deepchecks/utils/abstracts/confusion_matrix_abstract.py
@@ -0,0 +1,79 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
from typing import List

import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix

from deepchecks.core import CheckResult
from deepchecks.utils.strings import format_number_if_not_nan

__all__ = ['create_confusion_matrix_figure', 'run_confusion_matrix_check']


def run_confusion_matrix_check(y_pred: np.ndarray, y_true: np.ndarray, with_display=True,
normalize_display=True) -> CheckResult:
"""Calculate confusion matrix based on predictions and true label values."""
total_classes = sorted([str(x) for x in set(y_pred).union(set(y_true))])
result = confusion_matrix(y_true, y_pred)

if with_display:
fig = create_confusion_matrix_figure(result, total_classes, normalize_display)
else:
fig = None

return CheckResult(result, display=fig)


def create_confusion_matrix_figure(confusion_matrix_data: np.ndarray, classes_names: List[str],
normalize_display: bool):
"""Create a confusion matrix figure.
Parameters
----------
confusion_matrix_data: np.ndarray
2D array containing the confusion matrix.
classes_names: List[str]
the names of the classes to display as the axis.
normalize_display: bool
if True will also show normalized values by the true values.
Returns
-------
plotly Figure object
confusion matrix figure
"""
if normalize_display:
confusion_matrix_norm = confusion_matrix_data.astype('float') / \
(confusion_matrix_data.sum(axis=1)[:, np.newaxis] + np.finfo(float).eps) * 100
z = np.vectorize(format_number_if_not_nan)(confusion_matrix_norm)
text_template = '%{z}%<br>(%{text})'
color_bar_title = '% out of<br>True Values'
plot_title = 'Percent Out of True Values (Count)'
else:
z = confusion_matrix_data
color_bar_title = None
text_template = '%{text}'
plot_title = 'Value Count'

fig = go.Figure(data=go.Heatmap(
x=classes_names, y=classes_names, z=z,
text=confusion_matrix_data, texttemplate=text_template))
fig.data[0].colorbar.title = color_bar_title
fig.update_layout(title=plot_title)
fig.update_layout(height=600)
fig.update_xaxes(title='Predicted Value', type='category', scaleanchor='y', constrain='domain')
fig.update_yaxes(title='True value', type='category', constrain='domain', autorange='reversed')

return fig
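A small sketch calling the shared helper directly with toy label arrays (illustrative only; the checks above pass the sampled dataset labels and model predictions):

import numpy as np

from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

y_true = np.array(['cat', 'dog', 'dog', 'cat', 'bird'])
y_pred = np.array(['cat', 'dog', 'cat', 'cat', 'bird'])
result = run_confusion_matrix_check(y_pred, y_true, with_display=True, normalize_display=True)
print(result.value)  # raw confusion matrix over the sorted set of observed classes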
