Nb/feat/confusion matrix nlp (#2426)
Nadav-Barak committed Mar 28, 2023
1 parent 180b455 commit 73cb2d9
Showing 17 changed files with 260 additions and 116 deletions.
3 changes: 2 additions & 1 deletion deepchecks/nlp/checks/__init__.py
@@ -11,7 +11,7 @@
"""Module importing all nlp checks."""

from deepchecks.nlp.checks.data_integrity import PropertyLabelCorrelation, TextPropertyOutliers
from deepchecks.nlp.checks.model_evaluation import (MetadataSegmentsPerformance, PredictionDrift,
from deepchecks.nlp.checks.model_evaluation import (ConfusionMatrixReport, MetadataSegmentsPerformance, PredictionDrift,
PropertySegmentsPerformance, SingleDatasetPerformance)
from deepchecks.nlp.checks.train_test_validation import LabelDrift

@@ -24,6 +24,7 @@
'SingleDatasetPerformance',
'MetadataSegmentsPerformance',
'PropertySegmentsPerformance',
'ConfusionMatrixReport',

# Train Test Validation
'PredictionDrift',
@@ -16,6 +16,7 @@
import deepchecks.ppscore as pps
from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
from deepchecks.core.check_utils.feature_label_correlation_utils import get_pps_figure, pd_series_to_trace
from deepchecks.core.errors import DatasetValidationError
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.nlp.task_type import TaskType
from deepchecks.tabular.utils.messages import get_condition_passed_message
@@ -45,6 +46,10 @@ class PropertyLabelCorrelation(SingleDatasetCheck):
Parameters
----------
properties_to_ignore: Optional[List[str]], default: None
List of properties to ignore in the check.
properties_to_include: Optional[List[str]], default: None
List of properties to include in the check. If None, all properties will be included.
ppscore_params : dict , default: None
dictionary of additional parameters for the ppscore.predictors function
n_top_properties : int , default: 5
@@ -57,12 +62,18 @@ class PropertyLabelCorrelation(SingleDatasetCheck):

def __init__(
self,
properties_to_ignore: t.Optional[t.List[str]] = None,
properties_to_include: t.Optional[t.List[str]] = None,
ppscore_params: t.Optional[t.Dict[t.Any, t.Any]] = None,
n_top_properties: int = 5,
n_samples: int = 100_000,
**kwargs
):
super().__init__(**kwargs)
if properties_to_ignore is not None and properties_to_include is not None:
raise DatasetValidationError('Cannot use both properties_to_ignore and properties_to_include arguments.')
self.properties_to_ignore = properties_to_ignore
self.properties_to_include = properties_to_include
self.ppscore_params = ppscore_params or {}
self.n_top_properties = n_top_properties
self.n_samples = n_samples
@@ -89,7 +100,12 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
if context.task_type in [TaskType.TEXT_CLASSIFICATION, TaskType.TOKEN_CLASSIFICATION]:
label = label.astype('object')

df = text_data.properties.join(label)
properties_df = text_data.properties
if self.properties_to_ignore is not None:
properties_df = properties_df.drop(columns=self.properties_to_ignore)
elif self.properties_to_include is not None:
properties_df = properties_df[self.properties_to_include]
df = properties_df.join(label)

df_pps = pps.predictors(df=df, y='label', random_seed=context.random_state,
**self.ppscore_params)
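A minimal usage sketch of the new filtering arguments (assumes an existing TextData object named text_data with calculated properties; the property name below is illustrative):

from deepchecks.nlp.checks import PropertyLabelCorrelation

# properties_to_ignore and properties_to_include are mutually exclusive;
# passing both raises DatasetValidationError.
check = PropertyLabelCorrelation(properties_to_ignore=['Text Length'])
# result = check.run(text_data)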
8 changes: 3 additions & 5 deletions deepchecks/nlp/checks/model_evaluation/__init__.py
@@ -9,13 +9,11 @@
# ----------------------------------------------------------------------------
#
"""Module containing the model evaluation checks in the nlp package."""

from deepchecks.nlp.checks.model_evaluation.confusion_matrix_report import ConfusionMatrixReport
from deepchecks.nlp.checks.model_evaluation.prediction_drift import PredictionDrift
from deepchecks.nlp.checks.model_evaluation.single_dataset_performance import SingleDatasetPerformance
from deepchecks.nlp.checks.model_evaluation.weak_segments_performance import (MetadataSegmentsPerformance,
PropertySegmentsPerformance)

__all__ = [
'SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift'
]
__all__ = ['SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift', 'ConfusionMatrixReport']
61 changes: 61 additions & 0 deletions deepchecks/nlp/checks/model_evaluation/confusion_matrix_report.py
@@ -0,0 +1,61 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
import numpy as np

from deepchecks.core import CheckResult
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']


class ConfusionMatrixReport(SingleDatasetCheck):
"""Calculate the confusion matrix of the model on the given dataset.
Parameters
----------
normalize_display : bool , default: True:
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""Run check.
Returns
-------
CheckResult
value is numpy array of the confusion matrix, displays the confusion matrix
Raises
------
DeepchecksValueError
If the data is not a Dataset instance with a label
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
y_true = np.asarray(dataset.label)
y_pred = np.array(context.model.predict(dataset)).reshape(len(y_true), )

return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
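A hedged usage sketch of the new NLP check (text_data and predictions stand in for an existing TextData object and precomputed model predictions; the exact run arguments may differ):

from deepchecks.nlp.checks import ConfusionMatrixReport

check = ConfusionMatrixReport(normalize_display=True)
# result = check.run(text_data, predictions=predictions)
# result.value holds the confusion matrix as a numpy array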
@@ -22,8 +22,8 @@
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.tabular import Dataset
from deepchecks.tabular.context import _DummyModel
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.dataframes import select_from_dataframe
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.typing import Hashable

__all__ = ['MetadataSegmentsPerformance', 'PropertySegmentsPerformance']
4 changes: 4 additions & 0 deletions deepchecks/nlp/text_data.py
@@ -421,6 +421,10 @@ def set_properties(self, properties: pd.DataFrame, properties_types: t.Optional[
@property
def properties(self) -> pd.DataFrame:
"""Return the properties of the dataset."""
if self._properties is None:
raise DeepchecksNotSupportedError(
'TextData does not contain properties, add them by using calculate_default_properties or '
'set_properties functions')
return self._properties

@property
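A sketch of handling the new guard (assumes an existing TextData instance named text_data whose properties were never set):

from deepchecks.core.errors import DeepchecksNotSupportedError

try:
    props = text_data.properties
except DeepchecksNotSupportedError:
    # Populate properties first, as the new error message suggests
    text_data.calculate_default_properties()
    props = text_data.properties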
2 changes: 1 addition & 1 deletion deepchecks/nlp/utils/nlp_plot.py
@@ -86,7 +86,7 @@ def get_text_outliers_graph(dist: Sequence, data: Sequence[str], lower_limit: fl
y=cat_df[dist_name],
marker=dict(color=color_discrete_sequence),
name='Common',
text=[f'{x:.2%}' for x in cat_df[dist_name]],
# text=[f'{x:.2%}' for x in cat_data[dist_name]],
customdata=hover_data,
hovertemplate=hover_template

@@ -10,12 +10,10 @@
#
"""The confusion_matrix_report check module."""
import numpy as np
import pandas as pd
from sklearn import metrics

from deepchecks.core import CheckResult
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.utils.plot import create_confusion_matrix_figure
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']

@@ -25,21 +23,21 @@ class ConfusionMatrixReport(SingleDatasetCheck):
Parameters
----------
normalized (bool, default True):
boolean that determines whether to normalize the true values of the matrix.
normalize_display : bool , default: True:
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalized: bool = True,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalized = normalized
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

@@ -58,18 +56,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
context.assert_classification_task()
ds_y = dataset.label_col
ds_x = dataset.features_columns
model = context.model
y_true = dataset.label_col
y_pred = np.array(context.model.predict(dataset.features_columns)).reshape(len(y_true), )

y_pred = np.array(model.predict(ds_x)).reshape(len(ds_y), )
total_classes = sorted(list(set(pd.concat([ds_y, pd.Series(y_pred)]).to_list())))
confusion_matrix = metrics.confusion_matrix(ds_y, y_pred)

if context.with_display:
fig = create_confusion_matrix_figure(confusion_matrix, total_classes,
total_classes, self.normalized)
else:
fig = None

return CheckResult(confusion_matrix, display=fig)
return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
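The tabular check now delegates to the shared helper, and its normalized argument is replaced by normalize_display. A hypothetical call with the new name (dataset and model stand in for existing tabular objects):

from deepchecks.tabular.checks import ConfusionMatrixReport

check = ConfusionMatrixReport(normalize_display=False)
# result = check.run(dataset, model)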
@@ -21,8 +21,8 @@
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.tabular.context import _DummyModel
from deepchecks.tabular.utils.task_type import TaskType
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.docref import doclink
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.single_sample_metrics import calculate_per_sample_loss
from deepchecks.utils.typing import Hashable

79 changes: 79 additions & 0 deletions deepchecks/utils/abstracts/confusion_matrix_abstract.py
@@ -0,0 +1,79 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
from typing import List

import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix

from deepchecks.core import CheckResult
from deepchecks.utils.strings import format_number_if_not_nan

__all__ = ['create_confusion_matrix_figure', 'run_confusion_matrix_check']


def run_confusion_matrix_check(y_pred: np.ndarray, y_true: np.ndarray, with_display=True,
normalize_display=True) -> CheckResult:
"""Calculate confusion matrix based on predictions and true label values."""
total_classes = sorted([str(x) for x in set(y_pred).union(set(y_true))])
result = confusion_matrix(y_true, y_pred)

if with_display:
fig = create_confusion_matrix_figure(result, total_classes, normalize_display)
else:
fig = None

return CheckResult(result, display=fig)


def create_confusion_matrix_figure(confusion_matrix_data: np.ndarray, classes_names: List[str],
normalize_display: bool):
"""Create a confusion matrix figure.
Parameters
----------
confusion_matrix_data: np.ndarray
2D array containing the confusion matrix.
classes_names: List[str]
the names of the classes to display as the axis.
normalize_display: bool
if True will also show normalized values by the true values.
Returns
-------
plotly Figure object
confusion matrix figure
"""
if normalize_display:
confusion_matrix_norm = confusion_matrix_data.astype('float') / \
(confusion_matrix_data.sum(axis=1)[:, np.newaxis] + np.finfo(float).eps) * 100
z = np.vectorize(format_number_if_not_nan)(confusion_matrix_norm)
text_template = '%{z}%<br>(%{text})'
color_bar_title = '% out of<br>True Values'
plot_title = 'Percent Out of True Values (Count)'
else:
z = confusion_matrix_data
color_bar_title = None
text_template = '%{text}'
plot_title = 'Value Count'

fig = go.Figure(data=go.Heatmap(
x=classes_names, y=classes_names, z=z,
text=confusion_matrix_data, texttemplate=text_template))
fig.data[0].colorbar.title = color_bar_title
fig.update_layout(title=plot_title)
fig.update_layout(height=600)
fig.update_xaxes(title='Predicted Value', type='category', scaleanchor='y', constrain='domain')
fig.update_yaxes(title='True value', type='category', constrain='domain', autorange='reversed')

return fig
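A small sketch calling the shared helper directly with toy label arrays (illustrative only; the checks above pass the sampled dataset labels and model predictions):

import numpy as np

from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

y_true = np.array(['cat', 'dog', 'dog', 'cat', 'bird'])
y_pred = np.array(['cat', 'dog', 'cat', 'cat', 'bird'])
result = run_confusion_matrix_check(y_pred, y_true, with_display=True, normalize_display=True)
print(result.value)  # raw confusion matrix over the sorted set of observed classes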
