Nb/feat/confusion matrix nlp #2426

Merged
merged 4 commits on Mar 28, 2023
3 changes: 2 additions & 1 deletion deepchecks/nlp/checks/__init__.py
@@ -11,7 +11,7 @@
"""Module importing all nlp checks."""

from deepchecks.nlp.checks.data_integrity import PropertyLabelCorrelation, TextPropertyOutliers
from deepchecks.nlp.checks.model_evaluation import (MetadataSegmentsPerformance, PredictionDrift,
from deepchecks.nlp.checks.model_evaluation import (ConfusionMatrixReport, MetadataSegmentsPerformance, PredictionDrift,
PropertySegmentsPerformance, SingleDatasetPerformance)
from deepchecks.nlp.checks.train_test_validation import LabelDrift

@@ -24,6 +24,7 @@
'SingleDatasetPerformance',
'MetadataSegmentsPerformance',
'PropertySegmentsPerformance',
'ConfusionMatrixReport',

# Train Test Validation
'PredictionDrift',
@@ -16,6 +16,7 @@
import deepchecks.ppscore as pps
from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
from deepchecks.core.check_utils.feature_label_correlation_utils import get_pps_figure, pd_series_to_trace
from deepchecks.core.errors import DatasetValidationError
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.nlp.task_type import TaskType
from deepchecks.tabular.utils.messages import get_condition_passed_message
@@ -45,6 +46,10 @@ class PropertyLabelCorrelation(SingleDatasetCheck):

Parameters
----------
properties_to_ignore: Optional[List[str]], default: None
List of properties to ignore in the check.
properties_to_include: Optional[List[str]], default: None
List of properties to include in the check. If None, all properties will be included.
ppscore_params : dict , default: None
dictionary of additional parameters for the ppscore.predictors function
n_top_properties : int , default: 5
@@ -57,12 +62,18 @@ class PropertyLabelCorrelation(SingleDatasetCheck):

def __init__(
self,
properties_to_ignore: t.Optional[t.List[str]] = None,
properties_to_include: t.Optional[t.List[str]] = None,
ppscore_params: t.Optional[t.Dict[t.Any, t.Any]] = None,
n_top_properties: int = 5,
n_samples: int = 100_000,
**kwargs
):
super().__init__(**kwargs)
if properties_to_ignore is not None and properties_to_include is not None:
raise DatasetValidationError('Cannot use both properties_to_ignore and properties_to_include arguments.')
self.properties_to_ignore = properties_to_ignore
self.properties_to_include = properties_to_include
self.ppscore_params = ppscore_params or {}
self.n_top_properties = n_top_properties
self.n_samples = n_samples
@@ -89,7 +100,12 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
if context.task_type in [TaskType.TEXT_CLASSIFICATION, TaskType.TOKEN_CLASSIFICATION]:
label = label.astype('object')

df = text_data.properties.join(label)
properties_df = text_data.properties
if self.properties_to_ignore is not None:
properties_df = properties_df.drop(columns=self.properties_to_ignore)
elif self.properties_to_include is not None:
properties_df = properties_df[self.properties_to_include]
df = properties_df.join(label)

df_pps = pps.predictors(df=df, y='label', random_seed=context.random_state,
**self.ppscore_params)
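A minimal usage sketch for the new filtering arguments: the texts, labels, and property column names below are illustrative, set_properties attaches a precomputed properties frame (as in deepchecks/nlp/text_data.py), and the run()/show() calls assume the standard deepchecks check entry points. Passing both properties_to_ignore and properties_to_include raises DatasetValidationError.

import pandas as pd
from deepchecks.nlp import TextData
from deepchecks.nlp.checks import PropertyLabelCorrelation

# Illustrative corpus and labels.
texts = ['good movie', 'terrible plot', 'great acting', 'boring and slow']
labels = ['pos', 'neg', 'pos', 'neg']
text_data = TextData(raw_text=texts, label=labels, task_type='text_classification')

# Attach precomputed properties (hypothetical column names).
text_data.set_properties(pd.DataFrame({
    'Text Length': [10, 13, 12, 15],
    'Sentiment': [0.8, -0.7, 0.9, -0.6],
}))

# Correlate only the 'Sentiment' property with the label.
result = PropertyLabelCorrelation(properties_to_include=['Sentiment']).run(text_data)
result.show()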
8 changes: 3 additions & 5 deletions deepchecks/nlp/checks/model_evaluation/__init__.py
@@ -9,13 +9,11 @@
# ----------------------------------------------------------------------------
#
"""Module containing the model evaluation checks in the nlp package."""

from deepchecks.nlp.checks.model_evaluation.confusion_matrix_report import ConfusionMatrixReport
from deepchecks.nlp.checks.model_evaluation.prediction_drift import PredictionDrift
from deepchecks.nlp.checks.model_evaluation.single_dataset_performance import SingleDatasetPerformance
from deepchecks.nlp.checks.model_evaluation.weak_segments_performance import (MetadataSegmentsPerformance,
PropertySegmentsPerformance)

__all__ = [
'SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift'
]
__all__ = ['SingleDatasetPerformance', 'MetadataSegmentsPerformance', 'PropertySegmentsPerformance',
'PredictionDrift', 'ConfusionMatrixReport']
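With both __init__ updates, the new check is importable from either the nlp.checks namespace or the model_evaluation subpackage; a quick sketch:

from deepchecks.nlp.checks import ConfusionMatrixReport
# The subpackage path exposes the same class:
from deepchecks.nlp.checks.model_evaluation import ConfusionMatrixReport as ConfusionMatrixReportAlias
assert ConfusionMatrixReport is ConfusionMatrixReportAlias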
61 changes: 61 additions & 0 deletions deepchecks/nlp/checks/model_evaluation/confusion_matrix_report.py
@@ -0,0 +1,61 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
import numpy as np

from deepchecks.core import CheckResult
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']


class ConfusionMatrixReport(SingleDatasetCheck):
"""Calculate the confusion matrix of the model on the given dataset.

Parameters
----------
normalize_display : bool , default: True
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""Run check.

Returns
-------
CheckResult
value is numpy array of the confusion matrix, displays the confusion matrix

Raises
------
DeepchecksValueError
If the data is not a Dataset instance with a label
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
y_true = np.asarray(dataset.label)
y_pred = np.array(context.model.predict(dataset)).reshape(len(y_true), )

return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
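A minimal sketch of running the new NLP check with precomputed predictions; the data is illustrative, and passing predictions through run(..., predictions=...) is an assumption based on the deepchecks NLP calling convention rather than something shown in this diff.

from deepchecks.nlp import TextData
from deepchecks.nlp.checks import ConfusionMatrixReport

# Illustrative classification data.
texts = ['good movie', 'terrible plot', 'great acting', 'boring and slow']
labels = ['pos', 'neg', 'pos', 'neg']
text_data = TextData(raw_text=texts, label=labels, task_type='text_classification')

# Hypothetical model predictions; the check compares them against the labels.
predictions = ['pos', 'neg', 'neg', 'neg']

result = ConfusionMatrixReport(normalize_display=True).run(text_data, predictions=predictions)
print(result.value)  # numpy array holding the raw confusion matrix counts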
@@ -22,8 +22,8 @@
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.tabular import Dataset
from deepchecks.tabular.context import _DummyModel
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.dataframes import select_from_dataframe
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.typing import Hashable

__all__ = ['MetadataSegmentsPerformance', 'PropertySegmentsPerformance']
4 changes: 4 additions & 0 deletions deepchecks/nlp/text_data.py
@@ -421,6 +421,10 @@ def set_properties(self, properties: pd.DataFrame, properties_types: t.Optional[
@property
def properties(self) -> pd.DataFrame:
"""Return the properties of the dataset."""
if self._properties is None:
raise DeepchecksNotSupportedError(
'TextData does not contain properties, add them by using calculate_default_properties or '
'set_properties functions')
return self._properties

@property
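A short sketch of the new guard on the properties accessor, using illustrative data: reading properties before anything is attached now raises DeepchecksNotSupportedError, while set_properties (or calculate_default_properties) makes the accessor usable.

import pandas as pd
from deepchecks.core.errors import DeepchecksNotSupportedError
from deepchecks.nlp import TextData

text_data = TextData(raw_text=['a short text', 'another text'],
                     label=['pos', 'neg'], task_type='text_classification')

try:
    _ = text_data.properties  # no properties attached yet
except DeepchecksNotSupportedError as err:
    print(err)  # points the user at calculate_default_properties / set_properties

text_data.set_properties(pd.DataFrame({'Text Length': [12, 12]}))
print(list(text_data.properties.columns))  # ['Text Length']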
2 changes: 1 addition & 1 deletion deepchecks/nlp/utils/nlp_plot.py
@@ -86,7 +86,7 @@ def get_text_outliers_graph(dist: Sequence, data: Sequence[str], lower_limit: fl
y=cat_df[dist_name],
marker=dict(color=color_discrete_sequence),
name='Common',
text=[f'{x:.2%}' for x in cat_df[dist_name]],
# text=[f'{x:.2%}' for x in cat_data[dist_name]],
customdata=hover_data,
hovertemplate=hover_template

@@ -10,12 +10,10 @@
#
"""The confusion_matrix_report check module."""
import numpy as np
import pandas as pd
from sklearn import metrics

from deepchecks.core import CheckResult
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.utils.plot import create_confusion_matrix_figure
from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

__all__ = ['ConfusionMatrixReport']

@@ -25,21 +23,21 @@ class ConfusionMatrixReport(SingleDatasetCheck):

Parameters
----------
normalized (bool, default True):
boolean that determines whether to normalize the true values of the matrix.
normalize_display : bool , default: True
boolean that determines whether to normalize the values of the matrix in the display.
n_samples : int , default: 1_000_000
number of samples to use for this check.
random_state : int, default: 42
random seed for all check internals.
"""

def __init__(self,
normalized: bool = True,
normalize_display: bool = True,
n_samples: int = 1_000_000,
random_state: int = 42,
**kwargs):
super().__init__(**kwargs)
self.normalized = normalized
self.normalize_display = normalize_display
self.n_samples = n_samples
self.random_state = random_state

@@ -58,18 +56,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
context.assert_classification_task()
ds_y = dataset.label_col
ds_x = dataset.features_columns
model = context.model
y_true = dataset.label_col
y_pred = np.array(context.model.predict(dataset.features_columns)).reshape(len(y_true), )

y_pred = np.array(model.predict(ds_x)).reshape(len(ds_y), )
total_classes = sorted(list(set(pd.concat([ds_y, pd.Series(y_pred)]).to_list())))
confusion_matrix = metrics.confusion_matrix(ds_y, y_pred)

if context.with_display:
fig = create_confusion_matrix_figure(confusion_matrix, total_classes,
total_classes, self.normalized)
else:
fig = None

return CheckResult(confusion_matrix, display=fig)
return run_confusion_matrix_check(y_pred, y_true, context.with_display, self.normalize_display)
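For the tabular check, the refactor only swaps the internals to the shared helper and renames normalized to normalize_display; a minimal sketch with an illustrative scikit-learn model:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from deepchecks.tabular import Dataset
from deepchecks.tabular.checks import ConfusionMatrixReport

iris = load_iris(as_frame=True).frame
dataset = Dataset(iris, label='target')
model = RandomForestClassifier(random_state=42)
model.fit(dataset.data[dataset.features], dataset.data[dataset.label_name])

# normalize_display replaces the old 'normalized' argument.
result = ConfusionMatrixReport(normalize_display=False).run(dataset, model)
print(result.value)  # raw confusion matrix counts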
@@ -21,8 +21,8 @@
from deepchecks.tabular import Context, SingleDatasetCheck
from deepchecks.tabular.context import _DummyModel
from deepchecks.tabular.utils.task_type import TaskType
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.docref import doclink
from deepchecks.utils.performance.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.single_sample_metrics import calculate_per_sample_loss
from deepchecks.utils.typing import Hashable

79 changes: 79 additions & 0 deletions deepchecks/utils/abstracts/confusion_matrix_abstract.py
@@ -0,0 +1,79 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""The confusion_matrix_report check module."""
from typing import List

import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix

from deepchecks.core import CheckResult
from deepchecks.utils.strings import format_number_if_not_nan

__all__ = ['create_confusion_matrix_figure', 'run_confusion_matrix_check']


def run_confusion_matrix_check(y_pred: np.ndarray, y_true: np.ndarray, with_display=True,
normalize_display=True) -> CheckResult:
"""Calculate confusion matrix based on predictions and true label values."""
total_classes = sorted([str(x) for x in set(y_pred).union(set(y_true))])
result = confusion_matrix(y_true, y_pred)

if with_display:
fig = create_confusion_matrix_figure(result, total_classes, normalize_display)
else:
fig = None

return CheckResult(result, display=fig)


def create_confusion_matrix_figure(confusion_matrix_data: np.ndarray, classes_names: List[str],
normalize_display: bool):
"""Create a confusion matrix figure.

Parameters
----------
confusion_matrix_data: np.ndarray
2D array containing the confusion matrix.
classes_names: List[str]
the names of the classes to display as the axis.
normalize_display: bool
if True will also show normalized values by the true values.

Returns
-------
plotly Figure object
confusion matrix figure

"""
if normalize_display:
confusion_matrix_norm = confusion_matrix_data.astype('float') / \
(confusion_matrix_data.sum(axis=1)[:, np.newaxis] + np.finfo(float).eps) * 100
z = np.vectorize(format_number_if_not_nan)(confusion_matrix_norm)
text_template = '%{z}%<br>(%{text})'
color_bar_title = '% out of<br>True Values'
plot_title = 'Percent Out of True Values (Count)'
else:
z = confusion_matrix_data
color_bar_title = None
text_template = '%{text}'
plot_title = 'Value Count'

fig = go.Figure(data=go.Heatmap(
x=classes_names, y=classes_names, z=z,
text=confusion_matrix_data, texttemplate=text_template))
fig.data[0].colorbar.title = color_bar_title
fig.update_layout(title=plot_title)
fig.update_layout(height=600)
fig.update_xaxes(title='Predicted Value', type='category', scaleanchor='y', constrain='domain')
fig.update_yaxes(title='True value', type='category', constrain='domain', autorange='reversed')

return fig
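The shared helper can also be exercised directly on label and prediction arrays (the class values below are illustrative): the returned CheckResult holds the raw counts from sklearn's confusion_matrix, and the optional figure shows row-normalized percentages with the counts as annotation text.

import numpy as np

from deepchecks.utils.abstracts.confusion_matrix_abstract import run_confusion_matrix_check

y_true = np.array(['cat', 'dog', 'dog', 'cat', 'bird'])
y_pred = np.array(['cat', 'dog', 'cat', 'cat', 'bird'])

check_result = run_confusion_matrix_check(y_pred, y_true, with_display=True, normalize_display=True)
print(check_result.value)  # 3x3 array of raw counts from sklearn.metrics.confusion_matrix
check_result.show()        # renders the plotly heatmap when display is enabled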