Nb/bug/weak segments fill na #2560

Merged · 5 commits · May 28, 2023
@@ -100,7 +100,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:

         # Running the logic
         weak_segments = self._weak_segments_search(data=encoded_dataset.data, score_per_sample=score_per_sample,
-                                                   label_col=original_label,
+                                                   label_col=pd.Series(original_label, index=score_per_sample.index),
                                                    feature_rank_for_search=np.asarray(encoded_dataset.features),
                                                    dummy_model=dummy_model, scorer=scorer)

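Note on this change: `_weak_segments_search` (see the weak_segment_abstract.py hunk below) now drops samples whose per-sample score is NaN and re-selects rows by index, so the label must arrive as a `pd.Series` sharing the index of `score_per_sample` rather than as a bare array. A minimal sketch of the alignment this enables, with toy values:

import numpy as np
import pandas as pd

# Toy stand-ins for the check's internals; values are hypothetical.
score_per_sample = pd.Series([0.9, np.nan, 0.4], index=[10, 11, 12])
original_label = np.array(['pos', 'neg', 'pos'])

# A Series that shares the score index can be realigned after NaN scores are dropped.
label_col = pd.Series(original_label, index=score_per_sample.index)
kept = score_per_sample.dropna()
print(label_col.loc[kept.index].tolist())  # ['pos', 'pos']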
4 changes: 2 additions & 2 deletions deepchecks/nlp/context.py
@@ -29,7 +29,7 @@
 from deepchecks.tabular.utils.task_type import TaskType as TabularTaskType
 from deepchecks.utils.docref import doclink
 from deepchecks.utils.logger import get_logger
-from deepchecks.utils.typing import ClassificationModel
+from deepchecks.utils.typing import BasicModel
 from deepchecks.utils.validation import is_sequence_not_str

 __all__ = [
@@ -54,7 +54,7 @@
 TTextProba = t.Sequence[t.Sequence[float]]


-class _DummyModel(ClassificationModel):
+class _DummyModel(BasicModel):
     """Dummy model class used for inference with static predictions from the user.

     Parameters
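Context for the base-class change: the NLP dummy model can be built from hard predictions alone, so it should not promise `predict_proba`. Roughly, the two protocols in deepchecks/utils/typing.py differ only in that method; a simplified sketch (the real definitions may carry extra detail):

from typing import Any, List

from typing_extensions import Protocol, runtime_checkable


@runtime_checkable
class BasicModel(Protocol):
    """Any model that can produce hard predictions."""

    def predict(self, X) -> List[Any]:
        ...


@runtime_checkable
class ClassificationModel(BasicModel, Protocol):
    """A BasicModel that can also produce probability estimates."""

    def predict_proba(self, X) -> List[Any]:
        ...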
40 changes: 13 additions & 27 deletions deepchecks/nlp/input_validations.py
@@ -19,7 +19,7 @@
 from deepchecks.nlp.task_type import TaskType, TTextLabel
 from deepchecks.utils.logger import get_logger
 from deepchecks.utils.metrics import is_label_none
-from deepchecks.utils.type_inference import infer_categorical_features
+from deepchecks.utils.type_inference import infer_categorical_features, infer_numerical_features
 from deepchecks.utils.validation import is_sequence_not_str

 if TYPE_CHECKING:
@@ -136,39 +136,25 @@ def validate_length_and_calculate_column_types(
 )

     if categorical_columns is None:  # TODO: Add tests
-        categorical_features = infer_categorical_features(data_table)
-        numeric_features = [
-            c for c in data_table.columns
-            if c not in categorical_features
-        ]
-
-        column_types = ColumnTypes(
-            categorical_columns=categorical_features,
-            numerical_columns=numeric_features
-        )
-
+        categorical_columns = infer_categorical_features(data_table)
         get_logger().info(
-            '%s types were not provided, auto inferred types are:\n%s',
+            '%s types were not provided, auto inferred as categorical are:\n%s',
             data_table_name,
-            column_types._asdict()
+            categorical_columns
         )
+    else:
+        difference = set(categorical_columns).difference(data_table.columns)
+        if len(difference) != 0:
+            raise DeepchecksValueError(
+                f'The following columns does not exist in {data_table_name} - {list(difference)}'
+            )

-        return column_types
-
-    difference = set(categorical_columns).difference(data_table.columns)
-
-    if len(difference) != 0:
-        raise DeepchecksValueError(
-            f'The following columns does not exist in {data_table_name} - {list(difference)}'
-        )
+    other_features = set(data_table.columns) - set(categorical_columns)
+    numeric_features = infer_numerical_features(data_table[other_features])

-    numeric_features = [
-        c for c in data_table.columns
-        if c not in categorical_columns
-    ]
     return ColumnTypes(
         categorical_columns=list(categorical_columns),
-        numerical_columns=numeric_features
+        numerical_columns=list(numeric_features)
     )


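The practical effect of this rewrite: previously every non-categorical column was assumed numeric; now the leftover columns pass through `infer_numerical_features`, so free-text or otherwise non-numeric metadata is excluded instead of being mislabeled. A small sketch of the new flow, with a hypothetical table:

import pandas as pd

from deepchecks.utils.type_inference import infer_numerical_features

# Hypothetical metadata table: 'age' is numeric, 'bio' is free text.
data_table = pd.DataFrame({'region': ['EU', 'US'], 'age': [31, 25], 'bio': ['hi there', 'well met']})
categorical_columns = ['region']

other_features = set(data_table.columns) - set(categorical_columns)
# 'bio' is not numeric, so it is dropped rather than treated as a numeric feature.
print(infer_numerical_features(data_table[list(other_features)]))  # ['age']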
21 changes: 14 additions & 7 deletions deepchecks/nlp/text_data.py
@@ -115,6 +115,7 @@ class TextData:
     _properties: t.Optional[t.Union[pd.DataFrame, str]] = None
     _cat_properties: t.Optional[t.List[str]] = None
     _cat_metadata: t.Optional[t.List[str]] = None
+    _numeric_metadata: t.Optional[t.List[str]] = None
     _original_text_index: t.Optional[t.Sequence[int]] = None  # Sequence is np array

     def __init__(
@@ -267,13 +268,13 @@ def sample(self: TDataset, n_samples: int, replace: bool = False, random_state:
         Dataset
             instance of the Dataset with sampled internal dataframe.
         """
-        samples = np.arange(len(self))
+        samples_to_choose_from = np.arange(len(self))
         if drop_na_label and self.has_label():
-            samples = samples[[not is_label_none(x) for x in self._label]]
-        n_samples = min(n_samples, len(samples))
+            samples_to_choose_from = samples_to_choose_from[[not is_label_none(x) for x in self._label]]
+        n_samples = min(n_samples, len(samples_to_choose_from))

         np.random.seed(random_state)
-        sample_idx = np.random.choice(range(len(samples)), n_samples, replace=replace)
+        sample_idx = np.random.choice(samples_to_choose_from, n_samples, replace=replace)
         return self.copy(rows_to_use=sorted(sample_idx))

     def __len__(self) -> int:
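The `sample` fix is subtle: the old call drew from `range(len(samples))`, i.e. positions 0..k-1 within the filtered array rather than the surviving row indices, so rows excluded for having None labels could still be selected. A toy illustration, with hypothetical indices:

import numpy as np

# Rows 0 and 1 had None labels, so only rows [2, 3, 4] should be eligible.
samples_to_choose_from = np.array([2, 3, 4])

rng = np.random.default_rng(0)
old_draw = rng.choice(range(len(samples_to_choose_from)), 2, replace=False)  # draws from [0, 1, 2]
new_draw = rng.choice(samples_to_choose_from, 2, replace=False)              # draws from [2, 3, 4]
print(old_draw, new_draw)  # the old draw can return rows 0 and 1, which were filtered out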
@@ -292,7 +293,7 @@ def n_samples(self) -> int:

     @property
     def embeddings(self) -> pd.DataFrame:
-        """Return the metadata of for the dataset."""
+        """Return the embeddings of for the dataset."""
         if self._embeddings is None:
             raise DeepchecksValueError(
                 'Functionality requires embeddings, but the the TextData object had none. To use this functionality, '
@@ -319,7 +320,7 @@ def calculate_builtin_embeddings(self, model: str = 'miniLM', file_path: str = '
         self._embeddings = calculate_builtin_embeddings(text=self.text, model=model, file_path=file_path)

     def set_embeddings(self, embeddings: np.ndarray, verbose: bool = True):
-        """Set the metadata of the dataset.
+        """Set the embeddings of the dataset.

         Parameters
         ----------
@@ -353,10 +354,15 @@ def metadata(self) -> pd.DataFrame:
         return self._metadata

     @property
-    def categorical_metadata_columns(self) -> t.List[str]:
+    def categorical_metadata(self) -> t.List[str]:
         """Return categorical metadata column names."""
         return self._cat_metadata

+    @property
+    def numerical_metadata(self) -> t.List[str]:
+        """Return numeric metadata column names."""
+        return self._numeric_metadata
+
     def set_metadata(
         self,
         metadata: pd.DataFrame,
@@ -378,6 +384,7 @@ def set_metadata(

         self._metadata = metadata.reset_index(drop=True)
         self._cat_metadata = column_types.categorical_columns
+        self._numeric_metadata = column_types.numerical_columns

     def calculate_builtin_properties(
         self,
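Usage of the renamed accessor and the new `numerical_metadata` property, as a sketch; the `categorical_metadata` keyword and the fixture values here are assumptions, not taken from this diff:

import pandas as pd

from deepchecks.nlp import TextData

dataset = TextData(raw_text=['good product', 'bad service', 'okay overall'],
                   task_type='text_classification')
metadata = pd.DataFrame({'user_region': ['EU', 'US', 'EU'],
                         'user_age': [31.5, 25.0, 44.0]})
# Keyword name assumed; passing the categorical columns explicitly means only
# the remaining columns go through numeric inference.
dataset.set_metadata(metadata, categorical_metadata=['user_region'])

print(dataset.categorical_metadata)  # ['user_region']
print(dataset.numerical_metadata)    # ['user_age']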
5 changes: 3 additions & 2 deletions deepchecks/nlp/utils/weak_segments.py
@@ -24,8 +24,9 @@ def get_relevant_data_table(text_data: TextData, data_type: str, columns: Union[
                             ignore_columns: Union[Hashable, List[Hashable], None], n_top_features: Optional[int]):
     """Get relevant data table from the database."""
     if data_type == 'metadata':
-        features = select_from_dataframe(text_data.metadata, columns, ignore_columns)
-        cat_features = [col for col in features.columns if col in text_data.categorical_metadata_columns]
+        relevant_metadata = text_data.metadata[text_data.categorical_metadata + text_data.numerical_metadata]
+        features = select_from_dataframe(relevant_metadata, columns, ignore_columns)
+        cat_features = [col for col in features.columns if col in text_data.categorical_metadata]

     elif data_type == 'properties':
         features = select_from_dataframe(text_data.properties, columns, ignore_columns)
@@ -118,10 +118,12 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
             raise DeepchecksNotSupportedError('Check requires data to have at least two features in order to run.')

         # Decide which scorer and score_per_sample to use in the algorithm run
-        encoded_dataset = self._target_encode_categorical_features_fill_na(dataset_subset.features_columns,
+        features_data = dataset_subset.features_columns[dataset_subset.numerical_features + dataset_subset.cat_features]
+        encoded_dataset = self._target_encode_categorical_features_fill_na(features_data,
                                                                            dataset.label_col,
                                                                            dataset_subset.cat_features,
                                                                            context.task_type != TaskType.REGRESSION)
+
         if self.score_per_sample is not None:
             score_per_sample = self.score_per_sample[list(dataset.data.index)]
             scorer, dummy_model = None, None
6 changes: 6 additions & 0 deletions deepchecks/utils/abstracts/weak_segment_abstract.py
@@ -174,6 +174,12 @@ def _weak_segments_search(self, data: pd.DataFrame, score_per_sample: pd.Series,
                               scorer: Optional[DeepcheckScorer] = None, scorer_name: Optional[str] = None) \
             -> pd.DataFrame:
         """Search for weak segments based on scorer."""
+        # Remove samples with NaN score per sample
+        score_per_sample = score_per_sample.dropna()
+        data = data.loc[score_per_sample.index]
+        if label_col is not None:
+            label_col = label_col.loc[score_per_sample.index]
+
         if scorer_name is None and scorer is None:
             score_title = 'Average Score Per Sample'
         else:
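This guard is the core of the fix: samples without a score (for example, unannotated ones) are dropped once, and the feature table and labels are realigned to the surviving index before the segment search begins. The same pattern, self-contained with toy frames:

import numpy as np
import pandas as pd

data = pd.DataFrame({'feature': [1.0, 2.0, 3.0]})
label_col = pd.Series(['a', 'b', 'a'])
score_per_sample = pd.Series([0.5, np.nan, 0.9])

score_per_sample = score_per_sample.dropna()       # drop samples without a score
data = data.loc[score_per_sample.index]            # realign features
label_col = label_col.loc[score_per_sample.index]  # realign labels
print(list(score_per_sample.index))  # [0, 2] - sample 1 is gone from all three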
25 changes: 18 additions & 7 deletions deepchecks/utils/dataframes.py
@@ -16,6 +16,7 @@
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_numeric_dtype

 from deepchecks.core.errors import DeepchecksValueError
+from deepchecks.utils.type_inference import infer_categorical_features
 from deepchecks.utils.typing import Hashable
 from deepchecks.utils.validation import ensure_hashable_or_mutable_sequence
@@ -25,22 +26,32 @@
     'cast_categorical_to_object_dtype']


-def default_fill_na_per_column_type(df: pd.DataFrame, cat_features: t.Union[pd.Series, t.List]) -> pd.DataFrame:
+def default_fill_na_per_column_type(df: pd.DataFrame, cat_features: t.Optional[t.Union[pd.Series, t.List]]) \
+        -> pd.DataFrame:
     """Fill NaN values per column type."""
     pd.set_option('mode.chained_assignment', None)
+    if cat_features is None:
+        cat_features = infer_categorical_features(df)
+
+    result = {}
     for col_name in df.columns:
-        df[col_name] = default_fill_na_series(df[col_name], col_name in cat_features)
-    return df
+        modified_col = default_fill_na_series(df[col_name], col_name in cat_features)
+        if modified_col is not None:
+            result[col_name] = modified_col
+    return pd.DataFrame(result, index=df.index)


-def default_fill_na_series(col: pd.Series, is_categorical: t.Optional[bool] = None) -> pd.Series:
-    """Fill NaN values based on column type."""
-    if is_categorical:
+def default_fill_na_series(col: pd.Series, is_cat_column: t.Optional[bool] = None) -> t.Optional[pd.Series]:
+    """Fill NaN values based on column type if possible otherwise returns None."""
+    if is_cat_column:
         return col.astype('object').fillna('None')
     elif is_numeric_dtype(col):
         return col.astype('float64').fillna(col.mean())
     else:
-        return col.fillna(col.mode(), inplace=True)
+        common_values_list = col.mode()
+        if isinstance(common_values_list, pd.Series) and len(common_values_list) > 0:
+            return col.fillna(common_values_list[0])
+        return None


 def floatify_dataframe(df: pd.DataFrame):
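The old `return col.fillna(col.mode(), inplace=True)` had two problems: with `inplace=True` pandas returns None, so the caller always received None; and even without `inplace`, filling with the `mode()` Series aligns by index rather than broadcasting a scalar, so most NaNs would be missed. A short demonstration of both the bug and the fix:

import pandas as pd

col = pd.Series(['x', None, 'x', 'y'])

# Old behavior: inplace=True makes fillna return None.
print(col.copy().fillna(col.mode(), inplace=True))  # None

# New behavior: take the first mode value explicitly, if one exists.
common_values_list = col.mode()
if isinstance(common_values_list, pd.Series) and len(common_values_list) > 0:
    print(col.fillna(common_values_list[0]).tolist())  # ['x', 'x', 'x', 'y']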
2 changes: 1 addition & 1 deletion deepchecks/utils/type_inference.py
@@ -47,7 +47,7 @@ def infer_numerical_features(df: pd.DataFrame) -> t.List[Hashable]:
     for col in columns:
         col_data = df[col]
         if col_data.dtype == 'object':
-            # object might still be only floats, so we rest the dtype
+            # object might still be only floats, so we reset the dtype
            col_data = pd.Series(col_data.to_list())
         if is_numeric_dtype(col_data):
             numerical_columns.append(col)
@@ -62,7 +62,7 @@ def test_tweet_emotion_metadata(tweet_emotion_train_test_textdata):
     assert_that(result.value['avg_score'], close_to(0.5, 0.001))
     assert_that(len(result.value['weak_segments_list']), equal_to(5))
     assert_that(result.value['weak_segments_list'].iloc[0, 0], close_to(0.366, 0.01))
-    assert_that(result.value['weak_segments_list'].iloc[0, 1], equal_to('user_age'))
+    assert_that(result.value['weak_segments_list'].iloc[0, 1], equal_to('user_region'))


 def test_tweet_emotion_metadata_interesting_segment(tweet_emotion_train_test_textdata):
@@ -81,7 +81,7 @@ def test_tweet_emotion_metadata_interesting_segment(tweet_emotion_train_test_textdata):
     assert_that(result.value['avg_score'], close_to(0.844, 0.001))
     assert_that(len(result.value['weak_segments_list']), equal_to(6))
     assert_that(result.value['weak_segments_list'].iloc[0, 0], close_to(0, 0.01))
-    assert_that(result.value['weak_segments_list'].iloc[0, 1], equal_to('user_age'))
+    assert_that(result.value['weak_segments_list'].iloc[0, 1], equal_to('user_region'))


 def test_tweet_emotion_metadata_fully_annotated(tweet_emotion_train_test_textdata):
@@ -148,5 +148,5 @@ def test_multilabel_dataset(multilabel_mock_dataset_and_probabilities):
     ))

     assert_that(result.value['avg_score'], close_to(0.5, 0.001))
-    assert_that(len(result.value['weak_segments_list']), equal_to(5))
+    assert_that(len(result.value['weak_segments_list']), equal_to(4))
     assert_that(result.value['weak_segments_list'].iloc[0, 0], close_to(0.326, 0.01))
@@ -9,13 +9,50 @@
 # ----------------------------------------------------------------------------
 #
 """Test for the NLP WeakSegmentsPerformance check"""
+import numpy as np
+import pandas as pd
 import pytest
-from hamcrest import assert_that, close_to, equal_to, has_items, is_in, matches_regexp
+from hamcrest import assert_that, close_to, equal_to, has_items, is_in, matches_regexp, calling, raises

+from deepchecks.core.errors import DeepchecksNotSupportedError
 from deepchecks.nlp.checks import MetadataSegmentsPerformance, PropertySegmentsPerformance
 from tests.base.utils import equal_condition_result


+def test_error_no_proba_provided(tweet_emotion_train_test_textdata, tweet_emotion_train_test_predictions):
+    # Arrange
+    _, test = tweet_emotion_train_test_textdata
+    _, test_preds = tweet_emotion_train_test_predictions
+    check = MetadataSegmentsPerformance()
+
+    # Act & Assert
+    assert_that(calling(check.run).with_args(test, predictions=test_preds),
+                raises(DeepchecksNotSupportedError, 'Predicted probabilities not supplied. The weak segment '
+                                                    'checks relies on cross entropy error that requires predicted '
+                                                    'probabilities, rather than only predicted classes.'))
+
+
+def test_column_with_nones(tweet_emotion_train_test_textdata, tweet_emotion_train_test_probabilities):
+    # Arrange
+    _, test = tweet_emotion_train_test_textdata
+    _, test_probas = tweet_emotion_train_test_probabilities
+    test = test.copy()
+    test_probas = np.asarray([[None] * 4] * 3 + list(test_probas)[3:])
+    test._labels = np.asarray(list(test._label[3:]) + [None] * 3)
+    metadata = test.metadata.copy()
+    metadata['new_numeric_col'] = list(range(1976)) + [None, np.nan]
+    metadata['new_cat_col'] = [None, np.nan, pd.NA] + [1, 2, 3, 4, 5] * 395
+    test.set_metadata(metadata)
+
+    # Act
+    result = MetadataSegmentsPerformance().run(test, probabilities=test_probas)
+
+    # Assert
+    assert_that(result.value['avg_score'], close_to(0.707, 0.01))
+    assert_that(len(result.value['weak_segments_list']), equal_to(10))
+    assert_that(result.value['weak_segments_list'].iloc[0, 0], close_to(0.305, 0.01))
+
+
 def test_tweet_emotion(tweet_emotion_train_test_textdata, tweet_emotion_train_test_probabilities):
     # Arrange
     _, test = tweet_emotion_train_test_textdata
@@ -29,9 +66,9 @@ def test_tweet_emotion(tweet_emotion_train_test_textdata, tweet_emotion_train_test_probabilities):
     assert_that(condition_result, has_items(
         equal_condition_result(is_pass=False,
                                details='Found a segment with accuracy score of 0.305 in comparison '
-                               'to an average score of 0.708 in sampled data.',
+                                       'to an average score of 0.708 in sampled data.',
                                name='The relative performance of weakest segment is greater than '
-                               '80% of average model performance.')
+                                    '80% of average model performance.')
     ))

     assert_that(result.value['avg_score'], close_to(0.708, 0.001))
@@ -52,9 +89,9 @@ def test_tweet_emotion_properties(tweet_emotion_train_test_textdata, tweet_emotion_train_test_probabilities):
     assert_that(condition_result, has_items(
         equal_condition_result(is_pass=True,
                                details='Found a segment with accuracy score of 0.525 in comparison to an average '
-                               'score of 0.708 in sampled data.',
+                                       'score of 0.708 in sampled data.',
                                name='The relative performance of weakest segment is greater than 70% of average '
-                               'model performance.')
+                                    'model performance.')
     ))

     assert_that(result.value['avg_score'], close_to(0.708, 0.001))
@@ -107,9 +144,9 @@ def test_multilabel_dataset(multilabel_mock_dataset_and_probabilities):
     pat = r'Found a segment with f1 macro score of \d+.\d+ in comparison to an average score of 0.83 in sampled data.'
     assert_that(condition_result[0].details, matches_regexp(pat))
     assert_that(condition_result[0].name, equal_to('The relative performance '
-                                                'of weakest segment is greater '
-                                                'than 80% of average model '
-                                                'performance.'))
+                                                   'of weakest segment is greater '
+                                                   'than 80% of average model '
+                                                   'performance.'))

     assert_that(result.value['avg_score'], close_to(0.83, 0.001))
     assert_that(len(result.value['weak_segments_list']), is_in([5, 6]))  # TODO: check why it's not always 5
@@ -122,7 +159,7 @@ def test_multilabel_just_dance(just_dance_train_test_textdata, just_dance_train_test_textdata_probas):
     _, probabilities = just_dance_train_test_textdata_probas
     assert_that(data.is_multi_label_classification(), equal_to(True))

-    data = data.copy(rows_to_use = range(1000))
+    data = data.copy(rows_to_use=range(1000))
     probabilities = probabilities[:1000, :]
     check = PropertySegmentsPerformance()

4 changes: 2 additions & 2 deletions tests/nlp/test_text_data.py
@@ -235,7 +235,7 @@ def test_set_metadata(text_classification_dataset_mock):

     # Assert
     assert_that((dataset.metadata != metadata).sum().sum(), equal_to(0))
-    assert_that(dataset.categorical_metadata_columns, equal_to([]))
+    assert_that(dataset.categorical_metadata, equal_to([]))


 def test_set_metadata_with_categorical_columns(text_classification_dataset_mock):
@@ -251,7 +251,7 @@ def test_set_metadata_with_categorical_columns(text_classification_dataset_mock):

     # Assert
     assert_that((dataset.metadata != metadata).sum().sum(), equal_to(0))
-    assert_that(dataset.categorical_metadata_columns, equal_to(['second']))
+    assert_that(dataset.categorical_metadata, equal_to(['second']))


 def test_set_metadata_with_an_incorrect_list_of_categorical_columns(text_classification_dataset_mock):