
Commit

Merge branch 'main' into 0.14.x
# Conflicts:
#	VERSION
noamzbr committed May 21, 2023
2 parents 3ff8209 + fbecc3e commit 119104d
Showing 69 changed files with 3,303 additions and 846 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -157,7 +157,7 @@ jobs:
- name: Set Up Env
run: make env
- name: Run Tests
run: make test args=tests/vision/gpu_tests
run: make vision-gpu-tests


# documentation-check:
12 changes: 11 additions & 1 deletion .gitignore
@@ -138,4 +138,14 @@ benchmarks/results
deepchecks/nlp/utils/.nlp-models

# embedding files
tests/nlp/utils/embeddings.csv
tests/nlp/utils/embeddings.csv
embeddings.csv.npy
embeddings.npy
deepchecks/nlp/datasets/assets/tweet_emotion/tweet_emotion_embeddings.npy

# nlp datasets
deepchecks/nlp/datasets/assets/just_dance_comment_analysis

# nlp test properties
metadata.csv
test_properties.csv
102 changes: 75 additions & 27 deletions CITATION.cff
@@ -1,49 +1,97 @@
cff-version: 1.2.0
title: >-
Deepchecks: A Library for Testing and Validating
Machine Learning Models and Data
message: >-
If you use this software, please cite it using the
metadata from this file.
title: "Deepchecks: A Library for Testing and Validating Machine Learning Models and Data"
message: "If you use this software, please cite it using the metadata from this file."
type: software
authors:
- given-names: Shir
family-names: Chorev
- family-names: Chorev
given-names: Shir
email: shir@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Tannor
given-names: Philip
email: philip@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Ben Israel
given-names: Dan
email: danb@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Bressler
given-names: Noam
email: noam@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Gabbay
given-names: Itay
email: itay@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Hutnik
given-names: Nir
email: nir@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Liberman
given-names: Jonatan
email: jonatan@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Perlmutter
given-names: Matan
email: matan@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Romanyshyn
given-names: Yurii
email: yurii@deepchecks.com
affiliation: Deepchecks Ltd.
- family-names: Rokach
given-names: Lior
email: liorrk@bgu.ac.il
affiliation: Deepchecks Ltd. and Department of Software and Info. Sys. Eng. Ben-Gurion University of the Negev
url: "https://github.com/deepchecks/deepchecks"
preferred-citation:
type: article
authors:
- family-names: Chorev
given-names: Shir
email: shir@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Philip
family-names: Tannor
- family-names: Tannor
given-names: Philip
email: philip@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Dan
family-names: Ben Israel
- family-names: Ben Israel
given-names: Dan
email: danb@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Noam
family-names: Bressler
- family-names: Bressler
given-names: Noam
email: noam@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Itay
family-names: Gabbay
- family-names: Gabbay
given-names: Itay
email: itay@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Nir
family-names: Hutnik
- family-names: Hutnik
given-names: Nir
email: nir@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Jonatan
family-names: Liberman
- family-names: Liberman
given-names: Jonatan
email: jonatan@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Matan
family-names: Perlmutter
- family-names: Perlmutter
given-names: Matan
email: matan@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Yurii
family-names: Romanyshyn
- family-names: Romanyshyn
given-names: Yurii
email: yurii@deepchecks.com
affiliation: Deepchecks Ltd.
- given-names: Lior
family-names: Rokach
email: liorrk@bgu.ac.ail
- family-names: Rokach
given-names: Lior
email: liorrk@bgu.ac.il
affiliation: Deepchecks Ltd. and Department of Software and Info. Sys. Eng. Ben-Gurion University of the Negev
title: "Deepchecks: A Library for Testing and Validating Machine Learning Models and Data"
journal: Journal of Machine Learning Research
year: 2022
volume: 23
number: 265
start: 1
end: 6
url: "http://jmlr.org/papers/v23/22-0281.html"
6 changes: 3 additions & 3 deletions README.md
@@ -70,7 +70,7 @@ data integrity, distribution mismatches, and more.
**This README refers to the Tabular version** of deepchecks.

- Check out the [Deepchecks for Computer Vision & Images subpackage](deepchecks/vision) for more details about deepchecks for CV, currently in *beta release*.
- Check out the [Deepchecks for NLP subpackage](deepchecks/nlp) for more details about deepchecks for NLP, currently in *alpha release*.
- Check out the [Deepchecks for NLP subpackage](deepchecks/nlp) for more details about deepchecks for NLP, currently in *beta release*.


## 💻 Installation
@@ -93,7 +93,7 @@ pip install deepchecks -U --user
> ```
>
> To install deepchecks together with the **NLP Submodule** that
> is currently in *alpha release*, replace
> is currently in *beta release*, replace
> ``deepchecks`` with ``"deepchecks[nlp]"`` as follows:
> ```bash
> pip install "deepchecks[nlp]" -U --user
@@ -303,7 +303,7 @@ subset of the following:

The package currently supports tabular data and is in:
- *beta release* for the [Computer Vision subpackage](deepchecks/vision).
- *alpha release* for the [NLP subpackage](deepchecks/nlp).
- *beta release* for the [NLP subpackage](deepchecks/nlp).


## 📖 Documentation
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
0.14.0
0.14.1
7 changes: 6 additions & 1 deletion deepchecks/core/serialization/dataframe/html.py
@@ -14,6 +14,7 @@

import pandas as pd
from pandas.io.formats.style import Styler
from pkg_resources import parse_version

from deepchecks.core.serialization.abc import HtmlSerializer

@@ -49,7 +50,11 @@ def serialize(self, **kwargs) -> str:
# Using deprecated pandas method so hiding the warning
with warnings.catch_warnings():
warnings.simplefilter(action='ignore', category=FutureWarning)
df_styler.set_precision(2)
# Set precision is deprecated since pandas 1.3.0
if parse_version(pd.__version__) < parse_version('1.3.0'):
df_styler.set_precision(2)
else:
df_styler.format(precision=2)
table_css_props = [
('text-align', 'left'), # Align everything to the left
('white-space', 'pre-wrap') # Define how to handle white space characters (like \n)
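The hunk above guards the deprecated `Styler.set_precision` behind a pandas version check. A minimal standalone sketch of the same idea, assuming only pandas and `pkg_resources` are available (the helper name is illustrative, not part of deepchecks):

```python
# Sketch: pick the Styler precision API that matches the installed pandas version.
import pandas as pd
from pandas.io.formats.style import Styler
from pkg_resources import parse_version


def style_with_precision(df: pd.DataFrame, precision: int = 2) -> Styler:
    """Round displayed floats on both pre- and post-1.3.0 pandas."""
    styler = df.style
    if parse_version(pd.__version__) < parse_version('1.3.0'):
        # Older pandas: Styler.format has no `precision` argument yet.
        styler.set_precision(precision)
    else:
        # pandas >= 1.3.0 deprecates set_precision in favour of format(precision=...).
        styler.format(precision=precision)
    return styler
```

Either branch yields a Styler whose HTML rendering shows two decimal places, so the serializer behaves the same across pandas versions.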
14 changes: 9 additions & 5 deletions deepchecks/nlp/checks/data_integrity/conflicting_labels.py
@@ -11,6 +11,7 @@
"""Module contains Conflicting Labels check."""
import typing as t

import numpy as np
import pandas as pd

from deepchecks.core import CheckResult
@@ -83,7 +84,8 @@ def _truncate_text(self, x: str) -> str:

def run_logic(self, context: Context, dataset_kind) -> CheckResult:
"""Run check."""
dataset = context.get_data_by_kind(dataset_kind).sample(self.n_samples, random_state=self.random_state)
dataset = context.get_data_by_kind(dataset_kind)
dataset = dataset.sample(self.n_samples, random_state=self.random_state, drop_na_label=True)
dataset = t.cast(TextData, dataset)
samples = dataset.text
n_of_samples = len(samples)
@@ -96,12 +98,14 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
**self._text_normalization_kwargs
))

if dataset.task_type is TaskType.TOKEN_CLASSIFICATION or dataset.is_multi_label_classification():
if dataset.task_type is TaskType.TOKEN_CLASSIFICATION:
labels = [tuple(t.cast(t.Sequence[t.Any], it)) for it in dataset.label]
elif dataset.is_multi_label_classification():
labels = [tuple(np.where(row == 1)[0]) for row in dataset.label]
elif dataset.task_type is TaskType.TEXT_CLASSIFICATION:
labels = dataset.label
else:
raise DeepchecksValueError(f'Unknow task type - {dataset.task_type}')
raise DeepchecksValueError(f'Unknown task type - {dataset.task_type}')

df = pd.DataFrame({
'hash': samples_hashes,
@@ -117,7 +121,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
ambiguous_samples_hashes = n_of_labels_per_sample[n_of_labels_per_sample > 1]
ambiguous_samples_hashes = frozenset(ambiguous_samples_hashes.index.to_list())

ambiguous_samples = df[df['hash'].isin(ambiguous_samples_hashes)]
ambiguous_samples = df[df['hash'].isin(ambiguous_samples_hashes)].copy()
num_of_ambiguous_samples = ambiguous_samples['Text'].count()
percent_of_ambiguous_samples = num_of_ambiguous_samples / n_of_samples

@@ -134,7 +138,7 @@ def run_logic(self, context: Context, dataset_kind) -> CheckResult:
if context.with_display is False or num_of_ambiguous_samples == 0:
return CheckResult(value=result_value)

ambiguous_samples['Text'] = ambiguous_samples['Text'].apply(self._truncate_text)
ambiguous_samples.loc[:, 'Text'] = ambiguous_samples['Text'].apply(self._truncate_text)
by_hash = ambiguous_samples.groupby(['hash'], dropna=False)
observed_labels = by_hash['Label'].aggregate(lambda x: format_list(x.to_list()))
samples_ids = by_hash['Sample ID'].aggregate(lambda x: format_list(x.to_list(), max_string_length=200))
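The changes above make each sample's label hashable before grouping duplicate texts: token-classification labels become plain tuples, while multi-label rows are reduced to the indices of their active classes. A small illustrative sketch of that normalization step (the data is made up):

```python
# Sketch: reduce labels to hashable tuples so identical texts can be grouped by label.
import numpy as np

# Multi-label labels arrive as binary indicator rows; keep the active class indices.
multi_label = np.array([[1, 0, 1],
                        [0, 1, 0]])
multi_label_tuples = [tuple(np.where(row == 1)[0]) for row in multi_label]
# -> active classes per sample: (0, 2) and (1,)

# Token-classification labels are per-token sequences; a plain tuple() is enough.
token_labels = [['B-PER', 'O'], ['O', 'B-LOC']]
token_label_tuples = [tuple(seq) for seq in token_labels]
# -> ('B-PER', 'O') and ('O', 'B-LOC')
```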
10 changes: 5 additions & 5 deletions deepchecks/nlp/checks/data_integrity/under_annotated_segments.py
@@ -20,6 +20,7 @@
from deepchecks.core.check_result import DisplayMap
from deepchecks.core.errors import DeepchecksProcessError
from deepchecks.nlp import Context, SingleDatasetCheck
from deepchecks.nlp.utils.text import break_to_lines_and_trim
from deepchecks.nlp.utils.weak_segments import get_relevant_data_table
from deepchecks.utils.abstracts.weak_segment_abstract import WeakSegmentAbstract
from deepchecks.utils.metrics import is_label_none
@@ -90,7 +91,7 @@ def _generate_scatter_plot_display(self, encoded_data: pd.DataFrame, is_annotate
display_tabs = {}
if weak_segments.shape[0] > self.n_to_show:
weak_segments = weak_segments.iloc[:self.n_to_show, :]
encoded_data['text'] = text
encoded_data['text'] = [break_to_lines_and_trim(sample) for sample in text]

# Handle categorical features
jitter = 0.25
@@ -111,8 +112,8 @@ def _generate_scatter_plot_display(self, encoded_data: pd.DataFrame, is_annotate
if feature_2 != '': # segment by two features
feature_2_lower, feature_2_upper = self._get_box_boundaries(encoded_data[feature_2],
row['Feature2 Range'])
hover_template = '<b>' + feature_1 + '<b>: %{x}<br><b>' + feature_2 + \
'<b>: %{y}<br><b>text<b>: %{text}<br><b>Annotated<b>: '
hover_template = '<b>' + feature_1 + '</b>: %{x}<br><b>' + feature_2 + \
'</b>: %{y}<br><b>text</b>: %{text}<br><b>Annotated</b>: '
tab_title = f'{feature_1} vs {feature_2}'
range_f1 = self._format_partition_vec_for_display([feature_1_lower, feature_1_upper], feature_1, ', ')
range_f2 = self._format_partition_vec_for_display([feature_2_lower, feature_2_upper], feature_2, ', ')
@@ -122,7 +123,7 @@ def _generate_scatter_plot_display(self, encoded_data: pd.DataFrame, is_annotate
feature_2 = 'virtual_col'
feature_2_lower = encoded_data['virtual_col'].min() * 1.3
feature_2_upper = encoded_data['virtual_col'].max() * 1.3
hover_template = '<b>' + feature_1 + '<b>: %{x}<br><b>text<b>: %{text}<br><b>Annotated<b>: '
hover_template = '<b>' + feature_1 + '</b>: %{x}<br><b>text</b>: %{text}<br><b>Annotated</b>: '
tab_title = feature_1
range_f1 = self._format_partition_vec_for_display([feature_1_lower, feature_1_upper], feature_1, ', ')
msg = f'Under annotated segment contains samples with {feature_1} in {range_f1[0]}.'
@@ -162,7 +163,6 @@ def _generate_scatter_plot_display(self, encoded_data: pd.DataFrame, is_annotate
f' in whole data)</sub>',
font=dict(size=24)),
xaxis_title=feature_1, yaxis_title=feature_2 if feature_2 != 'virtual_col' else '',
autosize=False, width=1000, height=600,
font=dict(size=14),
plot_bgcolor='rgba(245, 245, 245, 1)',
xaxis=dict(gridcolor='rgba(200, 200, 200, 0.5)',
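The hover-template edits above close the `<b>` tags that were previously left open, so Plotly renders bold field names instead of leaking markup into the tooltip. A minimal, self-contained sketch of such a template (data and feature names are invented):

```python
# Sketch: Plotly scatter with a hover template using properly closed <b> tags.
import plotly.graph_objects as go

fig = go.Figure(go.Scatter(
    x=[0.12, 0.87],
    y=[3, 5],
    mode='markers',
    text=['first sample text', 'second sample text'],   # substituted into %{text}
    hovertemplate='<b>feature_1</b>: %{x}<br>'
                  '<b>feature_2</b>: %{y}<br>'
                  '<b>text</b>: %{text}<extra></extra>',
))
fig.show()
```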
37 changes: 27 additions & 10 deletions deepchecks/nlp/checks/model_evaluation/prediction_drift.py
@@ -9,13 +9,16 @@
# ----------------------------------------------------------------------------
#
"""Module contains Prediction Drift check."""
import warnings

import numpy as np

from deepchecks.core import CheckResult
from deepchecks.core.errors import DeepchecksValueError
from deepchecks.nlp import Context, TrainTestCheck
from deepchecks.nlp.task_type import TaskType
from deepchecks.utils.abstracts.prediction_drift import PredictionDriftAbstract
from deepchecks.utils.distribution.preprocessing import convert_multi_label_to_multi_class

__all__ = ['PredictionDrift']

@@ -54,6 +57,8 @@ class PredictionDrift(PredictionDriftAbstract, TrainTestCheck):
the predicted probability of the positive class if binary. Set to 'proba' to force drift on the predicted
probabilities, and 'prediction' to force drift on the predicted classes. If set to 'proba', on a multiclass
task, drift would be calculated on each class independently.
For token classification tasks, drift is always calculated on the predictions and not on the probabilities,
and this parameter is ignored.
margin_quantile_filter: float, default: 0.025
float in range [0,0.5), representing which margins (high and low quantiles) of the distribution will be filtered
out of the EMD calculation. This is done in order for extreme values not to affect the calculation
@@ -141,22 +146,34 @@ def run_logic(self, context: Context) -> CheckResult:
value: drift score.
display: prediction distribution graph, comparing the train and test distributions.
"""
context.raise_if_token_classification_task(self)

train_dataset = context.train.sample(self.n_samples, random_state=context.random_state)
test_dataset = context.test.sample(self.n_samples, random_state=context.random_state)
model = context.model

# Flag for computing drift on the probabilities rather than the predicted labels
proba_drift = ((len(context.model_classes) == 2) and (self.drift_mode == 'auto')) or \
(self.drift_mode == 'proba')
if self.drift_mode == 'proba' and \
(context.task_type == TaskType.TOKEN_CLASSIFICATION or context.is_multi_label_task()):
warnings.warn('Cannot use drift_mode="proba" for multi-label text classification tasks or token '
'classification tasks. Using drift_mode="prediction" instead.', UserWarning)

if proba_drift:
train_prediction = np.array(model.predict_proba(train_dataset))
test_prediction = np.array(model.predict_proba(test_dataset))
if context.task_type == TaskType.TOKEN_CLASSIFICATION:
train_prediction = np.concatenate(model.predict(train_dataset)).reshape(-1, 1)
test_prediction = np.concatenate(model.predict(test_dataset)).reshape(-1, 1)
proba_drift = False
else:
train_prediction = np.array(model.predict(train_dataset)).reshape((-1, 1))
test_prediction = np.array(model.predict(test_dataset)).reshape((-1, 1))
# Flag for computing drift on the probabilities rather than the predicted labels
proba_drift = ((len(context.model_classes) == 2) and (self.drift_mode == 'auto')) or \
(self.drift_mode == 'proba')

if proba_drift:
train_prediction = np.array(model.predict_proba(train_dataset))
test_prediction = np.array(model.predict_proba(test_dataset))
elif context.is_multi_label_task():
model_classes = context.model_classes
train_prediction = convert_multi_label_to_multi_class(model.predict(train_dataset), model_classes)
test_prediction = convert_multi_label_to_multi_class(model.predict(test_dataset), model_classes)
else:
train_prediction = np.array(model.predict(train_dataset)).reshape((-1, 1))
test_prediction = np.array(model.predict(test_dataset)).reshape((-1, 1))

return self._prediction_drift(train_prediction, test_prediction, context.model_classes, context.with_display,
proba_drift, not proba_drift)
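
To summarize the branching the new `run_logic` introduces: token-classification predictions are flattened and compared directly, binary tasks in 'auto' mode (or an explicit 'proba' request) drift on predicted probabilities, multi-label predictions are first mapped to a multi-class representation, and everything else drifts on the predicted classes. A condensed, hypothetical restatement under those assumptions (the helper below is not deepchecks API):

```python
# Sketch of the decision tree above; names and signature are illustrative only.
import numpy as np


def predictions_for_drift(task_type, drift_mode, is_multi_label, model_classes,
                          predict, predict_proba, dataset):
    """Return (array_to_compare, proba_drift_flag) for the prediction drift check."""
    if task_type == 'token_classification':
        # Per-token predictions are flattened into one column; probabilities are ignored.
        return np.concatenate(predict(dataset)).reshape(-1, 1), False

    # Binary task in 'auto' mode, or an explicit 'proba' request -> drift on probabilities.
    # Multi-label tasks fall back to predictions, as the warning in the diff states.
    proba_drift = ((len(model_classes) == 2 and drift_mode == 'auto')
                   or drift_mode == 'proba') and not is_multi_label
    if proba_drift:
        return np.array(predict_proba(dataset)), True
    if is_multi_label:
        # The diff maps multi-label predictions to a multi-class representation first
        # (via convert_multi_label_to_multi_class).
        return np.array(predict(dataset)), False
    return np.array(predict(dataset)).reshape((-1, 1)), False
```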
