
Adding class display limit in ClassPerformance check #866

Merged (10 commits) on Feb 15, 2022
61 changes: 59 additions & 2 deletions deepchecks/vision/checks/performance/class_performance.py
@@ -35,19 +35,53 @@ class ClassPerformance(TrainTestCheck):
----------
alternative_metrics : List[Metric], default: None
A list of ignite.Metric objects whose score should be used. If None are given, use the default metrics.
n_to_show : int, default: 20
Number of classes to show in the report. If None, show all classes.
show_only : str, default: 'largest'
Specify which classes to show in the report. Can be one of the following:
- 'largest': Show the largest classes.
- 'smallest': Show the smallest classes.
- 'random': Show random classes.
- 'best': Show the classes with the highest score.
- 'worst': Show the classes with the lowest score.
metric_to_show_by : str, default: None
Specify the metric to sort the results by. Relevant only when show_only is 'best' or 'worst'.
If not specified, for classification tasks the default is Precision; for object detection tasks the default is mAP.
"""

def __init__(self,
alternative_metrics: List[Metric] = None,
n_to_show: int = 20,
show_only: str = 'largest',
metric_to_show_by: str = None):
super().__init__()
self.alternative_metrics = alternative_metrics
self.n_to_show = n_to_show

if show_only not in ['largest', 'smallest', 'random', 'best', 'worst']:
raise DeepchecksValueError(f'Invalid value for show_only: {show_only}. Should be one of: '
f'["largest", "smallest", "random", "best", "worst"]')

self.show_only = show_only
if alternative_metrics is not None and show_only in ['best', 'worst'] and metric_to_show_by is None:
raise DeepchecksValueError('When alternative_metrics are provided and show_only is one of: '
'["best", "worst"], metric_to_show_by must be specified.')
self.metric_to_show_by = metric_to_show_by
self._state = {}

def initialize_run(self, context: Context):
"""Initialize run by creating the _state member with metrics for train and test."""
context.assert_task_type(TaskType.CLASSIFICATION, TaskType.OBJECT_DETECTION)

if not self.metric_to_show_by:
if context.train.task_type == TaskType.CLASSIFICATION:
self.metric_to_show_by = 'Precision'
elif context.train.task_type == TaskType.OBJECT_DETECTION:
self.metric_to_show_by = 'mAP'
else:
raise DeepchecksValueError(f'Invalid task type: {context.train.task_type}')

self._state = {DatasetKind.TRAIN: {}, DatasetKind.TEST: {}}
self._state[DatasetKind.TRAIN]['scorers'] = get_scorers_list(context.train, self.alternative_metrics)
self._state[DatasetKind.TEST]['scorers'] = get_scorers_list(context.train, self.alternative_metrics)
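For readers skimming the diff, here is a minimal sketch of how the new constructor validation behaves. The import path for DeepchecksValueError is an assumption (it is not visible in this hunk) and may differ between deepchecks versions:

from deepchecks.vision.checks.performance.class_performance import ClassPerformance
# Assumed location of the error type; adjust if your deepchecks version differs.
from deepchecks.core.errors import DeepchecksValueError

try:
    ClassPerformance(show_only='median')  # not one of the allowed values
except DeepchecksValueError as err:
    print(err)  # names the allowed values: largest, smallest, random, best, worst

# When alternative_metrics are passed together with show_only='best' or 'worst',
# metric_to_show_by must be given as well, otherwise the same error is raised.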
@@ -75,6 +109,10 @@ def compute(self, context: Context) -> CheckResult:

results_df = pd.concat(results)

if self.n_to_show is not None:
classes_to_show = self._filter_classes(results_df)
results_df = results_df.loc[results_df['Class'].isin(classes_to_show)]

fig = px.histogram(
results_df,
x='Class',
@@ -102,6 +140,25 @@ def compute(self, context: Context) -> CheckResult:
display=fig
)

def _filter_classes(self, metrics_df: pd.DataFrame) -> list:
# working only on the test set, and on the metric_to_show_by metric
tests_metrics_df = metrics_df[(metrics_df['Dataset'] == DatasetKind.TEST.value) &
(metrics_df['Metric'] == self.metric_to_show_by)]
if self.show_only == 'largest':
tests_metrics_df = tests_metrics_df.sort_values(by='Number of samples', ascending=False)
elif self.show_only == 'smallest':
tests_metrics_df = tests_metrics_df.sort_values(by='Number of samples', ascending=True)
elif self.show_only == 'random':
tests_metrics_df = tests_metrics_df.sample(frac=1)
elif self.show_only == 'best':
tests_metrics_df = tests_metrics_df.sort_values(by='Value', ascending=False)
elif self.show_only == 'worst':
tests_metrics_df = tests_metrics_df.sort_values(by='Value', ascending=True)
else:
raise ValueError(f'Unknown show_only value: {self.show_only}')

return tests_metrics_df.head(self.n_to_show)['Class'].to_list()

def add_condition_test_performance_not_less_than(self: PR, min_score: float) -> PR:
"""Add condition - metric scores are not less than given score.

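To make the new parameters concrete, below is a minimal usage sketch modeled on the tests added in this PR. The names mnist_dataset_train, mnist_dataset_test and trained_mnist are test fixtures from the deepchecks test suite, so substitute your own VisionData objects and model outside of that context:

import torch.nn as nn

from deepchecks.vision.checks.performance.class_performance import ClassPerformance
from deepchecks.vision.utils.classification_formatters import ClassificationPredictionFormatter

# Report only the two worst-performing classes; with no metric_to_show_by given,
# classification tasks fall back to Precision and detection tasks to mAP.
check = ClassPerformance(n_to_show=2, show_only='worst')
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
                   prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
result.value  # DataFrame restricted to the selected classes (train and test rows)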
100 changes: 100 additions & 0 deletions tests/vision/checks/performance/class_performance_test.py
@@ -0,0 +1,100 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
from deepchecks.vision.checks.performance.class_performance import ClassPerformance
from deepchecks.vision.datasets.detection.coco import yolo_prediction_formatter
from deepchecks.vision.utils import DetectionPredictionFormatter
from deepchecks.vision.utils.classification_formatters import ClassificationPredictionFormatter

import torch.nn as nn

from hamcrest import assert_that, has_entries, close_to, equal_to, is_in
from tests.vision.vision_conftest import *


def test_mnist_largest(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='largest')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.sort_values(by='Number of samples', ascending=False).iloc[0]
# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.991532, 0.01))
assert_that(first_row['Number of samples'], equal_to(6742))
assert_that(first_row['Class'], equal_to(1))


def test_mnist_smallest(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='smallest')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.sort_values(by='Number of samples', ascending=True).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.988739, 0.01))
assert_that(first_row['Number of samples'], equal_to(892))
assert_that(first_row['Class'], equal_to(5))


def test_mnist_worst(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='worst')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.loc[result.value['Metric'] == 'Precision'].sort_values(by='Value', ascending=True).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.977713, 0.01))
assert_that(first_row['Number of samples'], equal_to(1028))
assert_that(first_row['Class'], equal_to(7))


def test_mnist_best(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='best')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.loc[result.value['Metric'] == 'Precision'].sort_values(by='Value', ascending=False).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.990854, 0.01))
assert_that(first_row['Number of samples'], equal_to(982))
assert_that(first_row['Class'], equal_to(4))


def test_coco_best(coco_train_visiondata, coco_test_visiondata, trained_yolov5_object_detection):
# Arrange
pred_formatter = DetectionPredictionFormatter(yolo_prediction_formatter)
check = ClassPerformance(n_to_show=2, show_only='best')
# Act
result = check.run(coco_train_visiondata, coco_test_visiondata,
trained_yolov5_object_detection, prediction_formatter=pred_formatter)
first_row = result.value.loc[result.value['Metric'] == 'mAP'].sort_values(by='Value', ascending=False).iloc[0]

# Assert
assert_that(len(result.value), equal_to(4))
assert_that(first_row['Value'], close_to(0.990854, 0.01))
assert_that(first_row['Number of samples'], equal_to(1))
assert_that(first_row['Class'], is_in([28, 40]))
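As a standalone illustration of the selection logic in _filter_classes above, here is plain pandas on a toy frame (not deepchecks code): rows are restricted to the test split and to metric_to_show_by, then sorted and truncated to n_to_show. The 'Test' label is what DatasetKind.TEST.value is assumed to resolve to, and the column names match those used by the check.

import pandas as pd

# Toy per-class results with the columns _filter_classes expects.
results_df = pd.DataFrame({
    'Dataset': ['Test'] * 4,
    'Metric': ['Precision'] * 4,
    'Class': [0, 1, 2, 3],
    'Number of samples': [980, 1135, 1032, 1010],
    'Value': [0.99, 0.97, 0.95, 0.98],
})

# Equivalent of show_only='worst', metric_to_show_by='Precision', n_to_show=2:
worst_two = (results_df[(results_df['Dataset'] == 'Test') &
                        (results_df['Metric'] == 'Precision')]
             .sort_values(by='Value', ascending=True)
             .head(2)['Class']
             .to_list())
print(worst_two)  # [2, 1] -> the two lowest-precision classes are kept for display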