
Adding class display limit in ClassPerformance check #866

Merged (10 commits) on Feb 15, 2022
61 changes: 59 additions & 2 deletions deepchecks/vision/checks/performance/class_performance.py
@@ -35,19 +35,53 @@ class ClassPerformance(TrainTestCheck):
----------
alternative_metrics : List[Metric], default: None
A list of ignite.Metric objects whose score should be used. If None are given, use the default metrics.
n_to_show : int, default: 20
Number of classes to show in the report. If None, show all classes.
show_only : str, default: 'largest'
Specify which classes to show in the report. Can be one of the following:
- 'largest': Show the largest classes.
- 'smallest': Show the smallest classes.
- 'random': Show random classes.
- 'best': Show the classes with the highest score.
- 'worst': Show the classes with the lowest score.
metric_to_show_by : str, default: None
Specify the metric to sort the results by. Relevant only when show_only is 'best' or 'worst'.
If not specified, for classification tasks the default is Precision; for object detection tasks the default is mAP.
"""

def __init__(self,
alternative_metrics: List[Metric] = None,
n_to_show: int = 20,
show_only: str = 'largest',
metric_to_show_by: str = None):
super().__init__()
self.alternative_metrics = alternative_metrics
self.n_to_show = n_to_show

if show_only not in ['largest', 'smallest', 'random', 'best', 'worst']:
raise DeepchecksValueError(f'Invalid value for show_only: {show_only}. Should be one of: '
f'["largest", "smallest", "random", "best", "worst"]')

self.show_only = show_only
if alternative_metrics is not None and show_only in ['best', 'worst'] and metric_to_show_by is None:
raise DeepchecksValueError('When alternative_metrics are provided and show_only is one of: '
'["best", "worst"], metric_to_show_by must be specified.')
self.metric_to_show_by = metric_to_show_by
self._state = {}

def initialize_run(self, context: Context):
"""Initialize run by creating the _state member with metrics for train and test."""
context.assert_task_type(TaskType.CLASSIFICATION, TaskType.OBJECT_DETECTION)

if not self.metric_to_show_by:
if context.train.task_type == TaskType.CLASSIFICATION:
self.metric_to_show_by = 'Precision'
elif context.train.task_type == TaskType.OBJECT_DETECTION:
self.metric_to_show_by = 'mAP'
else:
raise DeepchecksValueError(f'Invalid task type: {context.train.task_type}')

self._state = {DatasetKind.TRAIN: {}, DatasetKind.TEST: {}}
self._state[DatasetKind.TRAIN]['scorers'] = get_scorers_list(context.train, self.alternative_metrics)
self._state[DatasetKind.TEST]['scorers'] = get_scorers_list(context.train, self.alternative_metrics)
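For readers skimming the diff, here is a minimal sketch of how the new constructor validation behaves. The import path for DeepchecksValueError is an assumption (it is not visible in this hunk) and may differ between deepchecks versions:

from deepchecks.vision.checks.performance.class_performance import ClassPerformance
# Assumed location of the error type; adjust if your deepchecks version differs.
from deepchecks.core.errors import DeepchecksValueError

try:
    ClassPerformance(show_only='median')  # not one of the allowed values
except DeepchecksValueError as err:
    print(err)  # names the allowed values: largest, smallest, random, best, worst

# When alternative_metrics are passed together with show_only='best' or 'worst',
# metric_to_show_by must be given as well, otherwise the same error is raised.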
@@ -75,6 +109,10 @@ def compute(self, context: Context) -> CheckResult:

results_df = pd.concat(results)

if self.n_to_show is not None:
classes_to_show = self._filter_classes(results_df)
results_df = results_df.loc[results_df['Class'].isin(classes_to_show)]

fig = px.histogram(
results_df,
x='Class',
@@ -102,6 +140,25 @@ def compute(self, context: Context) -> CheckResult:
display=fig
)

def _filter_classes(self, metrics_df: pd.DataFrame) -> list:
# working only on the test set, and on the metric_to_show_by metric
tests_metrics_df = metrics_df[(metrics_df['Dataset'] == DatasetKind.TEST.value) &
(metrics_df['Metric'] == self.metric_to_show_by)]
if self.show_only == 'largest':
tests_metrics_df = tests_metrics_df.sort_values(by='Number of samples', ascending=False)
elif self.show_only == 'smallest':
tests_metrics_df = tests_metrics_df.sort_values(by='Number of samples', ascending=True)
elif self.show_only == 'random':
tests_metrics_df = tests_metrics_df.sample(frac=1)
elif self.show_only == 'best':
tests_metrics_df = tests_metrics_df.sort_values(by='Value', ascending=False)
elif self.show_only == 'worst':
tests_metrics_df = tests_metrics_df.sort_values(by='Value', ascending=True)
else:
raise ValueError(f'Unknown show_only value: {self.show_only}')

return tests_metrics_df.head(self.n_to_show)['Class'].to_list()

def add_condition_test_performance_not_less_than(self: PR, min_score: float) -> PR:
"""Add condition - metric scores are not less than given score.

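To make the new parameters concrete, below is a minimal usage sketch modeled on the tests added in this PR. The names mnist_dataset_train, mnist_dataset_test and trained_mnist are test fixtures from the deepchecks test suite, so substitute your own VisionData objects and model outside of that context:

import torch.nn as nn

from deepchecks.vision.checks.performance.class_performance import ClassPerformance
from deepchecks.vision.utils.classification_formatters import ClassificationPredictionFormatter

# Report only the two worst-performing classes; with no metric_to_show_by given,
# classification tasks fall back to Precision and detection tasks to mAP.
check = ClassPerformance(n_to_show=2, show_only='worst')
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
                   prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
result.value  # DataFrame restricted to the selected classes (train and test rows)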
100 changes: 100 additions & 0 deletions tests/vision/checks/performance/class_performance_test.py
@@ -0,0 +1,100 @@
# ----------------------------------------------------------------------------
# Copyright (C) 2021 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
from deepchecks.vision.checks.performance.class_performance import ClassPerformance
from deepchecks.vision.datasets.detection.coco import yolo_prediction_formatter
from deepchecks.vision.utils import DetectionPredictionFormatter
from deepchecks.vision.utils.classification_formatters import ClassificationPredictionFormatter

import torch.nn as nn

from hamcrest import assert_that, has_entries, close_to, equal_to, is_in
from tests.vision.vision_conftest import *


def test_mnist_largest(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='largest')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.sort_values(by='Number of samples', ascending=False).iloc[0]
# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.991532, 0.01))
assert_that(first_row['Number of samples'], equal_to(6742))
assert_that(first_row['Class'], equal_to(1))


def test_mnist_smallest(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='smallest')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.sort_values(by='Number of samples', ascending=True).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.988739, 0.01))
assert_that(first_row['Number of samples'], equal_to(892))
assert_that(first_row['Class'], equal_to(5))


def test_mnist_worst(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='worst')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.loc[result.value['Metric'] == 'Precision'].sort_values(by='Value', ascending=True).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.977713, 0.01))
assert_that(first_row['Number of samples'], equal_to(1028))
assert_that(first_row['Class'], equal_to(7))


def test_mnist_best(mnist_dataset_train, mnist_dataset_test, trained_mnist):
# Arrange

check = ClassPerformance(n_to_show=2, show_only='best')
# Act
result = check.run(mnist_dataset_train, mnist_dataset_test, trained_mnist,
prediction_formatter=ClassificationPredictionFormatter(nn.Softmax(dim=1)))
first_row = result.value.loc[result.value['Metric'] == 'Precision'].sort_values(by='Value', ascending=False).iloc[0]

# Assert
assert_that(len(result.value), equal_to(8))
assert_that(first_row['Value'], close_to(0.990854, 0.01))
assert_that(first_row['Number of samples'], equal_to(982))
assert_that(first_row['Class'], equal_to(4))


def test_coco_best(coco_train_visiondata, coco_test_visiondata, trained_yolov5_object_detection):
# Arrange
pred_formatter = DetectionPredictionFormatter(yolo_prediction_formatter)
check = ClassPerformance(n_to_show=2, show_only='best')
# Act
result = check.run(coco_train_visiondata, coco_test_visiondata,
trained_yolov5_object_detection, prediction_formatter=pred_formatter)
first_row = result.value.loc[result.value['Metric'] == 'mAP'].sort_values(by='Value', ascending=False).iloc[0]

# Assert
assert_that(len(result.value), equal_to(4))
assert_that(first_row['Value'], close_to(0.990854, 0.01))
assert_that(first_row['Number of samples'], equal_to(1))
assert_that(first_row['Class'], is_in([28, 40]))
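As a standalone illustration of the selection logic in _filter_classes above, here is plain pandas on a toy frame (not deepchecks code): rows are restricted to the test split and to metric_to_show_by, then sorted and truncated to n_to_show. The 'Test' label is what DatasetKind.TEST.value is assumed to resolve to, and the column names match those used by the check.

import pandas as pd

# Toy per-class results with the columns _filter_classes expects.
results_df = pd.DataFrame({
    'Dataset': ['Test'] * 4,
    'Metric': ['Precision'] * 4,
    'Class': [0, 1, 2, 3],
    'Number of samples': [980, 1135, 1032, 1010],
    'Value': [0.99, 0.97, 0.95, 0.98],
})

# Equivalent of show_only='worst', metric_to_show_by='Precision', n_to_show=2:
worst_two = (results_df[(results_df['Dataset'] == 'Test') &
                        (results_df['Metric'] == 'Precision')]
             .sort_values(by='Value', ascending=True)
             .head(2)['Class']
             .to_list())
print(worst_two)  # [2, 1] -> the two lowest-precision classes are kept for display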