custom metrics reuse bug (#1180)
* Fix metric.update

* Add copy and reset of alternative metrics

* Remove copy from metrics utils

* Fix lint

* Add copy to metric utils

* Undo copy metrics in image segment performance

* Fix lint

* Fix class performance test

* Fix robustness report
matanper committed Apr 5, 2022
1 parent c5e78f3 commit a93c326
Showing 4 changed files with 26 additions and 11 deletions.
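
Background for the diffs below: the bug being fixed is that passing the same ignite Metric instance as an alternative scorer to several checks (which happens when a suite runs them together) lets state accumulated by one check leak into the next. A minimal sketch of that failure mode, assuming torch and pytorch-ignite are installed; the tensors and the printed values are illustrative only, not taken from the repository:

import torch
from ignite.metrics import Accuracy

metric = Accuracy()

# "Check 1" sees a perfectly classified batch of two samples.
preds = torch.tensor([[0.9, 0.1], [0.2, 0.8]])   # predicted classes: 0, 1
labels = torch.tensor([0, 1])
metric.update((preds, labels))
print(metric.compute())  # 1.0

# "Check 2" reuses the same instance without reset(): the previous batch is
# still counted, so its result is contaminated by check 1.
preds2 = torch.tensor([[0.9, 0.1], [0.2, 0.8]])  # predicted classes: 0, 1
labels2 = torch.tensor([1, 0])                   # both wrong this time
metric.update((preds2, labels2))
print(metric.compute())  # 0.5, where an isolated check would report 0.0

The changes below fix this by handing each check its own reset copy of the metric instead of the shared instance.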
17 changes: 13 additions & 4 deletions deepchecks/vision/checks/performance/image_segment_performance.py
@@ -69,6 +69,7 @@ def __init__(
         self.number_of_bins = number_of_bins
         self.number_of_samples_to_infer_bins = number_of_samples_to_infer_bins
         self._state = None
+        self._metrics = None
 
     def initialize_run(self, context: Context, dataset_kind: DatasetKind):
         """Initialize run before starting updating on batches."""
@@ -78,7 +79,6 @@ def initialize_run(self, context: Context, dataset_kind: DatasetKind):
 
     def update(self, context: Context, batch: Batch, dataset_kind: DatasetKind):
         """Update the bins by the image properties."""
-        dataset = context.get_data_by_kind(dataset_kind)
         images = batch.images
         predictions = batch.predictions
         labels = batch.labels
@@ -102,7 +102,8 @@ def update(self, context: Context, batch: Batch, dataset_kind: DatasetKind):
             # Check if enough data to infer bins
             if len(samples_for_bin) >= self.number_of_samples_to_infer_bins:
                 # Create the bins and metrics, and divide all cached data into the bins
-                self._state['bins'] = self._create_bins_and_metrics(samples_for_bin, dataset)
+                self._state['bins'] = self._create_bins_and_metrics(samples_for_bin,
+                                                                    context.get_data_by_kind(dataset_kind))
                 # Remove the samples cache which are no longer needed (free the memory)
                 del samples_for_bin
 
@@ -278,11 +279,19 @@ def _add_to_fitting_bin(bins: t.List[t.Dict], property_value, label, prediction)
         if single_bin['start'] <= property_value < single_bin['stop']:
             single_bin['count'] += 1
             for metric in single_bin['metrics'].values():
-                # Since this is a single prediction and label need to wrap in tensor
-                metric.update((torch.unsqueeze(prediction, 0), torch.unsqueeze(label, 0)))
+                # Since this is a single prediction and label, wrap each in a tensor/list so the metric
+                # receives the expected batch shape
+                metric.update((_wrap_torch_or_list(prediction), _wrap_torch_or_list(label)))
             return
 
 
+def _wrap_torch_or_list(value):
+    """Unsqueeze the value if it is a tensor, or wrap it in a list otherwise."""
+    if isinstance(value, torch.Tensor):
+        return torch.unsqueeze(value, 0)
+    return [value]
+
+
 def _range_string(start, stop, precision):
     start = '[' + format_number(start, precision) if not np.isinf(start) else '(-inf'
     stop = format_number(stop, precision) if not np.isinf(stop) else 'inf'
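
For context on the _wrap_torch_or_list helper added above: _add_to_fitting_bin feeds one sample at a time into each metric, so the single prediction or label has to become a batch of size one before metric.update is called. A small standalone sketch; the helper body is copied from the diff, while the inputs and printed results are hypothetical:

import torch

def _wrap_torch_or_list(value):
    """Unsqueeze the value if it is a tensor, or wrap it in a list otherwise."""
    if isinstance(value, torch.Tensor):
        return torch.unsqueeze(value, 0)
    return [value]

prediction = torch.tensor([0.1, 0.7, 0.2])    # per-class scores for one image
label = 4                                     # a plain, non-tensor label

print(_wrap_torch_or_list(prediction).shape)  # torch.Size([1, 3]), a batch of one
print(_wrap_torch_or_list(label))             # [4], wrapped in a list instead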
12 changes: 9 additions & 3 deletions deepchecks/vision/metrics_utils/metrics.py
@@ -10,6 +10,7 @@
 #
 """Module for defining metrics for the vision module."""
 import typing as t
+from copy import copy
 
 import numpy as np
 import pandas as pd
@@ -67,11 +68,16 @@ def get_scorers_list(
     task_type = dataset.task_type
 
     if alternative_scorers:
-        # Validate that each alternative scorer is a correct type
-        for _, met in alternative_scorers.items():
+        # For alternative scorers we create a copy, since suites run checks in parallel and the same
+        # metric instance can't be shared between several checks.
+        scorers = {}
+        for name, met in alternative_scorers.items():
+            # Validate that each alternative scorer is a correct type
             if not isinstance(met, Metric):
                 raise DeepchecksValueError('alternative_scorers should contain metrics of type ignite.Metric')
-        scorers = alternative_scorers
+            met.reset()
+            scorers[name] = copy(met)
+        return scorers
     elif task_type == TaskType.CLASSIFICATION:
         scorers = get_default_classification_scorers()
     elif task_type == TaskType.OBJECT_DETECTION:
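
The copy-and-reset pattern introduced in get_scorers_list, shown as a standalone sketch; build_scorers and the metric name are hypothetical, not part of the deepchecks API:

from copy import copy
from ignite.metrics import Precision

alternative_scorers = {'precision': Precision()}

def build_scorers(alternative_scorers):
    scorers = {}
    for name, met in alternative_scorers.items():
        met.reset()                # drop any state accumulated by a previous run
        scorers[name] = copy(met)  # give this run its own instance
    return scorers

scorers_for_check_a = build_scorers(alternative_scorers)
scorers_for_check_b = build_scorers(alternative_scorers)
assert scorers_for_check_a['precision'] is not scorers_for_check_b['precision']

Resetting before copying mirrors the diff: the stored template metric is cleared first, so none of the copies handed to individual checks starts with leftover state.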
4 changes: 2 additions & 2 deletions tests/vision/checks/performance/class_performance_test.py
@@ -99,8 +99,8 @@ def test_mnist_alt(mnist_dataset_train, mnist_dataset_test, mock_trained_mnist,
     r_row = result.value.loc[result.value['Metric'] == 'r'].sort_values(by='Value', ascending=False).iloc[0]
     # Assert
     assert_that(len(result.value), equal_to(8))
-    assert_that(p_row['Value'], close_to(0.975, 0.001))
-    assert_that(r_row['Value'], close_to(0.985, 0.001))
+    assert_that(p_row['Value'], close_to(.984, 0.001))
+    assert_that(r_row['Value'], close_to(0.988, 0.001))
 
 
 def test_coco_best(coco_train_visiondata, coco_test_visiondata, mock_trained_yolov5_object_detection, device):
4 changes: 2 additions & 2 deletions tests/vision/checks/performance/robustness_report_test.py
@@ -65,8 +65,8 @@ def test_coco_and_condition(coco_train_visiondata, mock_trained_yolov5_object_de
     # Assert
     assert_that(result.value, has_entries({
         'Hue Saturation Value': has_entries({
-            'AP': has_entries(score=close_to(0.348, 0.001), diff=close_to(-0.107, 0.001)),
-            'AR': has_entries(score=close_to(0.376, 0.001), diff=close_to(-0.092, 0.001))
+            'AP': has_entries(score=close_to(0.348, 0.01), diff=close_to(-0.104, 0.01)),
+            'AR': has_entries(score=close_to(0.376, 0.01), diff=close_to(-0.075, 0.01))
         }),
     }))
     assert_that(result.conditions_results, has_items(