# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""Module for calculating detection precision and recall."""
import warnings
from collections import defaultdict
from typing import List, Optional, Tuple, Union

import numpy as np
from ignite.metrics import Metric
from ignite.metrics.metric import reinit__is_reduced, sync_all_reduce

from deepchecks.core.errors import DeepchecksValueError
from deepchecks.vision.metrics_utils.metric_mixin import MetricMixin, ObjectDetectionMetricMixin


def _dict_conc(test_list):
    """Concatenate a list of dicts into a single dict of lists, flattening list values."""
    result = defaultdict(list)
    for current in test_list:
        for key, value in current.items():
            if isinstance(value, list):
                result[key].extend(value)
            else:
                result[key].append(value)
    return result
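
# Illustrative example (added comment, not part of the original module):
#   _dict_conc([{'a': [1, 2]}, {'a': [3], 'b': 4}])
#   -> defaultdict(list, {'a': [1, 2, 3], 'b': [4]})
# List values are flattened into the accumulator; scalar values are appended as-is.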


class AveragePrecisionRecall(Metric, MetricMixin):
    """Abstract class to calculate average precision and recall for various vision tasks.

    Parameters
    ----------
    max_dets: Union[List[int], Tuple[int]], default: [1, 10, 100]
        Maximum number of detections per class.
    area_range: tuple, default: (32**2, 96**2)
        Area thresholds that slice detections into small/medium/large buckets.
    return_option: str, default: 'ap'
        'ap': return average precision only, 'ar': return average recall only,
        None: return all the data (not ignite compliant).
    average: str, default: 'none'
        The method for averaging over the classes. If 'none', returns the result per class.
    iou_range: Tuple[float, float, float], default: (0.5, 0.95, 10)
        The closed range of IoU thresholds: (min, max (inclusive), number of points).
    """

    def __init__(self, *args, max_dets: Union[List[int], Tuple[int]] = (1, 10, 100),
area_range: Tuple = (32 ** 2, 96 ** 2),
return_option: Optional[str] = 'ap',
average: str = 'none',
iou_range: Tuple[float, float, float] = (0.5, 0.95, 10),
**kwargs):
super().__init__(*args, **kwargs)
self.return_option = return_option
if self.return_option is not None:
max_dets = [max_dets[-1]]
self.area_ranges_names = ['all']
else:
self.area_ranges_names = ['small', 'medium', 'large', 'all']
self.iou_thresholds = np.linspace(*iou_range, endpoint=True)
self.max_detections_per_class = max_dets
self.area_range = area_range
if average in ['none', 'macro', 'weighted']:
self.average = average
else:
raise DeepchecksValueError('average should be one of: \'none\', \'macro\', \'weighted\'')
self.get_mean_value = self.average != 'none'

    @reinit__is_reduced
def reset(self):
"""Reset metric state."""
super().reset()
self._evals = defaultdict(lambda: {'scores': [], 'matched': [], 'NP': []})
self.i = 0

    @reinit__is_reduced
def update(self, output):
"""Update metric with batch of samples."""
for detected, ground_truth in zip(output[0], output[1]):
self._group_detections(detected, ground_truth)
self.i += 1

    @sync_all_reduce('_evals')
def compute(self):
"""Compute metric value."""
# now reduce accumulations
sorted_classes = [int(class_id) for class_id in sorted(self._evals.keys())]
max_class = max(sorted_classes)
for class_id in sorted_classes:
acc = self._evals[class_id]
acc['scores'] = _dict_conc(acc['scores'])
acc['matched'] = _dict_conc(acc['matched'])
acc['NP'] = _dict_conc(acc['NP'])
reses = {'precision': -np.ones((len(self.iou_thresholds),
len(self.area_ranges_names),
len(self.max_detections_per_class),
max_class + 1)),
'recall': -np.ones((len(self.iou_thresholds),
len(self.area_ranges_names),
len(self.max_detections_per_class),
max_class + 1))}
classes_counts = -np.ones((len(self.iou_thresholds),
len(self.area_ranges_names),
len(self.max_detections_per_class),
max_class + 1))
for iou_i, min_iou in enumerate(self.iou_thresholds):
for dets_i, dets in enumerate(self.max_detections_per_class):
for area_i, area_size in enumerate(self.area_ranges_names):
precision_list = np.empty(max_class + 1)
precision_list.fill(np.nan)
recall_list = np.empty(max_class + 1)
recall_list.fill(np.nan)
counts_list = np.empty(max_class + 1)
counts_list.fill(np.nan)
# run ap calculation per-class
for class_id in sorted_classes:
ev = self._evals[class_id]
class_counts = np.nansum(np.array(ev['NP'][(area_size, dets, min_iou)]))
precision, recall = \
self._compute_ap_recall(np.array(ev['scores'][(area_size, dets, min_iou)]),
np.array(ev['matched'][(area_size, dets, min_iou)]),
class_counts)
precision_list[class_id] = precision
recall_list[class_id] = recall
counts_list[class_id] = np.nan if recall == -1 else class_counts
reses['precision'][iou_i, area_i, dets_i] = precision_list
reses['recall'][iou_i, area_i, dets_i] = recall_list
classes_counts[iou_i, area_i, dets_i] = counts_list
if self.average == 'weighted':
classes_counts = self.filter_res(classes_counts,
area=self.area_ranges_names[0], max_dets=self.max_detections_per_class[0])
classes_counts = np.nanmean(classes_counts, axis=(0, 1, 2))
classes_counts = classes_counts[~np.isnan(classes_counts)]
class_weights = 1.0 / classes_counts
class_weights = class_weights / np.sum(class_weights)
else:
class_weights = None
if self.return_option == 'ap':
return self.get_classes_scores_at(reses['precision'],
max_dets=self.max_detections_per_class[0],
area=self.area_ranges_names[0],
get_mean_val=self.get_mean_value,
class_weights=class_weights)
elif self.return_option == 'ar':
return self.get_classes_scores_at(reses['recall'],
max_dets=self.max_detections_per_class[0],
area=self.area_ranges_names[0],
get_mean_val=self.get_mean_value,
class_weights=class_weights)
return [reses]
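
    # Note (added commentary): the 'precision'/'recall' arrays built in compute()
    # are shaped (num_iou_thresholds, num_area_ranges, num_max_dets, num_classes);
    # -1 marks classes with no ground truth and NaN marks class ids absent from
    # the data.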

    def _group_detections(self, detected, ground_truth):
        """Group ground truths and detections on a per-image, per-class basis."""
# Calculating pairwise IoUs on classes
bb_info = self.group_class_detection_label(detected, ground_truth)
ious = {k: self.calc_pairwise_ious(v['detected'], v['ground_truth']) for k, v in bb_info.items()}
for class_id in ious.keys():
image_evals = self._evaluate_image(
bb_info[class_id]['detected'],
bb_info[class_id]['ground_truth'],
ious[class_id]
)
acc = self._evals[class_id]
acc['scores'].append(image_evals['scores'])
acc['matched'].append(image_evals['matched'])
acc['NP'].append(image_evals['NP'])

    def _evaluate_image(self, detections, ground_truths, ious):
        """Evaluate the detections of a single image for every (area, max-dets, IoU) setting."""
# Sort detections by decreasing confidence
confidences = self.get_confidences(detections)
areas = self.get_detection_areas(detections)
sorted_confidence_ids = np.argsort(confidences, kind='stable')[::-1]
orig_ious = ious
orig_gt = ground_truths
ground_truth_area = np.array(self.get_labels_areas(ground_truths))
scores = {}
matched = {}
n_gts = {}
for top_n_detections in self.max_detections_per_class:
for area_size in self.area_ranges_names:
# sort list of dts and chop by max dets
top_detections_idx = sorted_confidence_ids[:top_n_detections]
ious = orig_ious[top_detections_idx]
ground_truth_to_ignore = [self._is_ignore_area(gt_area, area_size) for gt_area in ground_truth_area]
# sort gts by ignore last
gt_sort = np.argsort(ground_truth_to_ignore, kind='stable')
ground_truths = [orig_gt[idx] for idx in gt_sort]
ground_truth_to_ignore = [ground_truth_to_ignore[idx] for idx in gt_sort]
ious = ious[:, gt_sort]
for min_iou in self.iou_thresholds:
detection_matches = \
self._get_best_matches(top_detections_idx, min_iou, ground_truths, ground_truth_to_ignore, ious)
# generate ignore list for dts
detections_to_ignore = [
ground_truth_to_ignore[detection_matches[d_idx]] if d_idx in detection_matches
else self._is_ignore_area(areas[real_index], area_size)
for d_idx, real_index in enumerate(top_detections_idx)
]
# get score for non-ignored dts
scores[(area_size, top_n_detections, min_iou)] = \
[confidences[real_index] for d_idx, real_index in enumerate(top_detections_idx)
if not detections_to_ignore[d_idx]]
matched[(area_size, top_n_detections, min_iou)] = \
[d_idx in detection_matches for d_idx, real_index in enumerate(top_detections_idx)
if not detections_to_ignore[d_idx]]
n_gts[(area_size, top_n_detections, min_iou)] = \
len([g_idx for g_idx in range(len(ground_truths)) if not ground_truth_to_ignore[g_idx]])
return {'scores': scores, 'matched': matched, 'NP': n_gts}
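
    # Note (added commentary): each of the dicts returned above is keyed by the
    # triple (area_size, top_n_detections, min_iou), e.g. ('all', 100, 0.5), so
    # that compute() can later slice the accumulated scores/matches per setting.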

    def _get_best_matches(self, dt, min_iou, ground_truths, ground_truth_to_ignore, ious):
        """Greedily match each detection, in descending confidence order, to the unmatched ground truth of highest IoU."""
ground_truth_matched = {}
detection_matches = {}
for d_idx in range(len(dt)):
# information about best match so far (best_match=-1 -> unmatched)
best_iou = min(min_iou, 1 - 1e-10)
best_match = -1
for g_idx in range(len(ground_truths)):
# if this gt already matched, continue
if g_idx in ground_truth_matched:
continue
# if dt matched and currently on ignore gt, stop
# this exists to allow for matching ignored ground truth, so that we ignore this detection
if best_match > -1 and ground_truth_to_ignore[g_idx]:
break
if ious[d_idx, g_idx] >= best_iou:
best_iou = ious[d_idx, g_idx]
best_match = g_idx
if best_match != -1:
detection_matches[d_idx] = best_match
ground_truth_matched[best_match] = d_idx
return detection_matches
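
    # Illustrative example (added comment): with min_iou=0.5, one detection and
    # two unmatched ground truths with IoUs [0.6, 0.7], the loop settles on the
    # ground truth with the highest IoU (0.7), records the match on both sides,
    # and no later detection can claim that ground truth again.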

    def _compute_ap_recall(self, scores, matched, n_positives, recall_thresholds=None):
if n_positives == 0:
return -1, -1
# by default evaluate on 101 recall levels
if recall_thresholds is None:
recall_thresholds = np.linspace(0.0,
1.00,
int(np.round((1.00 - 0.0) / 0.01)) + 1,
endpoint=True)
# sort in descending score order
inds = np.argsort(-scores, kind='mergesort')
scores = scores[inds]
matched = matched[inds]
if len(matched):
tp = np.cumsum(matched)
fp = np.cumsum(~matched)
rc = tp / n_positives
pr = tp / (tp + fp + np.spacing(1))
# make precision monotonically decreasing
i_pr = np.maximum.accumulate(pr[::-1])[::-1]
rec_idx = np.searchsorted(rc, recall_thresholds, side='left')
# get interpolated precision values at the evaluation thresholds
i_pr = np.array([i_pr[r] if r < len(i_pr) else 0 for r in rec_idx])
return np.mean(i_pr), rc[-1]
return 0, 0
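
    # Worked example (added comment): scores=[0.9, 0.8, 0.7], matched=[T, F, T],
    # n_positives=2 gives tp=[1, 1, 2], fp=[0, 1, 1], recall=[0.5, 0.5, 1.0] and
    # precision=[1.0, 0.5, 0.667]; the monotonic fix yields [1.0, 0.667, 0.667],
    # so the 101-point interpolated AP is (51*1.0 + 50*0.667)/101 ≈ 0.835 and
    # the returned recall is rc[-1] = 1.0.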

    def _is_ignore_area(self, area_bb, area_size):
        """Return True if the given bounding-box area falls outside the named area-size bucket."""
if area_size == 'small':
return not area_bb < self.area_range[0]
if area_size == 'medium':
return not self.area_range[0] <= area_bb <= self.area_range[1]
if area_size == 'large':
return not area_bb > self.area_range[1]
return False
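
    # With the default area_range=(32**2, 96**2) this mirrors the COCO size
    # buckets: 'small' keeps area < 32**2, 'medium' keeps 32**2 <= area <= 96**2,
    # 'large' keeps area > 96**2, and 'all' ignores nothing.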

    def filter_res(self, res: np.ndarray, iou: float = None, area: str = None, max_dets: int = None):
        """Get the value of a result by the filtering values.

        Parameters
        ----------
        res: np.array
            either precision or recall, as returned when return_option is None
        iou : float, default: None
            filter by iou threshold
        area : str, default: None
            filter by area range name ['small', 'medium', 'large', 'all']
        max_dets : int, default: None
            filter by max detections

        Returns
        -------
        np.array
            The filtered result.
        """
if iou:
iou_i = [i for i, iou_thres in enumerate(self.iou_thresholds) if iou == iou_thres]
res = res[iou_i, :, :, :]
if area:
area_i = [i for i, area_name in enumerate(self.area_ranges_names) if area == area_name]
res = res[:, area_i, :, :]
if max_dets:
dets_i = [i for i, det in enumerate(self.max_detections_per_class) if max_dets == det]
res = res[:, :, dets_i, :]
return res

    def get_classes_scores_at(self, res: np.ndarray, iou: float = None, area: str = None, max_dets: int = None,
                              get_mean_val: bool = True, zeroed_negative: bool = True,
                              class_weights: np.ndarray = None):
        """Get the mean value of the classes scores and the result values.

        Parameters
        ----------
        res: np.array
            either precision or recall, as returned when return_option is None
        iou : float, default: None
            filter by iou threshold
        area : str, default: None
            filter by area range name ['small', 'medium', 'large', 'all']
        max_dets : int, default: None
            filter by max detections
        get_mean_val : bool, default: True
            if True return the mean value over the classes, if False return the per-class scores
        zeroed_negative : bool, default: True
            when returning the per-class results, set negative (-1) values to 0
        class_weights : np.array, default: None
            The class weights for weighted macro averaging. If None, gives equal weight to all the classes.

        Returns
        -------
        Union[List[float], float]
            The mean value of the classes scores or the scores list.
        """
res = self.filter_res(res, iou, area, max_dets)
with warnings.catch_warnings():
warnings.simplefilter(action='ignore', category=RuntimeWarning)
res = np.nanmean(res[:, :, :], axis=0)
if get_mean_val:
filtered_res = res[~np.isnan(res) & (res > -1)]
num_classes = filtered_res.shape[-1]
if class_weights is None:
class_weights = np.empty(num_classes)
class_weights.fill(1 / num_classes)
if len(class_weights) != num_classes:
raise DeepchecksValueError('The class weights shape must match the number of classes')
weighted_result = np.dot(filtered_res, class_weights)
return weighted_result
if zeroed_negative:
res = res.clip(min=0)
return res[0][0]


class ObjectDetectionAveragePrecision(AveragePrecisionRecall, ObjectDetectionMetricMixin):
    """Calculate average precision and recall for object detection.

    Parameters
    ----------
    max_dets: Union[List[int], Tuple[int]], default: [1, 10, 100]
        Maximum number of detections per class.
    area_range: tuple, default: (32**2, 96**2)
        Area thresholds that slice detections into small/medium/large buckets.
    return_option: str, default: 'ap'
        'ap': return average precision only, 'ar': return average recall only,
        None: return all the data (not ignite compliant).
    """