Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Datumaro] Fix coco import conflict with labels #1548

Merged
merged 5 commits into from
May 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Task/Job buttons have no "Open in new tab" option (<https://github.com/opencv/cvat/pull/1419>)
- Delete point context menu option has no shortcut hint (<https://github.com/opencv/cvat/pull/1416>)
- Fixed issue with unnecessary tag activation in cvat-canvas (<https://github.com/opencv/cvat/issues/1540>)
- Fixed full COCO dataset import error with conflicting labels in keypoints and detection (<https://github.com/opencv/cvat/pull/1548>)
- Fixed COCO keypoints skeleton parsing and saving (<https://github.com/opencv/cvat/issues/1539>)

### Security
-
Expand Down
11 changes: 6 additions & 5 deletions datumaro/datumaro/components/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ def iou(self, other):
return compute_iou(self.get_bbox(), other.get_bbox())

class PointsCategories(Categories):
Category = namedtuple('Category', ['labels', 'adjacent'])
Category = namedtuple('Category', ['labels', 'joints'])

def __init__(self, items=None, attributes=None):
super().__init__(attributes=attributes)
Expand All @@ -490,12 +490,13 @@ def __init__(self, items=None, attributes=None):
items = {}
self.items = items

def add(self, label_id, labels=None, adjacent=None):
def add(self, label_id, labels=None, joints=None):
if labels is None:
labels = []
if adjacent is None:
adjacent = []
self.items[label_id] = self.Category(labels, set(adjacent))
if joints is None:
joints = []
joints = set(map(tuple, joints))
self.items[label_id] = self.Category(labels, joints)

def __eq__(self, other):
if not super().__eq__(other):
Expand Down
6 changes: 3 additions & 3 deletions datumaro/datumaro/plugins/coco_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def save_categories(self, dataset):
if kp_cat is not None:
cat.update({
'keypoints': [str(l) for l in kp_cat.labels],
'skeleton': [int(i) for i in kp_cat.adjacent],
'skeleton': [list(map(int, j)) for j in kp_cat.joints],
})
self.categories.append(cat)

Expand Down Expand Up @@ -464,8 +464,8 @@ def __init__(self, extractor, save_dir,
self._save_images = save_images

assert segmentation_mode is None or \
segmentation_mode in SegmentationMode or \
isinstance(segmentation_mode, str)
isinstance(segmentation_mode, str) or \
segmentation_mode in SegmentationMode
if segmentation_mode is None:
segmentation_mode = SegmentationMode.guess
if isinstance(segmentation_mode, str):
Expand Down
9 changes: 6 additions & 3 deletions datumaro/datumaro/plugins/coco_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def _load_categories(self, loader):
self._categories = {}

if self._task in [CocoTask.instances, CocoTask.labels,
CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic]:
CocoTask.person_keypoints,
# TODO: CocoTask.stuff, CocoTask.panoptic
]:
label_categories, label_map = self._load_label_categories(loader)
self._categories[AnnotationType.label] = label_categories
self._label_map = label_map
Expand Down Expand Up @@ -101,7 +103,8 @@ def _load_person_kp_categories(self, loader):
for cat in cats:
label_id = self._label_map[cat['id']]
categories.add(label_id=label_id,
labels=cat['keypoints'], adjacent=cat['skeleton'])
labels=cat['keypoints'], joints=cat['skeleton']
)

return categories

Expand Down Expand Up @@ -246,4 +249,4 @@ def __init__(self, path, **kwargs):
class CocoLabelsExtractor(_CocoExtractor):
def __init__(self, path, **kwargs):
kwargs['task'] = CocoTask.labels
super().__init__(path, **kwargs)
super().__init__(path, **kwargs)
6 changes: 3 additions & 3 deletions datumaro/datumaro/plugins/coco_format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
'captions',
'labels', # extension, does not exist in the original COCO format
'image_info',
'panoptic',
'stuff',
# 'panoptic',
# 'stuff',
])

class CocoPath:
IMAGES_DIR = 'images'
ANNOTATIONS_DIR = 'annotations'

IMAGE_EXT = '.jpg'
IMAGE_EXT = '.jpg'
21 changes: 20 additions & 1 deletion datumaro/datumaro/plugins/coco_format/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,26 @@ def __call__(self, path, **extra_params):
if len(subsets) == 0:
raise Exception("Failed to find 'coco' dataset at '%s'" % path)

# TODO: should be removed when proper label merging is implemented
conflicting_types = {CocoTask.instances,
CocoTask.person_keypoints, CocoTask.labels}
ann_types = set(t for s in subsets.values() for t in s) \
& conflicting_types
if 1 <= len(ann_types):
selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0]
if 1 < len(ann_types):
log.warning("Not implemented: "
"Found potentially conflicting source types with labels: %s. "
"Only one type will be used: %s" \
% (", ".join(t.name for t in ann_types), selected_ann_type.name))

for ann_files in subsets.values():
for ann_type, ann_file in ann_files.items():
if ann_type in conflicting_types:
if ann_type is not selected_ann_type:
log.warning("Not implemented: "
"conflicting source '%s' is skipped." % ann_file)
continue
log.info("Found a dataset at '%s'" % ann_file)

source_name = osp.splitext(osp.basename(ann_file))[0]
Expand Down Expand Up @@ -71,6 +89,7 @@ def find_subsets(path):
"type '%s', the only known are: %s" % \
(subset_path, ann_type,
', '.join([e.name for e in CocoTask])))
continue
subset_name = name_parts[1]
subsets[subset_name][ann_type] = subset_path
return dict(subsets)
return dict(subsets)
2 changes: 1 addition & 1 deletion datumaro/datumaro/plugins/datumaro_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _convert_points_categories(self, obj):
converted['items'].append({
'label_id': int(label_id),
'labels': [cast(label, str) for label in item.labels],
'adjacent': [int(v) for v in item.adjacent],
'joints': [list(map(int, j)) for j in item.joints],
})
return converted

Expand Down
2 changes: 1 addition & 1 deletion datumaro/datumaro/plugins/datumaro_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _load_categories(parsed):
point_categories = PointsCategories()
for item in parsed_points_cat['items']:
point_categories.add(int(item['label_id']),
item['labels'], adjacent=item['adjacent'])
item['labels'], joints=item['joints'])

categories[AnnotationType.points] = point_categories

Expand Down
21 changes: 4 additions & 17 deletions datumaro/tests/test_coco_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def test_can_save_and_load_keypoints(self):
points_categories = PointsCategories()
for i in range(10):
label_categories.add(str(i))
points_categories.add(i, [])
points_categories.add(i, joints=[[0, 1], [1, 2]])
categories = {
AnnotationType.label: label_categories,
AnnotationType.points: points_categories,
Expand Down Expand Up @@ -624,25 +624,12 @@ def test_can_save_dataset_with_no_subsets(self):
class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, annotations=[
Label(2, id=1, group=1),
]),

DatasetItem(id=2, annotations=[
Label(3, id=2, group=2),
]),
DatasetItem(id=1),
DatasetItem(id=2),
])

def categories(self):
label_cat = LabelCategories()
point_cat = PointsCategories()
for label in range(10):
label_cat.add('label_' + str(label))
point_cat.add(label)
return {
AnnotationType.label: label_cat,
AnnotationType.points: point_cat,
}
return { AnnotationType.label: LabelCategories() }

with TestDir() as test_dir:
self._test_save_and_load(TestExtractor(),
Expand Down
2 changes: 1 addition & 1 deletion datumaro/tests/test_datumaro_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def categories(self):

points_categories = PointsCategories()
for index, _ in enumerate(label_categories.items):
points_categories.add(index, ['cat1', 'cat2'], adjacent=[0, 1])
points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

return {
AnnotationType.label: label_categories,
Expand Down