From f8077148701e303e42e70066cbc0b9a610fe344c Mon Sep 17 00:00:00 2001 From: zhiltsov-max Date: Mon, 13 Jul 2020 17:56:11 +0300 Subject: [PATCH] [Datumaro] Image control in converters (#1799) * Update converter interface, add image control * Update bindings * update changelog * Fix build --- CHANGELOG.md | 3 + cvat/apps/dataset_manager/formats/coco.py | 5 +- .../formats/datumaro/__init__.py | 7 +- cvat/apps/dataset_manager/formats/labelme.py | 4 +- cvat/apps/dataset_manager/formats/mask.py | 12 +- cvat/apps/dataset_manager/formats/mot.py | 5 +- .../dataset_manager/formats/pascal_voc.py | 5 +- cvat/apps/dataset_manager/formats/tfrecord.py | 5 +- cvat/apps/dataset_manager/formats/yolo.py | 4 +- .../datumaro/cli/contexts/project/__init__.py | 8 +- datumaro/datumaro/components/converter.py | 82 ++++++-- datumaro/datumaro/components/project.py | 10 +- .../datumaro/plugins/coco_format/converter.py | 178 +++++++----------- .../datumaro/plugins/cvat_format/converter.py | 54 +----- .../plugins/datumaro_format/converter.py | 70 ++----- datumaro/datumaro/plugins/image_dir.py | 37 ++-- datumaro/datumaro/plugins/labelme_format.py | 41 ++-- datumaro/datumaro/plugins/mot_format.py | 29 +-- .../tf_detection_api_format/converter.py | 45 +++-- .../tf_detection_api_format/extractor.py | 3 +- .../plugins/tf_detection_api_format/format.py | 4 +- .../datumaro/plugins/voc_format/converter.py | 170 +++++++---------- .../datumaro/plugins/yolo_format/converter.py | 34 +--- datumaro/datumaro/util/__init__.py | 4 + datumaro/tests/test_coco_format.py | 40 ++-- datumaro/tests/test_cvat_format.py | 7 +- datumaro/tests/test_datumaro_format.py | 7 +- datumaro/tests/test_image_dir_format.py | 4 +- datumaro/tests/test_labelme_format.py | 8 +- datumaro/tests/test_mot_format.py | 5 +- datumaro/tests/test_project.py | 8 - datumaro/tests/test_tfrecord_format.py | 15 +- datumaro/tests/test_voc_format.py | 61 +++--- datumaro/tests/test_yolo_format.py | 10 +- 34 files changed, 434 insertions(+), 550 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0a7ef8ca10..d2508ba4bd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Appearance block in attribute annotation mode () - Keyframe navigations and some switchers in attribute annotation mode () - [Datumaro] Added `convert` command to convert datasets directly () +- [Datumaro] Added an option to specify image extension when exporting datasets () +- [Datumaro] Added image copying when exporting datasets, if possible () ### Changed - Removed information about e-mail from the basic user information () @@ -65,6 +67,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added support for attributes in VOC XML format (https://github.com/opencv/cvat/pull/1792) - Added annotation attributes in COCO format (https://github.com/opencv/cvat/pull/1782) - Colorized object items in the side panel () +- [Datumaro] Annotation-less files are not generated anymore in COCO format, unless tasks explicitly requested () ### Deprecated - diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 41d6343a3dc..84472d3aab1 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -18,9 +18,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('coco_instances', - save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('coco_instances').convert(extractor, + save_dir=temp_dir, save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index 59c423d0716..9dd3f9abc27 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -48,11 +48,10 @@ def _save_image_info(save_dir, task_data): def _export(self, task_data, save_dir, save_images=False): dataset = CvatTaskDataExtractor(task_data, include_images=save_images) - converter = dm_env.make_converter('datumaro_project', - save_images=save_images, - config={ 'project_name': task_data.db_task.name, } + dm_env.converters.get('datumaro_project').convert(dataset, + save_dir=save_dir, save_images=save_images, + project_config={ 'project_name': task_data.db_task.name, } ) - converter(dataset, save_dir=save_dir) project = Project.load(save_dir) target_dir = project.config.project_dir diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 31ee2cbde3a..d3bd074d4d3 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -19,8 +19,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('label_me', save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir, + save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index f0cb361f615..268eb347489 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -26,10 +26,9 @@ def _export(dst_file, task_data, save_images=False): extractor = extractor.transform(envt.get('merge_instance_segments')) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('voc_segmentation', - apply_colormap=True, label_map=make_colormap(task_data), - save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('voc_segmentation').convert(extractor, + save_dir=temp_dir, save_images=save_images, + apply_colormap=True, label_map=make_colormap(task_data)) make_zip_archive(temp_dir, dst_file) @@ -68,8 +67,9 @@ def normalize_label(label): def make_colormap(task_data): labels = sorted([label['name'] for _, label in task_data.meta['task']['labels']]) - if 'background' not in labels: - labels.insert(0, 'background') + if 'background' in labels: + labels.remove('background') + labels.insert(0, 'background') predefined = parse_default_colors() diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 37fa7a7bc79..9e373ee5c8b 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -19,9 +19,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('mot_seq_gt', - save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('mot_seq_gt').convert(extractor, + save_dir=temp_dir, save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index 0973fe4e0b6..ee30564bc1e 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -24,9 +24,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('voc', label_map='source', - save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('voc').convert(extractor, + save_dir=temp_dir, save_images=save_images, label_map='source') make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index fef95aa710f..3b7e123eb63 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -27,9 +27,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('tf_detection_api', - save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('tf_detection_api').convert(extractor, + save_dir=temp_dir, save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index 488fe1a42d7..bea73b3c3bd 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -23,8 +23,8 @@ def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('yolo', save_images=save_images) - converter(extractor, save_dir=temp_dir) + dm_env.converters.get('yolo').convert(extractor, + save_dir=temp_dir, save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/datumaro/datumaro/cli/contexts/project/__init__.py b/datumaro/datumaro/cli/contexts/project/__init__.py index 15b49eb0171..65f81886e5c 100644 --- a/datumaro/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/datumaro/cli/contexts/project/__init__.py @@ -340,9 +340,9 @@ def export_command(args): raise CliException("Converter for format '%s' is not found" % \ args.format) - if hasattr(converter, 'from_cmdline'): - extra_args = converter.from_cmdline(args.extra_args) - converter = converter(**extra_args) + extra_args = converter.from_cmdline(args.extra_args) + def converter_proxy(extractor, save_dir): + return converter.convert(extractor, save_dir, **extra_args) filter_args = FilterModes.make_filter_args(args.filter_mode) @@ -352,7 +352,7 @@ def export_command(args): log.info("Exporting the project...") dataset.export_project( save_dir=dst_dir, - converter=converter, + converter=converter_proxy, filter_expr=args.filter, **filter_args) log.info("Project exported to '%s' as '%s'" % \ diff --git a/datumaro/datumaro/components/converter.py b/datumaro/datumaro/components/converter.py index 9ea404d962d..a7c6e1011c2 100644 --- a/datumaro/datumaro/components/converter.py +++ b/datumaro/datumaro/components/converter.py @@ -3,17 +3,77 @@ # # SPDX-License-Identifier: MIT -class Converter: - def __init__(self, cmdline_args=None): - pass +import logging as log +import os +import os.path as osp +import shutil - def __call__(self, extractor, save_dir): - raise NotImplementedError() +from datumaro.components.cli_plugin import CliPlugin +from datumaro.util.image import save_image - def _parse_cmdline(self, cmdline): - parser = self.build_cmdline_parser() - if len(cmdline) != 0 and cmdline[0] == '--': - cmdline = cmdline[1:] - args = parser.parse_args(cmdline) - return vars(args) \ No newline at end of file +class IConverter: + @classmethod + def convert(cls, extractor, save_dir, **options): + raise NotImplementedError("Should be implemented in a subclass") + +class Converter(IConverter, CliPlugin): + DEFAULT_IMAGE_EXT = None + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--save-images', action='store_true', + help="Save images (default: %(default)s)") + parser.add_argument('--image-ext', default=None, + help="Image extension (default: keep or use format default%s)" % \ + (' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else '')) + + return parser + + @classmethod + def convert(cls, extractor, save_dir, **options): + converter = cls(extractor, save_dir, **options) + return converter.apply() + + def apply(self): + raise NotImplementedError("Should be implemented in a subclass") + + def __init__(self, extractor, save_dir, save_images=False, + image_ext=None, default_image_ext=None): + default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT + assert default_image_ext + self._default_image_ext = default_image_ext + + self._save_images = save_images + self._image_ext = image_ext + + self._extractor = extractor + self._save_dir = save_dir + + def _find_image_ext(self, item): + src_ext = None + if item.has_image: + src_ext = osp.splitext(osp.basename(item.image.path))[1] + + return self._image_ext or src_ext or self._default_image_ext + + def _make_image_filename(self, item): + return item.id + self._find_image_ext(item) + + def _save_image(self, item, path=None): + image = item.image.data + if image is None: + log.warning("Item '%s' has no image", item.id) + return item.image.path + + path = path or self._make_image_filename(item) + + src_ext = osp.splitext(osp.basename(item.image.path))[1] + dst_ext = osp.splitext(osp.basename(path))[1] + + os.makedirs(osp.dirname(path), exist_ok=True) + if src_ext == dst_ext and osp.isfile(item.image.path): + shutil.copyfile(item.image.path, path) + else: + save_image(path, image) diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index b8273cd5471..a1cd4919ff5 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -624,9 +624,8 @@ def save(self, save_dir=None, merge=False, recursive=True, if merge: # merge and save the resulting dataset - converter = self.env.make_converter( - DEFAULT_FORMAT, **converter_kwargs) - converter(self, dataset_save_dir) + self.env.converters.get(DEFAULT_FORMAT).convert( + self, dataset_save_dir, **converter_kwargs) else: if recursive: # children items should already be updated @@ -635,9 +634,8 @@ def save(self, save_dir=None, merge=False, recursive=True, if isinstance(source, ProjectDataset): source.save(**converter_kwargs) - converter = self.env.make_converter( - DEFAULT_FORMAT, **converter_kwargs) - converter(self.iterate_own(), dataset_save_dir) + self.env.converters.get(DEFAULT_FORMAT).convert( + self.iterate_own(), dataset_save_dir, **converter_kwargs) project.save(save_dir) except BaseException: diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py index 0e7dd3a444c..7c30636b2aa 100644 --- a/datumaro/datumaro/plugins/coco_format/converter.py +++ b/datumaro/datumaro/plugins/coco_format/converter.py @@ -3,26 +3,23 @@ # # SPDX-License-Identifier: MIT -from enum import Enum -from itertools import groupby import json import logging as log import os import os.path as osp +from enum import Enum +from itertools import groupby import pycocotools.mask as mask_utils -from datumaro.components.converter import Converter -from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, - AnnotationType, Points -) -from datumaro.components.cli_plugin import CliPlugin -from datumaro.util import find, cast, str_to_bool -from datumaro.util.image import save_image -import datumaro.util.mask_tools as mask_tools import datumaro.util.annotation_tools as anno_tools +import datumaro.util.mask_tools as mask_tools +from datumaro.components.converter import Converter +from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType, + Points) +from datumaro.util import cast, find, str_to_bool -from .format import CocoTask, CocoPath +from .format import CocoPath, CocoTask SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask']) @@ -443,7 +440,40 @@ def save_annotations(self, item): self.annotations.append(elem) -class _Converter: +class CocoConverter(Converter): + @staticmethod + def _split_tasks_string(s): + return [CocoTask[i.strip()] for i in s.split(',')] + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--segmentation-mode', + choices=[m.name for m in SegmentationMode], + default=SegmentationMode.guess.name, + help=""" + Save mode for instance segmentation:|n + - '{sm.guess.name}': guess the mode for each instance,|n + |s|suse 'is_crowd' attribute as hint|n + - '{sm.polygons.name}': save polygons,|n + |s|smerge and convert masks, prefer polygons|n + - '{sm.mask.name}': save masks,|n + |s|smerge and convert polygons, prefer masks|n + Default: %(default)s. + """.format(sm=SegmentationMode)) + parser.add_argument('--crop-covered', action='store_true', + help="Crop covered segments so that background objects' " + "segmentation was more accurate (default: %(default)s)") + parser.add_argument('--allow-attributes', + type=str_to_bool, default=True, + help="Allow export of attributes (default: %(default)s)") + parser.add_argument('--tasks', type=cls._split_tasks_string, + help="COCO task filter, comma-separated list of {%s} " + "(default: all)" % ', '.join(t.name for t in CocoTask)) + return parser + + DEFAULT_IMAGE_EXT = CocoPath.IMAGE_EXT + _TASK_CONVERTER = { CocoTask.image_info: _ImageInfoConverter, CocoTask.instances: _InstancesConverter, @@ -453,16 +483,16 @@ class _Converter: } def __init__(self, extractor, save_dir, - tasks=None, save_images=False, segmentation_mode=None, - crop_covered=False, allow_attributes=True): + tasks=None, segmentation_mode=None, crop_covered=False, + allow_attributes=True, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + assert tasks is None or isinstance(tasks, (CocoTask, list, str)) - if tasks is None: - tasks = list(self._TASK_CONVERTER) - elif isinstance(tasks, CocoTask): + if isinstance(tasks, CocoTask): tasks = [tasks] elif isinstance(tasks, str): tasks = [CocoTask[tasks]] - else: + elif tasks: for i, t in enumerate(tasks): if isinstance(t, str): tasks[i] = CocoTask[t] @@ -470,11 +500,6 @@ def __init__(self, extractor, save_dir, assert t in CocoTask, t self._tasks = tasks - self._extractor = extractor - self._save_dir = save_dir - - self._save_images = save_images - assert segmentation_mode is None or \ isinstance(segmentation_mode, str) or \ segmentation_mode in SegmentationMode @@ -502,9 +527,8 @@ def _make_task_converter(self, task): return self._TASK_CONVERTER[task](self) def _make_task_converters(self): - return { - task: self._make_task_converter(task) for task in self._tasks - } + return { task: self._make_task_converter(task) + for task in (self._tasks or self._TASK_CONVERTER) } def _get_image_id(self, item): image_id = self._image_ids.get(item.id) @@ -514,23 +538,14 @@ def _get_image_id(self, item): self._image_ids[item.id] = image_id return image_id - def _save_image(self, item, filename): - image = item.image.data - if image is None: - log.warning("Item '%s' has no image" % item.id) - return '' + def _save_image(self, item, path=None): + super()._save_image(item, + osp.join(self._images_dir, self._make_image_filename(item))) - save_image(osp.join(self._images_dir, filename), image, - create_dir=True) - - def convert(self): + def apply(self): self._make_dirs() - subsets = self._extractor.subsets() - if len(subsets) == 0: - subsets = [ None ] - - for subset_name in subsets: + for subset_name in self._extractor.subsets() or [None]: if subset_name: subset = self._extractor.get_subset(subset_name) else: @@ -541,94 +556,43 @@ def convert(self): for task_conv in task_converters.values(): task_conv.save_categories(subset) for item in subset: - filename = item.id + CocoPath.IMAGE_EXT if self._save_images: if item.has_image: - self._save_image(item, filename) + self._save_image(item) else: - log.debug("Item '%s' has no image info" % item.id) + log.debug("Item '%s' has no image info", item.id) for task_conv in task_converters.values(): - task_conv.save_image_info(item, filename) + task_conv.save_image_info(item, + self._make_image_filename(item)) task_conv.save_annotations(item) for task, task_conv in task_converters.items(): + if task_conv.is_empty() and not self._tasks: + continue task_conv.write(osp.join(self._ann_dir, '%s_%s.json' % (task.name, subset_name))) -class CocoConverter(Converter, CliPlugin): - @staticmethod - def _split_tasks_string(s): - return [CocoTask[i.strip()] for i in s.split(',')] - - @classmethod - def build_cmdline_parser(cls, **kwargs): - kwargs['description'] = """ - Segmentation save modes:|n - - '{sm.guess.name}': guess the mode for each instance,|n - |s|suse 'is_crowd' attribute as a hint|n - - '{sm.polygons.name}': save polygons,|n - |s|smerge and convert masks, prefer polygons|n - - '{sm.mask.name}': save masks,|n - |s|smerge and convert polygons, prefer masks - """.format(sm=SegmentationMode) - parser = super().build_cmdline_parser(**kwargs) - - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - parser.add_argument('--segmentation-mode', - choices=[m.name for m in SegmentationMode], - default=SegmentationMode.guess.name, - help="Save mode for instance segmentation (default: %(default)s)") - parser.add_argument('--crop-covered', action='store_true', - help="Crop covered segments so that background objects' " - "segmentation was more accurate (default: %(default)s)") - parser.add_argument('--allow-attributes', - type=str_to_bool, default=True, - help="Allow export of attributes (default: %(default)s)") - parser.add_argument('--tasks', type=cls._split_tasks_string, - default=None, - help="COCO task filter, comma-separated list of {%s} " - "(default: all)" % ', '.join([t.name for t in CocoTask])) - return parser - - def __init__(self, - tasks=None, save_images=False, segmentation_mode=None, - crop_covered=False, allow_attributes=True): - super().__init__() - - self._options = { - 'tasks': tasks, - 'save_images': save_images, - 'segmentation_mode': segmentation_mode, - 'crop_covered': crop_covered, - 'allow_attributes': allow_attributes, - } - - def __call__(self, extractor, save_dir): - converter = _Converter(extractor, save_dir, **self._options) - converter.convert() - class CocoInstancesConverter(CocoConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.instances - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class CocoImageInfoConverter(CocoConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.image_info - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class CocoPersonKeypointsConverter(CocoConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.person_keypoints - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class CocoCaptionsConverter(CocoConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.captions - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class CocoLabelsConverter(CocoConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.labels - super().__init__(**kwargs) + super().__init__(*args, **kwargs) diff --git a/datumaro/datumaro/plugins/cvat_format/converter.py b/datumaro/datumaro/plugins/cvat_format/converter.py index 36588e2cfe2..0db14e1a17d 100644 --- a/datumaro/datumaro/plugins/cvat_format/converter.py +++ b/datumaro/datumaro/plugins/cvat_format/converter.py @@ -3,25 +3,19 @@ # # SPDX-License-Identifier: MIT -from collections import OrderedDict import logging as log import os import os.path as osp +from collections import OrderedDict from xml.sax.saxutils import XMLGenerator -from datumaro.components.cli_plugin import CliPlugin from datumaro.components.converter import Converter from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType -from datumaro.util import cast -from datumaro.util.image import save_image +from datumaro.util import cast, pairwise from .format import CvatPath -def pairwise(iterable): - a = iter(iterable) - return zip(a, a) - class XmlAnnotationWriter: VERSION = '1.1' @@ -163,15 +157,6 @@ def write(self): self._writer.close_root() - def _save_image(self, item, filename): - image = item.image.data - if image is None: - log.warning("Item '%s' has no image" % item.id) - return '' - - save_image(osp.join(self._context._images_dir, filename), image, - create_dir=True) - def _write_item(self, item, index): image_info = OrderedDict([ ("id", str(cast(item.attributes.get('frame'), int, index))), @@ -186,9 +171,10 @@ def _write_item(self, item, index): image_info["height"] = str(h) if self._context._save_images: - self._save_image(item, filename) + self._context._save_image(item, + osp.join(self._context._images_dir, filename)) else: - log.debug("Item '%s' has no image info" % item.id) + log.debug("Item '%s' has no image info", item.id) self._writer.open_image(image_info) for ann in item.annotations: @@ -321,15 +307,10 @@ def _write_tag(self, label): self._writer.close_tag() -class _Converter: - def __init__(self, extractor, save_dir, save_images=False): - self._extractor = extractor - self._save_dir = save_dir - self._save_images = save_images - - def convert(self): - os.makedirs(self._save_dir, exist_ok=True) +class CvatConverter(Converter): + DEFAULT_IMAGE_EXT = CvatPath.IMAGE_EXT + def apply(self): images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR) os.makedirs(images_dir, exist_ok=True) self._images_dir = images_dir @@ -348,22 +329,3 @@ def convert(self): with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f: writer = _SubsetWriter(f, subset_name, subset, self) writer.write() - -class CvatConverter(Converter, CliPlugin): - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser - - def __init__(self, save_images=False): - super().__init__() - - self._options = { - 'save_images': save_images, - } - - def __call__(self, extractor, save_dir): - converter = _Converter(extractor, save_dir, **self._options) - converter.convert() \ No newline at end of file diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index 7130dc97f07..6e948a25baf 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -17,9 +17,7 @@ LabelCategories, MaskCategories, PointsCategories ) from datumaro.util import cast -from datumaro.util.image import save_image import pycocotools.mask as mask_utils -from datumaro.components.cli_plugin import CliPlugin from .format import DatumaroPath @@ -213,13 +211,10 @@ def _convert_points_categories(self, obj): }) return converted -class _Converter: - def __init__(self, extractor, save_dir, save_images=False): - self._extractor = extractor - self._save_dir = save_dir - self._save_images = save_images +class DatumaroConverter(Converter): + DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT - def convert(self): + def apply(self): os.makedirs(self._save_dir, exist_ok=True) images_dir = osp.join(self._save_dir, DatumaroPath.IMAGES_DIR) @@ -230,9 +225,7 @@ def convert(self): os.makedirs(annotations_dir, exist_ok=True) self._annotations_dir = annotations_dir - subsets = self._extractor.subsets() - if len(subsets) == 0: - subsets = [ None ] + subsets = self._extractor.subsets() or [None] subsets = [n or DEFAULT_SUBSET_NAME for n in subsets] subsets = { name: _SubsetWriter(name, self) for name in subsets } @@ -248,55 +241,20 @@ def convert(self): for subset, writer in subsets.items(): writer.write(annotations_dir) - def _save_image(self, item): - image = item.image.data - if image is None: - return '' - - filename = item.id + DatumaroPath.IMAGE_EXT - image_path = osp.join(self._images_dir, filename) - save_image(image_path, image, create_dir=True) - return filename - -class DatumaroConverter(Converter, CliPlugin): - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser - - def __init__(self, save_images=False): - super().__init__() - - self._options = { - 'save_images': save_images, - } - - def __call__(self, extractor, save_dir): - converter = _Converter(extractor, save_dir, **self._options) - converter.convert() - + def _save_image(self, item, path=None): + super()._save_image(item, + osp.join(self._images_dir, self._make_image_filename(item))) class DatumaroProjectConverter(Converter): @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser - - def __init__(self, config=None, save_images=False): - self._config = config - self._save_images = save_images - - def __call__(self, extractor, save_dir): + def convert(cls, extractor, save_dir, **kwargs): os.makedirs(save_dir, exist_ok=True) from datumaro.components.project import Project - project = Project.generate(save_dir, config=self._config) + project = Project.generate(save_dir, + config=kwargs.pop('project_config', None)) - converter = project.env.make_converter('datumaro', - save_images=self._save_images) - converter(extractor, save_dir=osp.join( - project.config.project_dir, project.config.dataset_dir)) \ No newline at end of file + DatumaroConverter.convert(extractor, + save_dir=osp.join( + project.config.project_dir, project.config.dataset_dir), + **kwargs) \ No newline at end of file diff --git a/datumaro/datumaro/plugins/image_dir.py b/datumaro/datumaro/plugins/image_dir.py index b6c25d60b5a..410a91f8dac 100644 --- a/datumaro/datumaro/plugins/image_dir.py +++ b/datumaro/datumaro/plugins/image_dir.py @@ -3,12 +3,13 @@ # # SPDX-License-Identifier: MIT +import logging as log import os import os.path as osp from datumaro.components.extractor import DatasetItem, SourceExtractor, Importer from datumaro.components.converter import Converter -from datumaro.util.image import save_image +from datumaro.util.image import Image class ImageDirImporter(Importer): @@ -32,8 +33,6 @@ def __call__(self, path, **extra_params): class ImageDirExtractor(SourceExtractor): - _SUPPORTED_FORMATS = ['.png', '.jpg'] - def __init__(self, url): super().__init__() @@ -43,11 +42,15 @@ def __init__(self, url): for dirpath, _, filenames in os.walk(url): for name in filenames: path = osp.join(dirpath, name) - if not self._is_image(path): + try: + image = Image(path) + # force loading + image.data # pylint: disable=pointless-statement + except Exception: continue item_id = osp.relpath(osp.splitext(path)[0], url) - items.append(DatasetItem(id=item_id, image=path)) + items.append(DatasetItem(id=item_id, image=image)) self._items = items @@ -58,20 +61,16 @@ def __iter__(self): def __len__(self): return len(self._items) - def _is_image(self, path): - if not osp.isfile(path): - return False - for ext in self._SUPPORTED_FORMATS: - if path.endswith(ext): - return True - return False - class ImageDirConverter(Converter): - def __call__(self, extractor, save_dir): - os.makedirs(save_dir, exist_ok=True) + DEFAULT_IMAGE_EXT = '.jpg' + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) - for item in extractor: - if item.has_image and item.image.has_data: - save_image(osp.join(save_dir, item.id + '.jpg'), - item.image.data, create_dir=True) + for item in self._extractor: + if item.has_image: + self._save_image(item, + osp.join(self._save_dir, self._make_image_filename(item))) + else: + log.debug("Item '%s' has no image info", item.id) \ No newline at end of file diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py index ac998cbf654..d8c71716d90 100644 --- a/datumaro/datumaro/plugins/labelme_format.py +++ b/datumaro/datumaro/plugins/labelme_format.py @@ -15,7 +15,6 @@ ) from datumaro.components.extractor import Importer from datumaro.components.converter import Converter -from datumaro.components.cli_plugin import CliPlugin from datumaro.util.image import Image, save_image from datumaro.util.mask_tools import load_mask, find_mask_bbox @@ -254,8 +253,7 @@ def __call__(self, path, **extra_params): params.update(extra_params) source_name = osp.splitext(osp.basename(subset_path))[0] - project.add_source(source_name, - { + project.add_source(source_name, { 'url': subset_path, 'format': self._EXTRACTOR_NAME, 'options': params, @@ -285,34 +283,18 @@ def has_annotations(d): return subset_paths -class LabelMeConverter(Converter, CliPlugin): - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser - - def __init__(self, save_images=False): - super().__init__() - - self._save_images = save_images - - def __call__(self, extractor, save_dir): - self._extractor = extractor - - subsets = extractor.subsets() - if len(subsets) == 0: - subsets = [ None ] +class LabelMeConverter(Converter): + DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT - for subset_name in subsets: + def apply(self): + for subset_name in self._extractor.subsets() or [None]: if subset_name: - subset = extractor.get_subset(subset_name) + subset = self._extractor.get_subset(subset_name) else: subset_name = DEFAULT_SUBSET_NAME - subset = extractor + subset = self._extractor - subset_dir = osp.join(save_dir, subset_name) + subset_dir = osp.join(self._save_dir, subset_name) os.makedirs(subset_dir, exist_ok=True) os.makedirs(osp.join(subset_dir, LabelMePath.MASKS_DIR), exist_ok=True) @@ -335,13 +317,12 @@ def _save_item(self, item, subset_dir): raise Exception("Can't export item '%s': " "LabelMe format only supports flat image layout" % item.id) - image_filename = item.id + LabelMePath.IMAGE_EXT + image_filename = self._make_image_filename(item) if self._save_images: if item.has_image and item.image.has_data: - save_image(osp.join(subset_dir, image_filename), - item.image.data, create_dir=True) + self._save_image(item, osp.join(subset_dir, image_filename)) else: - log.debug("Item '%s' has no image" % item.id) + log.debug("Item '%s' has no image", item.id) root_elem = ET.Element('annotation') ET.SubElement(root_elem, 'filename').text = image_filename diff --git a/datumaro/datumaro/plugins/mot_format.py b/datumaro/datumaro/plugins/mot_format.py index 6406d47b5af..f3776078b23 100644 --- a/datumaro/datumaro/plugins/mot_format.py +++ b/datumaro/datumaro/plugins/mot_format.py @@ -19,7 +19,6 @@ ) from datumaro.components.extractor import Importer from datumaro.components.converter import Converter -from datumaro.components.cli_plugin import CliPlugin from datumaro.util import cast from datumaro.util.image import Image, save_image @@ -253,25 +252,17 @@ def find_subsets(path): subsets.append(p) return subsets -class MotSeqGtConverter(Converter, CliPlugin): - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().__init__(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser - - def __init__(self, save_images=False): - super().__init__() +class MotSeqGtConverter(Converter): + DEFAULT_IMAGE_EXT = MotPath.IMAGE_EXT - self._save_images = save_images + def apply(self): + extractor = self._extractor - def __call__(self, extractor, save_dir): - images_dir = osp.join(save_dir, MotPath.IMAGE_DIR) + images_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR) os.makedirs(images_dir, exist_ok=True) self._images_dir = images_dir - anno_dir = osp.join(save_dir, 'gt') + anno_dir = osp.join(self._save_dir, 'gt') os.makedirs(anno_dir, exist_ok=True) anno_file = osp.join(anno_dir, MotPath.GT_FILENAME) with open(anno_file, 'w', encoding="utf-8") as csv_file: @@ -291,6 +282,7 @@ def __call__(self, extractor, save_dir): if track_id not in track_id_mapping: track_id_mapping[track_id] = len(track_id_mapping) track_id = track_id_mapping[track_id] + writer.writerow({ 'frame_id': frame_id, 'track_id': track_id, @@ -311,11 +303,10 @@ def __call__(self, extractor, save_dir): if self._save_images: if item.has_image and item.image.has_data: - save_image(osp.join(self._images_dir, - '%06d%s' % (frame_id, MotPath.IMAGE_EXT)), - item.image.data) + self._save_image(item, osp.join(self._images_dir, + '%06d%s' % (frame_id, self._find_image_ext(item)))) else: - log.debug("Item '%s' has no image" % item.id) + log.debug("Item '%s' has no image", item.id) labels_file = osp.join(anno_dir, MotPath.LABELS_FILE) with open(labels_file, 'w', encoding='utf-8') as f: diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py index 1273b7b221e..a88822d3c8d 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py @@ -14,11 +14,10 @@ LabelCategories ) from datumaro.components.converter import Converter -from datumaro.components.cli_plugin import CliPlugin from datumaro.util.image import encode_image -from datumaro.util.mask_tools import merge_masks from datumaro.util.annotation_tools import (compute_bbox, find_group_leader, find_instances) +from datumaro.util.mask_tools import merge_masks from datumaro.util.tf_util import import_tf as _import_tf from .format import DetectionApiPath @@ -45,26 +44,25 @@ def bytes_list_feature(value): def float_list_feature(value): return tf.train.Feature(float_list=tf.train.FloatList(value=value)) -class TfDetectionApiConverter(Converter, CliPlugin): +class TfDetectionApiConverter(Converter): + DEFAULT_IMAGE_EXT = DetectionApiPath.DEFAULT_IMAGE_EXT + @classmethod def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") parser.add_argument('--save-masks', action='store_true', help="Include instance masks (default: %(default)s)") return parser - def __init__(self, save_images=False, save_masks=False): - super().__init__() + def __init__(self, extractor, save_dir, save_masks=False, **kwargs): + super().__init__(extractor, save_dir, **kwargs) - self._save_images = save_images self._save_masks = save_masks - def __call__(self, extractor, save_dir): - os.makedirs(save_dir, exist_ok=True) + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) - label_categories = extractor.categories().get(AnnotationType.label, + label_categories = self._extractor.categories().get(AnnotationType.label, LabelCategories()) get_label = lambda label_id: label_categories.items[label_id].name \ if label_id is not None else '' @@ -74,18 +72,18 @@ def __call__(self, extractor, save_dir): self._get_label = get_label self._get_label_id = map_label_id - subsets = extractor.subsets() + subsets = self._extractor.subsets() if len(subsets) == 0: subsets = [ None ] for subset_name in subsets: if subset_name: - subset = extractor.get_subset(subset_name) + subset = self._extractor.get_subset(subset_name) else: subset_name = DEFAULT_SUBSET_NAME - subset = extractor + subset = self._extractor - labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE) + labelmap_path = osp.join(self._save_dir, DetectionApiPath.LABELMAP_FILE) with codecs.open(labelmap_path, 'w', encoding='utf8') as f: for label, idx in label_ids.items(): f.write( @@ -95,7 +93,7 @@ def __call__(self, extractor, save_dir): '}\n\n' ) - anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name)) + anno_path = osp.join(self._save_dir, '%s.tfrecord' % (subset_name)) with tf.io.TFRecordWriter(anno_path) as writer: for item in subset: tf_example = self._make_tf_example(item) @@ -167,7 +165,7 @@ def _make_tf_example(self, item): ), } - filename = item.id + DetectionApiPath.IMAGE_EXT + filename = self._make_image_filename(item) features['image/filename'] = bytes_feature(filename.encode('utf-8')) if not item.has_image: @@ -186,8 +184,7 @@ def _make_tf_example(self, item): }) if self._save_images: if item.has_image and item.image.has_data: - fmt = DetectionApiPath.IMAGE_FORMAT - buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT) + buffer, fmt = self._save_image(item, filename) features.update({ 'image/encoded': bytes_feature(buffer), @@ -204,3 +201,13 @@ def _make_tf_example(self, item): features=tf.train.Features(feature=features)) return tf_example + + def _save_image(self, item, path=None): + dst_ext = osp.splitext(osp.basename(path))[1] + fmt = DetectionApiPath.IMAGE_EXT_FORMAT.get(dst_ext) + if not fmt: + log.warning("Item '%s': can't find format string for the '%s' " + "image extension, the corresponding field will be empty." % \ + (item.id, dst_ext)) + buffer = encode_image(item.image.data, dst_ext) + return buffer, fmt \ No newline at end of file diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py index 13958895029..7928c78846e 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py @@ -117,7 +117,6 @@ def _parse_tfrecord_file(cls, filepath, subset, images_dir): frame_width = tf.cast( parsed_record['image/width'], tf.int64).numpy().item() frame_image = parsed_record['image/encoded'].numpy() - frame_format = parsed_record['image/format'].numpy().decode('utf-8') xmins = tf.sparse.to_dense( parsed_record['image/object/bbox/xmin']).numpy() ymins = tf.sparse.to_dense( @@ -176,7 +175,7 @@ def _parse_tfrecord_file(cls, filepath, subset, images_dir): image_size = (frame_height, frame_width) image_params = {} - if frame_image and frame_format: + if frame_image: image_params['data'] = lazy_image(frame_image, decode_image) if frame_filename: image_params['path'] = osp.join(images_dir, frame_filename) diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/format.py b/datumaro/datumaro/plugins/tf_detection_api_format/format.py index 9e31212e89c..829a89e4efc 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/format.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/format.py @@ -7,7 +7,7 @@ class DetectionApiPath: IMAGES_DIR = 'images' ANNOTATIONS_DIR = 'annotations' - IMAGE_EXT = '.jpg' - IMAGE_FORMAT = 'jpeg' + DEFAULT_IMAGE_EXT = '.jpg' + IMAGE_EXT_FORMAT = {'.jpg': 'jpeg', '.png': 'png'} LABELMAP_FILE = 'label_map.pbtxt' \ No newline at end of file diff --git a/datumaro/datumaro/plugins/voc_format/converter.py b/datumaro/datumaro/plugins/voc_format/converter.py index 67130a78bbf..0de87dfbdde 100644 --- a/datumaro/datumaro/plugins/voc_format/converter.py +++ b/datumaro/datumaro/plugins/voc_format/converter.py @@ -3,20 +3,19 @@ # # SPDX-License-Identifier: MIT +import logging as log +import os +import os.path as osp from collections import OrderedDict, defaultdict from enum import Enum from itertools import chain -import logging as log + from lxml import etree as ET -import os -import os.path as osp -from datumaro.components.cli_plugin import CliPlugin from datumaro.components.converter import Converter from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType, - LabelCategories, CompiledMask, -) -from datumaro.util import str_to_bool, find + CompiledMask, LabelCategories) +from datumaro.util import find, str_to_bool from datumaro.util.image import save_image from datumaro.util.mask_tools import paint_mask, remap_mask @@ -50,10 +49,47 @@ def _write_xml_bbox(bbox, parent_elem): LabelmapType = Enum('LabelmapType', ['voc', 'source', 'guess']) -class _Converter: +class VocConverter(Converter): + DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT + + @staticmethod + def _split_tasks_string(s): + return [VocTask[i.strip()] for i in s.split(',')] + + @staticmethod + def _get_labelmap(s): + if osp.isfile(s): + return s + try: + return LabelmapType[s].name + except KeyError: + import argparse + raise argparse.ArgumentTypeError() + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + + parser.add_argument('--apply-colormap', type=str_to_bool, default=True, + help="Use colormap for class and instance masks " + "(default: %(default)s)") + parser.add_argument('--label-map', type=cls._get_labelmap, default=None, + help="Labelmap file path or one of %s" % \ + ', '.join(t.name for t in LabelmapType)) + parser.add_argument('--allow-attributes', + type=str_to_bool, default=True, + help="Allow export of attributes (default: %(default)s)") + parser.add_argument('--tasks', type=cls._split_tasks_string, + help="VOC task filter, comma-separated list of {%s} " + "(default: all)" % ', '.join(t.name for t in VocTask)) + + return parser + def __init__(self, extractor, save_dir, - tasks=None, apply_colormap=True, save_images=False, label_map=None, - allow_attributes=True): + tasks=None, apply_colormap=True, label_map=None, + allow_attributes=True, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + assert tasks is None or isinstance(tasks, (VocTask, list, set)) if tasks is None: tasks = set(VocTask) @@ -63,20 +99,17 @@ def __init__(self, extractor, save_dir, tasks = set(t if t in VocTask else VocTask[t] for t in tasks) self._tasks = tasks - self._extractor = extractor - self._save_dir = save_dir self._apply_colormap = apply_colormap self._allow_attributes = allow_attributes - self._save_images = save_images self._load_categories(label_map) - def convert(self): - self.init_dirs() + def apply(self): + self.make_dirs() self.save_subsets() self.save_label_map() - def init_dirs(self): + def make_dirs(self): save_dir = self._save_dir subsets_dir = osp.join(save_dir, VocPath.SUBSETS_DIR) cls_subsets_dir = osp.join(subsets_dir, @@ -116,11 +149,7 @@ def get_label(self, label_id): categories()[AnnotationType.label].items[label_id].name def save_subsets(self): - subsets = self._extractor.subsets() - if len(subsets) == 0: - subsets = [ None ] - - for subset_name in subsets: + for subset_name in self._extractor.subsets() or [None]: if subset_name: subset = self._extractor.get_subset(subset_name) else: @@ -136,13 +165,13 @@ def save_subsets(self): for item in subset: log.debug("Converting item '%s'", item.id) - image_filename = item.id + VocPath.IMAGE_EXT + image_filename = self._make_image_filename(item) if self._save_images: if item.has_image and item.image.has_data: - save_image(osp.join(self._images_dir, image_filename), - item.image.data, create_dir=True) + self._save_image(item, + osp.join(self._images_dir, image_filename)) else: - log.debug("Item '%s' has no image" % item.id) + log.debug("Item '%s' has no image", item.id) labels = [] bboxes = [] @@ -269,9 +298,7 @@ def save_subsets(self): if len(attrs_elem): obj_elem.append(attrs_elem) - if self._tasks & {None, - VocTask.detection, - VocTask.person_layout, + if self._tasks & {VocTask.detection, VocTask.person_layout, VocTask.action_classification}: ann_path = osp.join(self._ann_dir, item.id + '.xml') os.makedirs(osp.dirname(ann_path), exist_ok=True) @@ -314,19 +341,16 @@ def save_subsets(self): action_list[item.id] = None segm_list[item.id] = None - if self._tasks & {None, - VocTask.classification, - VocTask.detection, - VocTask.action_classification, - VocTask.person_layout}: + if self._tasks & {VocTask.classification, VocTask.detection, + VocTask.action_classification, VocTask.person_layout}: self.save_clsdet_lists(subset_name, clsdet_list) - if self._tasks & {None, VocTask.classification}: + if self._tasks & {VocTask.classification}: self.save_class_lists(subset_name, class_lists) - if self._tasks & {None, VocTask.action_classification}: + if self._tasks & {VocTask.action_classification}: self.save_action_lists(subset_name, action_list) - if self._tasks & {None, VocTask.person_layout}: + if self._tasks & {VocTask.person_layout}: self.save_layout_lists(subset_name, layout_list) - if self._tasks & {None, VocTask.segmentation}: + if self._tasks & {VocTask.segmentation}: self.save_segm_lists(subset_name, segm_list) def save_action_lists(self, subset_name, action_list): @@ -555,79 +579,27 @@ def map_id(src_id): def _remap_mask(self, mask): return remap_mask(mask, self._label_id_mapping) -class VocConverter(Converter, CliPlugin): - @staticmethod - def _split_tasks_string(s): - return [VocTask[i.strip()] for i in s.split(',')] - - @staticmethod - def _get_labelmap(s): - if osp.isfile(s): - return s - try: - return LabelmapType[s].name - except KeyError: - import argparse - raise argparse.ArgumentTypeError() - - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - parser.add_argument('--apply-colormap', type=str_to_bool, default=True, - help="Use colormap for class and instance masks " - "(default: %(default)s)") - parser.add_argument('--label-map', type=cls._get_labelmap, default=None, - help="Labelmap file path or one of %s" % \ - ', '.join(t.name for t in LabelmapType)) - parser.add_argument('--allow-attributes', - type=str_to_bool, default=True, - help="Allow export of attributes (default: %(default)s)") - parser.add_argument('--tasks', type=cls._split_tasks_string, - help="VOC task filter, comma-separated list of {%s} " - "(default: all)" % ', '.join(t.name for t in VocTask)) - - return parser - - def __init__(self, tasks=None, save_images=False, - apply_colormap=False, label_map=None, allow_attributes=True): - super().__init__() - - self._options = { - 'tasks': tasks, - 'save_images': save_images, - 'apply_colormap': apply_colormap, - 'label_map': label_map, - 'allow_attributes': allow_attributes, - } - - def __call__(self, extractor, save_dir): - converter = _Converter(extractor, save_dir, **self._options) - converter.convert() - class VocClassificationConverter(VocConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = VocTask.classification - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class VocDetectionConverter(VocConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = VocTask.detection - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class VocLayoutConverter(VocConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = VocTask.person_layout - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class VocActionConverter(VocConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = VocTask.action_classification - super().__init__(**kwargs) + super().__init__(*args, **kwargs) class VocSegmentationConverter(VocConverter): - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): kwargs['tasks'] = VocTask.segmentation - super().__init__(**kwargs) + super().__init__(*args, **kwargs) diff --git a/datumaro/datumaro/plugins/yolo_format/converter.py b/datumaro/datumaro/plugins/yolo_format/converter.py index 2d14a06368a..a8ed3524392 100644 --- a/datumaro/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/datumaro/plugins/yolo_format/converter.py @@ -3,15 +3,13 @@ # # SPDX-License-Identifier: MIT -from collections import OrderedDict import logging as log import os import os.path as osp +from collections import OrderedDict from datumaro.components.converter import Converter from datumaro.components.extractor import AnnotationType -from datumaro.components.cli_plugin import CliPlugin -from datumaro.util.image import save_image from .format import YoloPath @@ -26,21 +24,14 @@ def _make_yolo_bbox(img_size, box): h = (box[3] - box[1]) / img_size[1] return x, y, w, h -class YoloConverter(Converter, CliPlugin): +class YoloConverter(Converter): # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects + DEFAULT_IMAGE_EXT = '.jpg' - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('--save-images', action='store_true', - help="Save images (default: %(default)s)") - return parser + def apply(self): + extractor = self._extractor + save_dir = self._save_dir - def __init__(self, save_images=False): - super().__init__() - self._save_images = save_images - - def __call__(self, extractor, save_dir): os.makedirs(save_dir, exist_ok=True) label_categories = extractor.categories()[AnnotationType.label] @@ -50,13 +41,9 @@ def __call__(self, extractor, save_dir): f.writelines('%s\n' % l[0] for l in sorted(label_ids.items(), key=lambda x: x[1])) - subsets = extractor.subsets() - if len(subsets) == 0: - subsets = [ None ] - subset_lists = OrderedDict() - for subset_name in subsets: + for subset_name in extractor.subsets() or [None]: if subset_name and subset_name in YoloPath.SUBSET_NAMES: subset = extractor.get_subset(subset_name) elif not subset_name: @@ -80,11 +67,10 @@ def __call__(self, extractor, save_dir): "item has no image info" % item.id) height, width = item.image.size - image_name = item.id + '.jpg' + image_name = self._make_image_filename(item) if self._save_images: if item.has_image and item.image.has_data: - save_image(osp.join(subset_dir, image_name), - item.image.data, create_dir=True) + self._save_image(item, osp.join(subset_dir, image_name)) else: log.warning("Item '%s' has no image" % item.id) image_paths[item.id] = osp.join('data', @@ -119,4 +105,4 @@ def __call__(self, extractor, save_dir): osp.join('data', subset_list_name))) f.write('names = %s\n' % osp.join('data', 'obj.names')) - f.write('backup = backup/\n') \ No newline at end of file + f.write('backup = backup/\n') diff --git a/datumaro/datumaro/util/__init__.py b/datumaro/datumaro/util/__init__.py index 1db427b1797..126a365bcc6 100644 --- a/datumaro/datumaro/util/__init__.py +++ b/datumaro/datumaro/util/__init__.py @@ -62,6 +62,10 @@ def to_snake_case(s): name.append(char) return ''.join(name) +def pairwise(iterable): + a = iter(iterable) + return zip(a, a) + def take_by(iterable, count): """ Returns elements from the input iterable by batches of N items. diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py index 08c75d0f2fd..129d64e7882 100644 --- a/datumaro/tests/test_coco_format.py +++ b/datumaro/tests/test_coco_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import os.path as osp @@ -92,7 +93,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoCaptionsConverter(), test_dir) + CocoCaptionsConverter.convert, test_dir) def test_can_save_and_load_instances(self): label_categories = LabelCategories() @@ -186,7 +187,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoInstancesConverter(), test_dir, + CocoInstancesConverter.convert, test_dir, target_dataset=DstExtractor()) def test_can_merge_polygons_on_loading(self): @@ -237,7 +238,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcExtractor(), - CocoInstancesConverter(), test_dir, + CocoInstancesConverter.convert, test_dir, importer_args={'merge_instance_polygons': True}, target_dataset=DstExtractor()) @@ -295,8 +296,8 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcTestExtractor(), - CocoInstancesConverter(crop_covered=True), test_dir, - target_dataset=DstTestExtractor()) + partial(CocoInstancesConverter.convert, crop_covered=True), + test_dir, target_dataset=DstTestExtractor()) def test_can_convert_polygons_to_mask(self): label_categories = LabelCategories() @@ -345,8 +346,8 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcTestExtractor(), - CocoInstancesConverter(segmentation_mode='mask'), test_dir, - target_dataset=DstTestExtractor()) + partial(CocoInstancesConverter.convert, segmentation_mode='mask'), + test_dir, target_dataset=DstTestExtractor()) def test_can_convert_masks_to_polygons(self): label_categories = LabelCategories() @@ -395,8 +396,8 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcExtractor(), - CocoInstancesConverter(segmentation_mode='polygons'), test_dir, - target_dataset=DstExtractor()) + partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), + test_dir, target_dataset=DstExtractor()) def test_can_save_and_load_images(self): class TestExtractor(Extractor): @@ -414,7 +415,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoImageInfoConverter(), test_dir) + CocoImageInfoConverter.convert, test_dir) def test_can_save_and_load_labels(self): class TestExtractor(Extractor): @@ -438,7 +439,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoLabelsConverter(), test_dir) + CocoLabelsConverter.convert, test_dir) def test_can_save_and_load_keypoints(self): label_categories = LabelCategories() @@ -517,7 +518,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoPersonKeypointsConverter(), test_dir, + CocoPersonKeypointsConverter.convert, test_dir, target_dataset=DstTestExtractor()) def test_can_save_dataset_with_no_subsets(self): @@ -528,12 +529,9 @@ def __iter__(self): DatasetItem(id=2, attributes={'id': 2}), ]) - def categories(self): - return { AnnotationType.label: LabelCategories() } - with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoConverter(), test_dir) + CocoConverter.convert, test_dir) def test_can_save_dataset_with_image_info(self): class TestExtractor(Extractor): @@ -545,7 +543,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoConverter(tasks='image_info'), test_dir) + CocoImageInfoConverter.convert, test_dir) def test_relative_paths(self): class TestExtractor(Extractor): @@ -561,7 +559,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoConverter(tasks='image_info', save_images=True), test_dir) + partial(CocoImageInfoConverter.convert, save_images=True), + test_dir) def test_preserve_coco_ids(self): class TestExtractor(Extractor): @@ -573,7 +572,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoConverter(tasks='image_info', save_images=True), test_dir) + partial(CocoImageInfoConverter.convert, save_images=True), + test_dir) def test_annotation_attributes(self): class TestExtractor(Extractor): @@ -593,4 +593,4 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CocoConverter(), test_dir) \ No newline at end of file + CocoConverter.convert, test_dir) diff --git a/datumaro/tests/test_cvat_format.py b/datumaro/tests/test_cvat_format.py index 463c662d9f3..9f2622034eb 100644 --- a/datumaro/tests/test_cvat_format.py +++ b/datumaro/tests/test_cvat_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import os.path as osp @@ -250,7 +251,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcExtractor(), - CvatConverter(save_images=True), test_dir, + partial(CvatConverter.convert, save_images=True), test_dir, target_dataset=DstExtractor()) def test_relative_paths(self): @@ -281,7 +282,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(SrcExtractor(), - CvatConverter(save_images=True), test_dir, + partial(CvatConverter.convert, save_images=True), test_dir, target_dataset=DstExtractor()) def test_preserve_frame_ids(self): @@ -297,4 +298,4 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - CvatConverter(save_images=True), test_dir) + CvatConverter.convert, test_dir) diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py index 8d5b723a780..26e6fc88ddc 100644 --- a/datumaro/tests/test_datumaro_format.py +++ b/datumaro/tests/test_datumaro_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np from unittest import TestCase @@ -90,11 +91,11 @@ def categories(self): def test_can_save_and_load(self): with TestDir() as test_dir: self._test_save_and_load(self.TestExtractor(), - DatumaroConverter(save_images=True), test_dir) + partial(DatumaroConverter.convert, save_images=True), test_dir) def test_can_detect(self): with TestDir() as test_dir: - DatumaroConverter()(self.TestExtractor(), save_dir=test_dir) + DatumaroConverter.convert(self.TestExtractor(), save_dir=test_dir) self.assertTrue(DatumaroImporter.detect(test_dir)) @@ -109,4 +110,4 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - DatumaroConverter(save_images=True), test_dir) + partial(DatumaroConverter.convert, save_images=True), test_dir) diff --git a/datumaro/tests/test_image_dir_format.py b/datumaro/tests/test_image_dir_format.py index bcf50fd4288..b991220ebb1 100644 --- a/datumaro/tests/test_image_dir_format.py +++ b/datumaro/tests/test_image_dir_format.py @@ -20,7 +20,7 @@ def __iter__(self): with TestDir() as test_dir: source_dataset = TestExtractor() - ImageDirConverter()(source_dataset, save_dir=test_dir) + ImageDirConverter.convert(source_dataset, save_dir=test_dir) project = Project.import_from(test_dir, 'image_dir') parsed_dataset = project.make_dataset() @@ -39,7 +39,7 @@ def __iter__(self): with TestDir() as test_dir: source_dataset = TestExtractor() - ImageDirConverter()(source_dataset, save_dir=test_dir) + ImageDirConverter.convert(source_dataset, save_dir=test_dir) project = Project.import_from(test_dir, 'image_dir') parsed_dataset = project.make_dataset() diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py index f6c4596417a..b3abd823d91 100644 --- a/datumaro/tests/test_labelme_format.py +++ b/datumaro/tests/test_labelme_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import os.path as osp @@ -104,8 +105,8 @@ def categories(self): } with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), LabelMeConverter(save_images=True), + self._test_save_and_load(SrcExtractor(), + partial(LabelMeConverter.convert, save_images=True), test_dir, target_dataset=DstExtractor()) def test_cant_save_dataset_with_relative_paths(self): @@ -121,7 +122,8 @@ def categories(self): with self.assertRaisesRegex(Exception, r'only supports flat'): with TestDir() as test_dir: self._test_save_and_load(SrcExtractor(), - LabelMeConverter(save_images=True), test_dir) + partial(LabelMeConverter.convert, save_images=True), + test_dir) DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') diff --git a/datumaro/tests/test_mot_format.py b/datumaro/tests/test_mot_format.py index 2aaadd0d94a..9f212116b7e 100644 --- a/datumaro/tests/test_mot_format.py +++ b/datumaro/tests/test_mot_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import os.path as osp @@ -114,8 +115,8 @@ def categories(self): } with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), MotSeqGtConverter(save_images=True), + self._test_save_and_load(SrcExtractor(), + partial(MotSeqGtConverter.convert, save_images=True), test_dir, target_dataset=DstExtractor()) diff --git a/datumaro/tests/test_project.py b/datumaro/tests/test_project.py index 267ef378e3c..023341c6a81 100644 --- a/datumaro/tests/test_project.py +++ b/datumaro/tests/test_project.py @@ -7,7 +7,6 @@ from datumaro.components.project import Project, Environment, Dataset from datumaro.components.config_model import Source, Model from datumaro.components.launcher import Launcher, ModelTransform -from datumaro.components.converter import Converter from datumaro.components.extractor import (Extractor, DatasetItem, Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, LabelCategories, AnnotationType @@ -181,12 +180,6 @@ def launch(self, inputs): for inp in inputs: yield [ Label(inp[0, 0, 0]) ] - class TestConverter(Converter): - def __call__(self, extractor, save_dir): - for item in extractor: - with open(osp.join(save_dir, '%s.txt' % item.id), 'w') as f: - f.write(str(item.annotations[0].label) + '\n') - class TestExtractorDst(Extractor): def __init__(self, url): super().__init__() @@ -206,7 +199,6 @@ def __iter__(self): project = Project() project.env.launchers.register(launcher_name, TestLauncher) project.env.extractors.register(extractor_name, TestExtractorSrc) - project.env.converters.register(extractor_name, TestConverter) project.add_model(model_name, { 'launcher': launcher_name }) project.add_source('source', { 'format': extractor_name }) diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py index b7110c7fc90..5071ad25915 100644 --- a/datumaro/tests/test_tfrecord_format.py +++ b/datumaro/tests/test_tfrecord_format.py @@ -1,3 +1,4 @@ +from functools import partial import numpy as np import os.path as osp @@ -69,8 +70,8 @@ def categories(self): } with TestDir() as test_dir: - self._test_save_and_load( - TestExtractor(), TfDetectionApiConverter(save_images=True), + self._test_save_and_load(TestExtractor(), + partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_masks(self): @@ -99,8 +100,8 @@ def categories(self): } with TestDir() as test_dir: - self._test_save_and_load( - TestExtractor(), TfDetectionApiConverter(save_masks=True), + self._test_save_and_load(TestExtractor(), + partial(TfDetectionApiConverter.convert, save_masks=True), test_dir) def test_can_save_dataset_with_no_subsets(self): @@ -139,8 +140,8 @@ def categories(self): } with TestDir() as test_dir: - self._test_save_and_load( - TestExtractor(), TfDetectionApiConverter(save_images=True), + self._test_save_and_load(TestExtractor(), + partial(TfDetectionApiConverter.convert, save_images=True), test_dir) def test_can_save_dataset_with_image_info(self): @@ -158,7 +159,7 @@ def categories(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - TfDetectionApiConverter(), test_dir) + TfDetectionApiConverter.convert, test_dir) def test_labelmap_parsing(self): text = """ diff --git a/datumaro/tests/test_voc_format.py b/datumaro/tests/test_voc_format.py index d66b6db0101..52f9403c68d 100644 --- a/datumaro/tests/test_voc_format.py +++ b/datumaro/tests/test_voc_format.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from functools import partial import numpy as np import os.path as osp @@ -158,7 +159,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocClassificationConverter(label_map='voc'), test_dir) + partial(VocClassificationConverter.convert, label_map='voc'), + test_dir) def test_can_save_voc_det(self): class TestExtractor(TestExtractorBase): @@ -213,8 +215,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocDetectionConverter(label_map='voc'), test_dir, - target_dataset=DstExtractor()) + partial(VocDetectionConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) def test_can_save_voc_segm(self): class TestExtractor(TestExtractorBase): @@ -247,8 +249,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocSegmentationConverter(label_map='voc'), test_dir, - target_dataset=DstExtractor()) + partial(VocSegmentationConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) def test_can_save_voc_segm_unpainted(self): class TestExtractor(TestExtractorBase): @@ -281,7 +283,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocSegmentationConverter(label_map='voc', apply_colormap=False), + partial(VocSegmentationConverter.convert, + label_map='voc', apply_colormap=False), test_dir, target_dataset=DstExtractor()) def test_can_save_voc_segm_with_many_instances(self): @@ -316,8 +319,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocSegmentationConverter(label_map='voc'), test_dir, - target_dataset=DstExtractor()) + partial(VocSegmentationConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) def test_can_save_voc_layout(self): class TestExtractor(TestExtractorBase): @@ -341,7 +344,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocLayoutConverter(label_map='voc'), test_dir) + partial(VocLayoutConverter.convert, label_map='voc'), test_dir) def test_can_save_voc_action(self): class TestExtractor(TestExtractorBase): @@ -395,8 +398,9 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocActionConverter(label_map='voc', allow_attributes=False), - test_dir, target_dataset=DstExtractor()) + partial(VocActionConverter.convert, + label_map='voc', allow_attributes=False), test_dir, + target_dataset=DstExtractor()) def test_can_save_dataset_with_no_subsets(self): class TestExtractor(TestExtractorBase): @@ -414,7 +418,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocConverter(label_map='voc'), test_dir) + partial(VocConverter.convert, label_map='voc'), test_dir) def test_can_save_dataset_with_images(self): class TestExtractor(TestExtractorBase): @@ -428,7 +432,8 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocConverter(label_map='voc', save_images=True), test_dir) + partial(VocConverter.convert, label_map='voc', save_images=True), + test_dir) def test_dataset_with_voc_labelmap(self): class SrcExtractor(TestExtractorBase): @@ -463,8 +468,8 @@ def categories(self): return VOC.make_voc_categories() with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), VocConverter(label_map='voc'), + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='voc'), test_dir, target_dataset=DstExtractor()) def test_dataset_with_guessed_labelmap(self): @@ -510,8 +515,8 @@ def categories(self): return VOC.make_voc_categories(label_map) with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), VocConverter(label_map='guess'), + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='guess'), test_dir, target_dataset=DstExtractor()) def test_dataset_with_source_labelmap_undefined(self): @@ -557,8 +562,8 @@ def categories(self): return VOC.make_voc_categories(label_map) with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), VocConverter(label_map='source'), + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='source'), test_dir, target_dataset=DstExtractor()) def test_dataset_with_source_labelmap_defined(self): @@ -603,8 +608,8 @@ def categories(self): return VOC.make_voc_categories(label_map) with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), VocConverter(label_map='source'), + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='source'), test_dir, target_dataset=DstExtractor()) def test_dataset_with_fixed_labelmap(self): @@ -652,8 +657,8 @@ def categories(self): return VOC.make_voc_categories(label_map) with TestDir() as test_dir: - self._test_save_and_load( - SrcExtractor(), VocConverter(label_map=label_map), + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map=label_map), test_dir, target_dataset=DstExtractor()) def test_can_save_dataset_with_image_info(self): @@ -665,7 +670,7 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocConverter(label_map='voc'), test_dir) + partial(VocConverter.convert, label_map='voc'), test_dir) def test_relative_paths(self): class TestExtractor(TestExtractorBase): @@ -678,7 +683,9 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocConverter(label_map='voc', save_images=True), test_dir) + partial(VocConverter.convert, + label_map='voc', save_images=True), + test_dir) def test_can_save_attributes(self): class TestExtractor(TestExtractorBase): @@ -708,5 +715,5 @@ def __iter__(self): with TestDir() as test_dir: self._test_save_and_load(TestExtractor(), - VocDetectionConverter(label_map='voc'), test_dir, - target_dataset=DstExtractor()) \ No newline at end of file + partial(VocConverter.convert, label_map='voc'), test_dir, + target_dataset=DstExtractor()) diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py index 05fc2322e56..bf6d71aeb78 100644 --- a/datumaro/tests/test_yolo_format.py +++ b/datumaro/tests/test_yolo_format.py @@ -50,7 +50,7 @@ def categories(self): with TestDir() as test_dir: source_dataset = TestExtractor() - YoloConverter(save_images=True)(source_dataset, test_dir) + YoloConverter.convert(source_dataset, test_dir, save_images=True) parsed_dataset = YoloImporter()(test_dir).make_dataset() compare_datasets(self, source_dataset, parsed_dataset) @@ -78,7 +78,7 @@ def categories(self): with TestDir() as test_dir: source_dataset = TestExtractor() - YoloConverter()(source_dataset, test_dir) + YoloConverter.convert(source_dataset, test_dir) save_image(osp.join(test_dir, 'obj_train_data', '1.jpg'), np.ones((10, 15, 3))) # put the image for dataset @@ -109,7 +109,7 @@ def categories(self): with TestDir() as test_dir: source_dataset = TestExtractor() - YoloConverter()(source_dataset, test_dir) + YoloConverter.convert(source_dataset, test_dir) parsed_dataset = YoloImporter()(test_dir, image_info={'1': (10, 15)}).make_dataset() @@ -136,8 +136,8 @@ def categories(self): with TestDir() as test_dir: source_dataset = TestExtractor() - YoloConverter(save_images=save_images)( - source_dataset, test_dir) + YoloConverter.convert(source_dataset, test_dir, + save_images=save_images) parsed_dataset = YoloImporter()(test_dir).make_dataset() compare_datasets(self, source_dataset, parsed_dataset)