Skip to content

Commit

Permalink
[Datumaro] Image control in converters (#1799)
Browse files Browse the repository at this point in the history
* Update converter interface, add image control

* Update bindings

* update changelog

* Fix build
  • Loading branch information
zhiltsov-max committed Jul 13, 2020
1 parent fd81d72 commit f807714
Show file tree
Hide file tree
Showing 34 changed files with 434 additions and 550 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Expand Up @@ -55,6 +55,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appearance block in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- Keyframe navigations and some switchers in attribute annotation mode (<https://github.com/opencv/cvat/pull/1820>)
- [Datumaro] Added `convert` command to convert datasets directly (<https://github.com/opencv/cvat/pull/1837>)
- [Datumaro] Added an option to specify image extension when exporting datasets (<https://github.com/opencv/cvat/pull/1799>)
- [Datumaro] Added image copying when exporting datasets, if possible (<https://github.com/opencv/cvat/pull/1799>)

### Changed
- Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)
Expand All @@ -65,6 +67,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added support for attributes in VOC XML format (<https://github.com/opencv/cvat/pull/1792>)
- Added annotation attributes in COCO format (<https://github.com/opencv/cvat/pull/1782>)
- Colorized object items in the side panel (<https://github.com/opencv/cvat/pull/1753>)
- [Datumaro] Annotation-less files are no longer generated in COCO format, unless the tasks are explicitly requested (<https://github.com/opencv/cvat/pull/1799>)

### Deprecated
-
Expand Down
5 changes: 2 additions & 3 deletions cvat/apps/dataset_manager/formats/coco.py
Expand Up @@ -18,9 +18,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('coco_instances',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('coco_instances').convert(extractor,
save_dir=temp_dir, save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
7 changes: 3 additions & 4 deletions cvat/apps/dataset_manager/formats/datumaro/__init__.py
Expand Up @@ -48,11 +48,10 @@ def _save_image_info(save_dir, task_data):

def _export(self, task_data, save_dir, save_images=False):
dataset = CvatTaskDataExtractor(task_data, include_images=save_images)
converter = dm_env.make_converter('datumaro_project',
save_images=save_images,
config={ 'project_name': task_data.db_task.name, }
dm_env.converters.get('datumaro_project').convert(dataset,
save_dir=save_dir, save_images=save_images,
project_config={ 'project_name': task_data.db_task.name, }
)
converter(dataset, save_dir=save_dir)

project = Project.load(save_dir)
target_dir = project.config.project_dir
Expand Down
4 changes: 2 additions & 2 deletions cvat/apps/dataset_manager/formats/labelme.py
Expand Up @@ -19,8 +19,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('label_me', save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir,
save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
12 changes: 6 additions & 6 deletions cvat/apps/dataset_manager/formats/mask.py
Expand Up @@ -26,10 +26,9 @@ def _export(dst_file, task_data, save_images=False):
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc_segmentation',
apply_colormap=True, label_map=make_colormap(task_data),
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('voc_segmentation').convert(extractor,
save_dir=temp_dir, save_images=save_images,
apply_colormap=True, label_map=make_colormap(task_data))

make_zip_archive(temp_dir, dst_file)

Expand Down Expand Up @@ -68,8 +67,9 @@ def normalize_label(label):
def make_colormap(task_data):
labels = sorted([label['name']
for _, label in task_data.meta['task']['labels']])
if 'background' not in labels:
labels.insert(0, 'background')
if 'background' in labels:
labels.remove('background')
labels.insert(0, 'background')

predefined = parse_default_colors()

Expand Down
5 changes: 2 additions & 3 deletions cvat/apps/dataset_manager/formats/mot.py
Expand Up @@ -19,9 +19,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('mot_seq_gt',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('mot_seq_gt').convert(extractor,
save_dir=temp_dir, save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
5 changes: 2 additions & 3 deletions cvat/apps/dataset_manager/formats/pascal_voc.py
Expand Up @@ -24,9 +24,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('voc', label_map='source',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('voc').convert(extractor,
save_dir=temp_dir, save_images=save_images, label_map='source')

make_zip_archive(temp_dir, dst_file)

Expand Down
5 changes: 2 additions & 3 deletions cvat/apps/dataset_manager/formats/tfrecord.py
Expand Up @@ -27,9 +27,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('tf_detection_api',
save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('tf_detection_api').convert(extractor,
save_dir=temp_dir, save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
4 changes: 2 additions & 2 deletions cvat/apps/dataset_manager/formats/yolo.py
Expand Up @@ -23,8 +23,8 @@ def _export(dst_file, task_data, save_images=False):
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
with TemporaryDirectory() as temp_dir:
converter = dm_env.make_converter('yolo', save_images=save_images)
converter(extractor, save_dir=temp_dir)
dm_env.converters.get('yolo').convert(extractor,
save_dir=temp_dir, save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
8 changes: 4 additions & 4 deletions datumaro/datumaro/cli/contexts/project/__init__.py
Expand Up @@ -340,9 +340,9 @@ def export_command(args):
raise CliException("Converter for format '%s' is not found" % \
args.format)

if hasattr(converter, 'from_cmdline'):
extra_args = converter.from_cmdline(args.extra_args)
converter = converter(**extra_args)
extra_args = converter.from_cmdline(args.extra_args)
def converter_proxy(extractor, save_dir):
return converter.convert(extractor, save_dir, **extra_args)

filter_args = FilterModes.make_filter_args(args.filter_mode)

Expand All @@ -352,7 +352,7 @@ def export_command(args):
log.info("Exporting the project...")
dataset.export_project(
save_dir=dst_dir,
converter=converter,
converter=converter_proxy,
filter_expr=args.filter,
**filter_args)
log.info("Project exported to '%s' as '%s'" % \
Expand Down
82 changes: 71 additions & 11 deletions datumaro/datumaro/components/converter.py
Expand Up @@ -3,17 +3,77 @@
#
# SPDX-License-Identifier: MIT

class Converter:
def __init__(self, cmdline_args=None):
pass
import logging as log
import os
import os.path as osp
import shutil

def __call__(self, extractor, save_dir):
raise NotImplementedError()
from datumaro.components.cli_plugin import CliPlugin
from datumaro.util.image import save_image

def _parse_cmdline(self, cmdline):
parser = self.build_cmdline_parser()

if len(cmdline) != 0 and cmdline[0] == '--':
cmdline = cmdline[1:]
args = parser.parse_args(cmdline)
return vars(args)
class IConverter:
    """Minimal converter interface: a single class-level ``convert`` entry point."""

    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Convert ``extractor`` and write the result into ``save_dir``.

        Args:
            extractor: the dataset source to convert.
            save_dir: destination directory for the converted dataset.
            **options: format-specific settings; their meaning is defined by
                the implementing subclass.

        Raises:
            NotImplementedError: always, in this base interface.
        """
        raise NotImplementedError("Should be implemented in a subclass")

class Converter(IConverter, CliPlugin):
    """Base class for converters with shared image-export helpers.

    Subclasses implement ``apply()`` and normally set ``DEFAULT_IMAGE_EXT``
    (or pass ``default_image_ext`` to ``__init__``).
    """

    # Fallback image extension for the format; must be provided either here
    # (by a subclass) or through the ``default_image_ext`` constructor argument.
    DEFAULT_IMAGE_EXT = None

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        """Extend the parent parser with ``--save-images`` and ``--image-ext``."""
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('--save-images', action='store_true',
            help="Save images (default: %(default)s)")
        parser.add_argument('--image-ext', default=None,
            help="Image extension (default: keep or use format default%s)" % \
                (' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else ''))

        return parser

    @classmethod
    def convert(cls, extractor, save_dir, **options):
        """Instantiate the converter with the given options and run ``apply()``.

        Returns:
            Whatever the subclass ``apply()`` returns.
        """
        converter = cls(extractor, save_dir, **options)
        return converter.apply()

    def apply(self):
        """Perform the conversion; must be overridden by subclasses."""
        raise NotImplementedError("Should be implemented in a subclass")

    def __init__(self, extractor, save_dir, save_images=False,
            image_ext=None, default_image_ext=None):
        """Store the conversion inputs and image-export settings.

        Args:
            extractor: the dataset source to convert.
            save_dir: destination directory.
            save_images: whether images should be exported.
            image_ext: extension to force for exported images, or None to
                keep the source extension.
            default_image_ext: fallback extension; defaults to the class
                attribute ``DEFAULT_IMAGE_EXT``.
        """
        default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT
        # A fallback extension is mandatory: _find_image_ext() relies on it
        # when neither the caller nor the source image supplies one.
        assert default_image_ext
        self._default_image_ext = default_image_ext

        self._save_images = save_images
        self._image_ext = image_ext

        self._extractor = extractor
        self._save_dir = save_dir

    def _find_image_ext(self, item):
        """Pick the extension for ``item``'s exported image.

        Priority: explicitly requested extension, then the source image
        extension (if the item has an image), then the format default.
        """
        src_ext = None
        if item.has_image:
            src_ext = osp.splitext(osp.basename(item.image.path))[1]

        return self._image_ext or src_ext or self._default_image_ext

    def _make_image_filename(self, item):
        """Return ``item.id`` with the extension chosen by ``_find_image_ext``."""
        return item.id + self._find_image_ext(item)

    def _save_image(self, item, path=None):
        """Write ``item``'s image to ``path``.

        When ``path`` is omitted, the name from ``_make_image_filename`` is
        used. If the source file exists and already has the target extension,
        it is copied as-is; otherwise the image data is re-encoded via
        ``save_image``.

        Returns:
            ``item.image.path`` if the item has no image data (nothing is
            written in that case), otherwise None.
        """
        image = item.image.data
        if image is None:
            log.warning("Item '%s' has no image", item.id)
            return item.image.path

        path = path or self._make_image_filename(item)

        src_ext = osp.splitext(osp.basename(item.image.path))[1]
        dst_ext = osp.splitext(osp.basename(path))[1]

        # The default file name usually has no directory component, and
        # os.makedirs('') raises FileNotFoundError, so only create the
        # parent directory when there is one.
        dst_dir = osp.dirname(path)
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)

        if src_ext == dst_ext and osp.isfile(item.image.path):
            # Same format: copy the original bytes instead of re-encoding.
            shutil.copyfile(item.image.path, path)
        else:
            save_image(path, image)
10 changes: 4 additions & 6 deletions datumaro/datumaro/components/project.py
Expand Up @@ -624,9 +624,8 @@ def save(self, save_dir=None, merge=False, recursive=True,

if merge:
# merge and save the resulting dataset
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self, dataset_save_dir)
self.env.converters.get(DEFAULT_FORMAT).convert(
self, dataset_save_dir, **converter_kwargs)
else:
if recursive:
# children items should already be updated
Expand All @@ -635,9 +634,8 @@ def save(self, save_dir=None, merge=False, recursive=True,
if isinstance(source, ProjectDataset):
source.save(**converter_kwargs)

converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self.iterate_own(), dataset_save_dir)
self.env.converters.get(DEFAULT_FORMAT).convert(
self.iterate_own(), dataset_save_dir, **converter_kwargs)

project.save(save_dir)
except BaseException:
Expand Down

0 comments on commit f807714

Please sign in to comment.