From 08c35be3f93c22c7b24025b01d8d9a82ebf19d50 Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Sun, 5 Mar 2023 19:39:53 +0900 Subject: [PATCH 01/10] feat: impl export coco for video task --- fastlabel/__init__.py | 4 +- fastlabel/converters.py | 200 ++++++++++++++++++++++++++++++++-------- 2 files changed, 165 insertions(+), 39 deletions(-) diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py index e3878cc..35e2af1 100644 --- a/fastlabel/__init__.py +++ b/fastlabel/__init__.py @@ -2138,8 +2138,10 @@ def export_coco( raise FastLabelInvalidException( "Output file name must have a json extension", 422 ) - coco = converters.to_coco(tasks, annotations) os.makedirs(output_dir, exist_ok=True) + coco = converters.to_coco( + tasks=tasks, annotations=annotations, output_dir=output_dir + ) file_path = os.path.join(output_dir, output_file_name) with open(file_path, "w") as f: json.dump(coco, f, indent=4, ensure_ascii=False) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index dc220fe..eaa94d9 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -2,54 +2,163 @@ import math import os from concurrent.futures import ThreadPoolExecutor +from contextlib import contextmanager from datetime import datetime from decimal import Decimal +from operator import itemgetter +from pathlib import Path +from tempfile import NamedTemporaryFile from typing import List +import cv2 import geojson import numpy as np +import requests from fastlabel.const import AnnotationType from fastlabel.exceptions import FastLabelInvalidException +from fastlabel.utils import is_video_supported_ext + + +@contextmanager +def VideoCapture(*args, **kwds): + videoCapture = cv2.VideoCapture(*args, **kwds) + try: + yield videoCapture + finally: + videoCapture.release() + + +def _download_file(url: str, output_file_path: str, chunk_size: int = 8192) -> str: + with requests.get(url, stream=True) as stream: + stream.raise_for_status() + with open(file=output_file_path, 
mode="wb") as file: + for chunk in stream.iter_content(chunk_size=chunk_size): + if chunk: + file.write(chunk) + return file.name + + +def _export_image_files_for_video_file( + file_path: str, + output_dir_path: str, + basename: str, +): + image_file_names = [] + with VideoCapture(file_path) as cap: + if not cap.isOpened(): + raise FastLabelInvalidException( + ( + "Video to image conversion failed. Video could not be opened.", + " Download may have failed or there is a problem with the video.", + ), + 422, + ) + digit = len(str(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))) + frame_num = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + image_file_name = f"{basename}_{str(frame_num).zfill(digit)}.jpg" + image_file_path = os.path.join(output_dir_path, image_file_name) + os.makedirs(output_dir_path, exist_ok=True) + cv2.imwrite(image_file_path, frame) + frame_num += 1 + image_file_names.append(image_file_name) + return image_file_names + + +def _export_image_files_for_video_task(video_task: dict, output_dir_path: str): + with NamedTemporaryFile(prefix="fastlabel-sdk-") as video_file: + video_file_path = _download_file( + url=video_task["url"], output_file_path=video_file.name + ) + return _export_image_files_for_video_file( + file_path=video_file_path, + output_dir_path=output_dir_path, + basename=Path(video_task["name"]).stem, + ) + # COCO -def to_coco(tasks: list, annotations: list = []) -> dict: +def to_coco(tasks: list, output_dir: str, annotations: list = []) -> dict: # Get categories - categories = __get_categories(tasks, annotations) + categories = __get_coco_categories(tasks, annotations) # Get images and annotations images = [] annotations = [] annotation_id = 0 - image_id = 0 + image_index = 0 for task in tasks: if task["height"] == 0 or task["width"] == 0: continue - image_id += 1 - image = { - "file_name": task["name"], - "height": task["height"], - "width": task["width"], - "id": image_id, - } - images.append(image) + if 
is_video_supported_ext(task["name"]): + image_file_names = _export_image_files_for_video_task( + task, str((Path(output_dir) / "images").resolve()) + ) + task_images = _generate_coco_images( + image_file_names=image_file_names, + height=task["height"], + width=task["width"], + offset=image_index, + ) + image_index = len(task_images) + image_index - data = [ - {"annotation": annotation, "categories": categories, "image": image} - for annotation in task["annotations"] - ] - with ThreadPoolExecutor(max_workers=8) as executor: - results = executor.map(__to_annotation, data) + def get_annotation_points(annotation: dict, index: int): + points = annotation.get("points") + if not points: + return None + video_point_datum = points.get(str(index)) + if not video_point_datum: + return None + return video_point_datum["value"] - for result in results: - annotation_id += 1 - if not result: - continue - result["id"] = annotation_id - annotations.append(result) + else: + image_index += 1 + task_images = [ + { + "file_name": task["name"], + "height": task["height"], + "width": task["width"], + "id": image_index, + } + ] + + def get_annotation_points(annotation: dict, index: int): + return annotation.get("points") + + for index, task_image in enumerate(task_images, 1): + param = [ + { + "annotation_value": annotation["value"], + "annotation_type": annotation["type"], + "annotation_points": get_annotation_points(annotation, index), + "annotation_keypoints": annotation.get("keypoints"), + "categories": categories, + "image_id": task_image["id"], + } + for annotation in task["annotations"] + ] + + with ThreadPoolExecutor(max_workers=8) as executor: + image_annotations = executor.map(__to_coco_annotation, param) + + for image_annotation in sorted( + image_annotations, key=itemgetter("image_id", "category_id") + ): + annotation_id += 1 + if not image_annotation: + continue + image_annotation["id"] = annotation_id + annotations.append(image_annotation) + + images.append(task_image) return 
{ "images": images, @@ -58,6 +167,20 @@ def to_coco(tasks: list, annotations: list = []) -> dict: } +def _generate_coco_images( + image_file_names: str, height: int, width: int, offset: int = 0 +): + return [ + { + "file_name": file_name, + "height": height, + "width": width, + "id": (index + 1) + offset, + } + for index, file_name in enumerate(image_file_names) + ] + + def __get_coco_skeleton(keypoints: list) -> list: keypoint_id_skeleton_index_map = {} for index, keypoint in enumerate(keypoints, 1): @@ -77,7 +200,7 @@ def __get_coco_skeleton(keypoints: list) -> list: return skeleton -def __get_categories(tasks: list, annotations: list) -> list: +def __get_coco_categories(tasks: list, annotations: list) -> list: categories = [] values = [] for task in tasks: @@ -90,7 +213,7 @@ def __get_categories(tasks: list, annotations: list) -> list: ]: continue values.append(task_annotation["value"]) - values = list(set(values)) + values = sorted(list(set(values))) # Create categories from task annotations (not support pose esitimation) if not annotations: @@ -99,6 +222,7 @@ def __get_categories(tasks: list, annotations: list) -> list: "skeleton": [], "keypoints": [], "keypoint_colors": [], + # BUG: アノテーションごとのcolorを設定しなければならない "color": task_annotation["color"], "supercategory": value, "id": index, @@ -135,13 +259,13 @@ def __get_categories(tasks: list, annotations: list) -> list: return categories -def __to_annotation(data: dict) -> dict: - annotation = data["annotation"] +def __to_coco_annotation(data: dict) -> dict: categories = data["categories"] - image = data["image"] - points = annotation.get("points") - keypoints = annotation.get("keypoints") - annotation_type = annotation["type"] + image_id = data["image_id"] + points = data["annotation_points"] + keypoints = data["annotation_keypoints"] + annotation_type = data["annotation_type"] + annotation_value = data["annotation_value"] annotation_id = 0 if annotation_type not in [ @@ -161,14 +285,14 @@ def __to_annotation(data: 
dict) -> dict: ): return None - category = __get_category_by_name(categories, annotation["value"]) + category = __get_coco_category_by_name(categories, annotation_value) - return __get_annotation( - annotation_id, points, keypoints, category["id"], image, annotation_type + return __get_coco_annotation( + annotation_id, points, keypoints, category["id"], image_id, annotation_type ) -def __get_category_by_name(categories: list, name: str) -> str: +def __get_coco_category_by_name(categories: list, name: str) -> str: category = [category for category in categories if category["name"] == name][0] return category @@ -186,12 +310,12 @@ def __get_coco_annotation_keypoints(keypoints: list) -> list: return coco_annotation_keypoints -def __get_annotation( +def __get_coco_annotation( id_: int, points: list, keypoints: list, category_id: int, - image: dict, + image_id: str, annotation_type: str, ) -> dict: annotation = {} @@ -202,7 +326,7 @@ def __get_annotation( annotation["segmentation"] = __to_coco_segmentation(annotation_type, points) annotation["iscrowd"] = 0 annotation["area"] = __to_area(annotation_type, points) - annotation["image_id"] = image["id"] + annotation["image_id"] = image_id annotation["bbox"] = __to_bbox(annotation_type, points) annotation["category_id"] = category_id annotation["id"] = id_ From 6ac4ecce6f111b2b1f8dd54f2f2194554f312d6e Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Sun, 5 Mar 2023 21:53:22 +0900 Subject: [PATCH 02/10] refactor: get annotation points --- fastlabel/converters.py | 155 +++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 74 deletions(-) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index eaa94d9..c0b99af 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -19,69 +19,6 @@ from fastlabel.exceptions import FastLabelInvalidException from fastlabel.utils import is_video_supported_ext - -@contextmanager -def VideoCapture(*args, **kwds): - videoCapture = 
cv2.VideoCapture(*args, **kwds) - try: - yield videoCapture - finally: - videoCapture.release() - - -def _download_file(url: str, output_file_path: str, chunk_size: int = 8192) -> str: - with requests.get(url, stream=True) as stream: - stream.raise_for_status() - with open(file=output_file_path, mode="wb") as file: - for chunk in stream.iter_content(chunk_size=chunk_size): - if chunk: - file.write(chunk) - return file.name - - -def _export_image_files_for_video_file( - file_path: str, - output_dir_path: str, - basename: str, -): - image_file_names = [] - with VideoCapture(file_path) as cap: - if not cap.isOpened(): - raise FastLabelInvalidException( - ( - "Video to image conversion failed. Video could not be opened.", - " Download may have failed or there is a problem with the video.", - ), - 422, - ) - digit = len(str(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))) - frame_num = 0 - while True: - ret, frame = cap.read() - if not ret: - break - - image_file_name = f"{basename}_{str(frame_num).zfill(digit)}.jpg" - image_file_path = os.path.join(output_dir_path, image_file_name) - os.makedirs(output_dir_path, exist_ok=True) - cv2.imwrite(image_file_path, frame) - frame_num += 1 - image_file_names.append(image_file_name) - return image_file_names - - -def _export_image_files_for_video_task(video_task: dict, output_dir_path: str): - with NamedTemporaryFile(prefix="fastlabel-sdk-") as video_file: - video_file_path = _download_file( - url=video_task["url"], output_file_path=video_file.name - ) - return _export_image_files_for_video_file( - file_path=video_file_path, - output_dir_path=output_dir_path, - basename=Path(video_task["name"]).stem, - ) - - # COCO @@ -110,14 +47,8 @@ def to_coco(tasks: list, output_dir: str, annotations: list = []) -> dict: ) image_index = len(task_images) + image_index - def get_annotation_points(annotation: dict, index: int): - points = annotation.get("points") - if not points: - return None - video_point_datum = points.get(str(index)) - if not 
video_point_datum: - return None - return video_point_datum["value"] + def get_annotation_points(anno, index): + return _get_annotation_points_for_video_annotation(anno, index) else: image_index += 1 @@ -130,8 +61,8 @@ def get_annotation_points(annotation: dict, index: int): } ] - def get_annotation_points(annotation: dict, index: int): - return annotation.get("points") + def get_annotation_points(anno, _): + return _get_annotation_points_for_image_annotation(anno) for index, task_image in enumerate(task_images, 1): param = [ @@ -222,7 +153,7 @@ def __get_coco_categories(tasks: list, annotations: list) -> list: "skeleton": [], "keypoints": [], "keypoint_colors": [], - # BUG: アノテーションごとのcolorを設定しなければならない + # BUG: All are set to the same color. "color": task_annotation["color"], "supercategory": value, "id": index, @@ -1035,3 +966,79 @@ def __get_annotation_type_by_labelme(shape_type: str) -> str: if shape_type == "line": return "line" return None + + +@contextmanager +def VideoCapture(*args, **kwds): + videoCapture = cv2.VideoCapture(*args, **kwds) + try: + yield videoCapture + finally: + videoCapture.release() + + +def _download_file(url: str, output_file_path: str, chunk_size: int = 8192) -> str: + with requests.get(url, stream=True) as stream: + stream.raise_for_status() + with open(file=output_file_path, mode="wb") as file: + for chunk in stream.iter_content(chunk_size=chunk_size): + if chunk: + file.write(chunk) + return file.name + + +def _export_image_files_for_video_file( + file_path: str, + output_dir_path: str, + basename: str, +): + image_file_names = [] + with VideoCapture(file_path) as cap: + if not cap.isOpened(): + raise FastLabelInvalidException( + ( + "Video to image conversion failed. 
Video could not be opened.", + " Download may have failed or there is a problem with the video.", + ), + 422, + ) + digit = len(str(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))) + frame_num = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + image_file_name = f"{basename}_{str(frame_num).zfill(digit)}.jpg" + image_file_path = os.path.join(output_dir_path, image_file_name) + os.makedirs(output_dir_path, exist_ok=True) + cv2.imwrite(image_file_path, frame) + frame_num += 1 + image_file_names.append(image_file_name) + return image_file_names + + +def _export_image_files_for_video_task(video_task: dict, output_dir_path: str): + with NamedTemporaryFile(prefix="fastlabel-sdk-") as video_file: + video_file_path = _download_file( + url=video_task["url"], output_file_path=video_file.name + ) + return _export_image_files_for_video_file( + file_path=video_file_path, + output_dir_path=output_dir_path, + basename=Path(video_task["name"]).stem, + ) + + +def _get_annotation_points_for_video_annotation(annotation: dict, index: int): + points = annotation.get("points") + if not points: + return None + video_point_datum = points.get(str(index)) + if not video_point_datum: + return None + return video_point_datum["value"] + + +def _get_annotation_points_for_image_annotation(annotation: dict): + return annotation.get("points") From 727c52bb1019b684b198fe65d42c02ce08abb55e Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Sun, 5 Mar 2023 22:32:15 +0900 Subject: [PATCH 03/10] feat: impl export yolo for video task --- fastlabel/__init__.py | 6 ++-- fastlabel/converters.py | 72 +++++++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py index 35e2af1..ca4557e 100644 --- a/fastlabel/__init__.py +++ b/fastlabel/__init__.py @@ -2162,7 +2162,10 @@ def export_yolo( classes is a list of annotation values. e.g. ['dog','bird'] (Optional). 
output_dir is output directory(default: output/yolo) (Optional). """ - annos, categories = converters.to_yolo(tasks, classes) + os.makedirs(output_dir, exist_ok=True) + annos, categories = converters.to_yolo( + tasks=tasks, classes=classes, output_dir=output_dir + ) for anno in annos: file_name = anno["filename"] basename = utils.get_basename(file_name) @@ -2173,7 +2176,6 @@ def export_yolo( f.write(obj) f.write("\n") classes_file_path = os.path.join(output_dir, "classes.txt") - os.makedirs(os.path.dirname(classes_file_path), exist_ok=True) with open(classes_file_path, "w", encoding="utf8") as f: for category in categories: f.write(category["name"]) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index c0b99af..b8f1516 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -357,12 +357,12 @@ def __serialize(value: any) -> any: # YOLO -def to_yolo(tasks: list, classes: list) -> tuple: +def to_yolo(tasks: list, classes: list, output_dir: str) -> tuple: if len(classes) == 0: - coco = to_coco(tasks) + coco = to_coco(tasks=tasks, output_dir=output_dir) return __coco2yolo(coco) else: - return __to_yolo(tasks, classes) + return __to_yolo(tasks=tasks, classes=classes, output_dir=output_dir) def __coco2yolo(coco: dict) -> tuple: @@ -410,37 +410,59 @@ def __coco2yolo(coco: dict) -> tuple: return annos, categories -def __to_yolo(tasks: list, classes: list) -> tuple: +def __to_yolo(tasks: list, classes: list, output_dir: str) -> tuple: annos = [] for task in tasks: if task["height"] == 0 or task["width"] == 0: continue - objs = [] - data = [ - {"annotation": annotation, "task": task, "classes": classes} - for annotation in task["annotations"] - ] - with ThreadPoolExecutor(max_workers=8) as executor: - results = executor.map(__get_yolo_annotation, data) - for result in results: - if not result: - continue - objs.append(" ".join(result)) - anno = {"filename": task["name"], "object": objs} + + if is_video_supported_ext(task["name"]): + 
image_file_names = _export_image_files_for_video_task( + task, str((Path(output_dir) / "images").resolve()) + ) + + def get_annotation_points(anno, index): + return _get_annotation_points_for_video_annotation(anno, index) + + else: + image_file_names = [task["name"]] + + def get_annotation_points(anno, _): + return _get_annotation_points_for_image_annotation(anno) + + for index, image_file_name in enumerate(image_file_names, 1): + param = [ + { + "annotation_value": annotation["value"], + "annotation_type": annotation["type"], + "annotation_points": get_annotation_points(annotation, index), + "width": task["width"], + "height": task["height"], + "classes": classes, + } + for annotation in task["annotations"] + ] + with ThreadPoolExecutor(max_workers=8) as executor: + image_anno_dicts = executor.map(__get_yolo_annotation, param) + + image_anno_rows = [ + " ".join(anno) + for anno in sorted(image_anno_dicts, key=itemgetter(0)) + if anno + ] + anno = {"filename": image_file_name, "object": image_anno_rows} annos.append(anno) - categories = map(lambda val: {"name": val}, classes) + categories = map(lambda val: {"name": val}, sorted(classes)) return annos, categories def __get_yolo_annotation(data: dict) -> dict: - annotation = data["annotation"] - points = annotation["points"] - annotation_type = annotation["type"] - value = annotation["value"] + points = data["annotation_points"] + annotation_type = data["annotation_type"] + value = data["annotation_value"] classes = list(data["classes"]) - task = data["task"] if ( annotation_type != AnnotationType.bbox.value and annotation_type != AnnotationType.polygon.value @@ -452,11 +474,11 @@ def __get_yolo_annotation(data: dict) -> dict: int(points[0]) == int(points[2]) or int(points[1]) == int(points[3]) ): return None - if not annotation["value"] in classes: + if value not in classes: return None - dw = 1.0 / task["width"] - dh = 1.0 / task["height"] + dw = 1.0 / data["width"] + dh = 1.0 / data["height"] bbox = 
__to_bbox(annotation_type, points) xmin = bbox[0] From 78c20064004bd701a94a600ce3e4a9c9b37f74b0 Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Mon, 6 Mar 2023 00:22:38 +0900 Subject: [PATCH 04/10] feat: impl export PascalVOC for video task --- fastlabel/__init__.py | 9 ++-- fastlabel/converters.py | 97 ++++++++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py index ca4557e..f40ac3c 100644 --- a/fastlabel/__init__.py +++ b/fastlabel/__init__.py @@ -2190,13 +2190,16 @@ def export_pascalvoc( tasks is a list of tasks (Required). output_dir is output directory(default: output/pascalvoc) (Optional). """ - pascalvoc = converters.to_pascalvoc(tasks) + os.makedirs(output_dir, exist_ok=True) + pascalvoc = converters.to_pascalvoc(tasks=tasks, output_dir=output_dir) for voc in pascalvoc: file_name = voc["annotation"]["filename"] basename = utils.get_basename(file_name) - file_path = os.path.join(output_dir, basename + ".xml") + file_path = os.path.join(output_dir, "annotations", basename + ".xml") os.makedirs(os.path.dirname(file_path), exist_ok=True) - xml = xmltodict.unparse(voc, pretty=True, full_document=False) + xml = xmltodict.unparse( + voc, pretty=True, indent=" ", full_document=False + ) with open(file_path, "w", encoding="utf8") as f: f.write(xml) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index b8f1516..dfdf6d9 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -65,7 +65,7 @@ def get_annotation_points(anno, _): return _get_annotation_points_for_image_annotation(anno) for index, task_image in enumerate(task_images, 1): - param = [ + params = [ { "annotation_value": annotation["value"], "annotation_type": annotation["type"], @@ -78,7 +78,7 @@ def get_annotation_points(anno, _): ] with ThreadPoolExecutor(max_workers=8) as executor: - image_annotations = executor.map(__to_coco_annotation, param) + image_annotations = 
executor.map(__to_coco_annotation, params) for image_annotation in sorted( image_annotations, key=itemgetter("image_id", "category_id") @@ -431,7 +431,7 @@ def get_annotation_points(anno, _): return _get_annotation_points_for_image_annotation(anno) for index, image_file_name in enumerate(image_file_names, 1): - param = [ + params = [ { "annotation_value": annotation["value"], "annotation_type": annotation["type"], @@ -443,7 +443,7 @@ def get_annotation_points(anno, _): for annotation in task["annotations"] ] with ThreadPoolExecutor(max_workers=8) as executor: - image_anno_dicts = executor.map(__get_yolo_annotation, param) + image_anno_dicts = executor.map(__get_yolo_annotation, params) image_anno_rows = [ " ".join(anno) @@ -507,65 +507,83 @@ def _truncate(n, decimals=0) -> float: # Pascal VOC -def to_pascalvoc(tasks: list) -> list: +def to_pascalvoc(tasks: list, output_dir: str) -> list: pascalvoc = [] for task in tasks: if task["height"] == 0 or task["width"] == 0: continue - pascal_objs = [] - data = [{"annotation": annotation} for annotation in task["annotations"]] - with ThreadPoolExecutor(max_workers=8) as executor: - results = executor.map(__get_pascalvoc_obj, data) + if is_video_supported_ext(task["name"]): + image_file_names = _export_image_files_for_video_task( + task, str((Path(output_dir) / "images").resolve()) + ) - for result in results: - if not result: - continue - pascal_objs.append(result) + def get_annotation_points(anno, index): + return _get_annotation_points_for_video_annotation(anno, index) - voc = { - "annotation": { - "filename": task["name"], - "size": { - "width": task["width"], - "height": task["height"], - "depth": 3, - }, - "segmented": 0, - "object": pascal_objs, + else: + image_file_names = [task["name"]] + + def get_annotation_points(anno, _): + return _get_annotation_points_for_image_annotation(anno) + + for index, image_file_name in enumerate(image_file_names, 1): + params = [ + { + "annotation_type": annotation["type"], + 
"annotation_value": annotation["value"], + "annotation_points": get_annotation_points(annotation, index), + "annotation_attributes": annotation["attributes"], + } + for annotation in task["annotations"] + ] + + with ThreadPoolExecutor(max_workers=8) as executor: + pascalvoc_objs = executor.map(__get_pascalvoc_obj, params) + + voc = { + "annotation": { + "filename": image_file_name, + "size": { + "width": task["width"], + "height": task["height"], + "depth": 3, + }, + "segmented": 0, + "object": list( + sorted(filter(None, pascalvoc_objs), key=itemgetter("name")) + ), + } } - } - pascalvoc.append(voc) + pascalvoc.append(voc) return pascalvoc def __get_pascalvoc_obj(data: dict) -> dict: - annotation = data["annotation"] - points = annotation["points"] - annotation_type = annotation["type"] - if ( - annotation_type != AnnotationType.bbox.value - and annotation_type != AnnotationType.polygon.value - ): + points = data["annotation_points"] + type = data["annotation_type"] + value = data["annotation_value"] + attributes = data["annotation_attributes"] + if type != AnnotationType.bbox.value and type != AnnotationType.polygon.value: return None if not points or len(points) == 0: return None - if annotation_type == AnnotationType.bbox.value and ( + if type == AnnotationType.bbox.value and ( int(points[0]) == int(points[2]) or int(points[1]) == int(points[3]) ): return None - bbox = __to_bbox(annotation_type, points) + bbox = __to_bbox(type, points) x = bbox[0] y = bbox[1] w = bbox[2] h = bbox[3] return { - "name": annotation["value"], + "name": value, "pose": "Unspecified", - "truncated": __get_pascalvoc_tag_value(annotation, "truncated"), - "occluded": __get_pascalvoc_tag_value(annotation, "occluded"), - "difficult": __get_pascalvoc_tag_value(annotation, "difficult"), + "truncated": __get_pascalvoc_tag_value(attributes, "truncated"), + "occluded": __get_pascalvoc_tag_value(attributes, "occluded"), + "difficult": __get_pascalvoc_tag_value(attributes, "difficult"), "bndbox": 
{ "xmin": math.floor(x), "ymin": math.floor(y), @@ -575,8 +593,7 @@ def __get_pascalvoc_obj(data: dict) -> dict: } -def __get_pascalvoc_tag_value(annotation: dict, target_tag_name: str) -> int: - attributes = annotation["attributes"] +def __get_pascalvoc_tag_value(attributes: list, target_tag_name: str) -> int: if not attributes: return 0 related_attr = next( From b786825bc0d385891cc220812ca6e1938a0b677e Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Mon, 6 Mar 2023 16:44:29 +0900 Subject: [PATCH 05/10] fix: filter annotations for export video task --- fastlabel/converters.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index dfdf6d9..690aa5c 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -8,7 +8,7 @@ from operator import itemgetter from pathlib import Path from tempfile import NamedTemporaryFile -from typing import List +from typing import List, Optional import cv2 import geojson @@ -80,8 +80,12 @@ def get_annotation_points(anno, _): with ThreadPoolExecutor(max_workers=8) as executor: image_annotations = executor.map(__to_coco_annotation, params) + filtered_image_annotations = list(filter(None, image_annotations)) + if len(filtered_image_annotations) <= 0: + continue + for image_annotation in sorted( - image_annotations, key=itemgetter("image_id", "category_id") + filtered_image_annotations, key=itemgetter("image_id", "category_id") ): annotation_id += 1 if not image_annotation: @@ -217,15 +221,21 @@ def __to_coco_annotation(data: dict) -> dict: return None category = __get_coco_category_by_name(categories, annotation_value) + if category is None: + return None return __get_coco_annotation( annotation_id, points, keypoints, category["id"], image_id, annotation_type ) -def __get_coco_category_by_name(categories: list, name: str) -> str: - category = [category for category in categories if category["name"] == name][0] - return 
category +def __get_coco_category_by_name(categories: list, name: str) -> Optional[dict]: + matched_categories = [ + category for category in categories if category["name"] == name + ] + if len(matched_categories) >= 1: + return matched_categories[0] + return None def __get_coco_annotation_keypoints(keypoints: list) -> list: @@ -445,9 +455,13 @@ def get_annotation_points(anno, _): with ThreadPoolExecutor(max_workers=8) as executor: image_anno_dicts = executor.map(__get_yolo_annotation, params) + filtered_image_anno_dicts = list(filter(None, image_anno_dicts)) + if len(filtered_image_anno_dicts) <= 0: + continue + image_anno_rows = [ " ".join(anno) - for anno in sorted(image_anno_dicts, key=itemgetter(0)) + for anno in sorted(filtered_image_anno_dicts, key=itemgetter(0)) if anno ] anno = {"filename": image_file_name, "object": image_anno_rows} @@ -541,6 +555,10 @@ def get_annotation_points(anno, _): with ThreadPoolExecutor(max_workers=8) as executor: pascalvoc_objs = executor.map(__get_pascalvoc_obj, params) + filtered_pascalvoc_objs = list(filter(None, pascalvoc_objs)) + if len(filtered_pascalvoc_objs) <= 0: + continue + voc = { "annotation": { "filename": image_file_name, @@ -551,7 +569,7 @@ def get_annotation_points(anno, _): }, "segmented": 0, "object": list( - sorted(filter(None, pascalvoc_objs), key=itemgetter("name")) + sorted(filtered_pascalvoc_objs, key=itemgetter("name")) ), } } From 83dfcca1b12c82e6cc07d7b9235ce8be795dd6fc Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Mon, 6 Mar 2023 17:33:12 +0900 Subject: [PATCH 06/10] fix: Corrected an error in point conditions --- fastlabel/converters.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index 690aa5c..94e3f03 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -210,9 +210,8 @@ def __to_coco_annotation(data: dict) -> dict: AnnotationType.pose_estimation.value, ]: return None - if ( - annotation_type != 
AnnotationType.pose_estimation.value - and (not points or len(points)) == 0 + if annotation_type != AnnotationType.pose_estimation.value and ( + not points or (len(points) == 0) ): return None if annotation_type == AnnotationType.bbox.value and ( From c990d4bd531ab937c662d03a9f72241837cfc8ca Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Mon, 6 Mar 2023 23:15:12 +0900 Subject: [PATCH 07/10] fix: coco annotation order --- fastlabel/converters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index 94e3f03..147986a 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -85,7 +85,8 @@ def get_annotation_points(anno, _): continue for image_annotation in sorted( - filtered_image_annotations, key=itemgetter("image_id", "category_id") + filtered_image_annotations, + key=itemgetter("image_id", "category_id", "area"), ): annotation_id += 1 if not image_annotation: From 5dd03340c529621a5e155808659ea8c8c64759c9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Tue, 7 Mar 2023 00:07:06 +0900 Subject: [PATCH 08/10] fix: Change task type determination process --- fastlabel/__init__.py | 45 +++++++++++++++++++++++++++++++++++++---- fastlabel/converters.py | 27 ++++++++++++++++--------- fastlabel/utils.py | 7 ++++++- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py index f40ac3c..6254c24 100644 --- a/fastlabel/__init__.py +++ b/fastlabel/__init__.py @@ -2120,6 +2120,7 @@ def __get_yolo_format_annotations(self, dataset_folder_path: str) -> dict: def export_coco( self, + project: str, tasks: list, annotations: list = [], output_dir: str = os.path.join("output", "coco"), @@ -2129,6 +2130,7 @@ def export_coco( Convert tasks to COCO format and export as a file. If you pass annotations, you can export Pose Estimation type annotations. + project is slug of your project (Required). tasks is a list of tasks (Required). 
annotations is a list of annotations (Optional). output_dir is output directory(default: output/coco) (Optional). @@ -2138,9 +2140,19 @@ def export_coco( raise FastLabelInvalidException( "Output file name must have a json extension", 422 ) + + project = self.find_project_by_slug(project) + if project is None: + raise FastLabelInvalidException( + "Project not found. Check the project slag.", 422 + ) + os.makedirs(output_dir, exist_ok=True) coco = converters.to_coco( - tasks=tasks, annotations=annotations, output_dir=output_dir + project_type=project["type"], + tasks=tasks, + annotations=annotations, + output_dir=output_dir, ) file_path = os.path.join(output_dir, output_file_name) with open(file_path, "w") as f: @@ -2148,6 +2160,7 @@ def export_coco( def export_yolo( self, + project: str, tasks: list, classes: list = [], output_dir: str = os.path.join("output", "yolo"), @@ -2158,13 +2171,24 @@ def export_yolo( If not , classes.txt will be generated based on passed tasks . (Annotations never used in your project will not be exported.) + project is slug of your project (Required). tasks is a list of tasks (Required). classes is a list of annotation values. e.g. ['dog','bird'] (Optional). output_dir is output directory(default: output/yolo) (Optional). """ + + project = self.find_project_by_slug(project) + if project is None: + raise FastLabelInvalidException( + "Project not found. 
Check the project slug.", 422 + ) + os.makedirs(output_dir, exist_ok=True) annos, categories = converters.to_yolo( - tasks=tasks, classes=classes, output_dir=output_dir + project_type=project["type"], + tasks=tasks, + classes=classes, + output_dir=output_dir, ) for anno in annos: file_name = anno["filename"] @@ -2182,16 +2206,29 @@ def export_yolo( f.write("\n") def export_pascalvoc( - self, tasks: list, output_dir: str = os.path.join("output", "pascalvoc") + self, + project: str, + tasks: list, + output_dir: str = os.path.join("output", "pascalvoc"), ) -> None: """ Convert tasks to Pascal VOC format as files. + project is slug of your project (Required). tasks is a list of tasks (Required). output_dir is output directory(default: output/pascalvoc) (Optional). """ + + project = self.find_project_by_slug(project) + if project is None: + raise FastLabelInvalidException( + "Project not found. Check the project slug.", 422 + ) + os.makedirs(output_dir, exist_ok=True) - pascalvoc = converters.to_pascalvoc(tasks=tasks, output_dir=output_dir) + pascalvoc = converters.to_pascalvoc( + project_type=project["type"], tasks=tasks, output_dir=output_dir + ) for voc in pascalvoc: file_name = voc["annotation"]["filename"] basename = utils.get_basename(file_name) diff --git a/fastlabel/converters.py b/fastlabel/converters.py index 147986a..b40379f 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -17,12 +17,14 @@ from fastlabel.const import AnnotationType from fastlabel.exceptions import FastLabelInvalidException -from fastlabel.utils import is_video_supported_ext +from fastlabel.utils import is_video_project_type # COCO -def to_coco(tasks: list, output_dir: str, annotations: list = []) -> dict: +def to_coco( + project_type: str, tasks: list, output_dir: str, annotations: list = [] +) -> dict: # Get categories categories = __get_coco_categories(tasks, annotations) @@ -35,7 +37,7 @@ def to_coco(tasks: list, output_dir: str, annotations: list = []) -> dict: if 
task["height"] == 0 or task["width"] == 0: continue - if is_video_supported_ext(task["name"]): + if is_video_project_type(project_type): image_file_names = _export_image_files_for_video_task( task, str((Path(output_dir) / "images").resolve()) ) @@ -367,12 +369,17 @@ def __serialize(value: any) -> any: # YOLO -def to_yolo(tasks: list, classes: list, output_dir: str) -> tuple: +def to_yolo(project_type: str, tasks: list, classes: list, output_dir: str) -> tuple: if len(classes) == 0: - coco = to_coco(tasks=tasks, output_dir=output_dir) + coco = to_coco(project_type=project_type, tasks=tasks, output_dir=output_dir) return __coco2yolo(coco) else: - return __to_yolo(tasks=tasks, classes=classes, output_dir=output_dir) + return __to_yolo( + project_type=project_type, + tasks=tasks, + classes=classes, + output_dir=output_dir, + ) def __coco2yolo(coco: dict) -> tuple: @@ -420,13 +427,13 @@ def __coco2yolo(coco: dict) -> tuple: return annos, categories -def __to_yolo(tasks: list, classes: list, output_dir: str) -> tuple: +def __to_yolo(project_type: str, tasks: list, classes: list, output_dir: str) -> tuple: annos = [] for task in tasks: if task["height"] == 0 or task["width"] == 0: continue - if is_video_supported_ext(task["name"]): + if is_video_project_type(project_type): image_file_names = _export_image_files_for_video_task( task, str((Path(output_dir) / "images").resolve()) ) @@ -521,13 +528,13 @@ def _truncate(n, decimals=0) -> float: # Pascal VOC -def to_pascalvoc(tasks: list, output_dir: str) -> list: +def to_pascalvoc(project_type: str, tasks: list, output_dir: str) -> list: pascalvoc = [] for task in tasks: if task["height"] == 0 or task["width"] == 0: continue - if is_video_supported_ext(task["name"]): + if is_video_project_type(project_type): image_file_names = _export_image_files_for_video_task( task, str((Path(output_dir) / "images").resolve()) ) diff --git a/fastlabel/utils.py b/fastlabel/utils.py index f951992..dd8c2c9 100644 --- a/fastlabel/utils.py +++ 
b/fastlabel/utils.py @@ -60,6 +60,10 @@ def is_pcd_supported_size(file_path: str) -> bool: return os.path.getsize(file_path) <= const.SUPPORTED_PCD_SIZE +def is_video_project_type(project_type: str): + return type(project_type) is str and project_type.startswith("video_") + + def is_json_ext(file_name: str) -> bool: return file_name.lower().endswith(".json") @@ -101,7 +105,8 @@ def sort_segmentation_points(points: List[int]) -> List[int]: if index == 0: continue if ( - val[1] <= points_list[base_point_index][1] and val[0] <= points_list[base_point_index][0] + val[1] <= points_list[base_point_index][1] + and val[0] <= points_list[base_point_index][0] ): base_point_index = index new_points_array = np.vstack( From f7972e6f444d23a77145a63b0ae5da8b456233dd Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Tue, 7 Mar 2023 12:55:28 +0900 Subject: [PATCH 09/10] fix: Outputs unannotated tasks --- fastlabel/__init__.py | 5 ++++- fastlabel/converters.py | 31 ++++++++++++++++--------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py index 6254c24..7856222 100644 --- a/fastlabel/__init__.py +++ b/fastlabel/__init__.py @@ -2196,7 +2196,10 @@ def export_yolo( file_path = os.path.join(output_dir, "annotations", basename + ".txt") os.makedirs(os.path.dirname(file_path), exist_ok=True) with open(file_path, "w", encoding="utf8") as f: - for obj in anno["object"]: + objects = anno.get("object") + if objects is None: + continue + for obj in objects: f.write(obj) f.write("\n") classes_file_path = os.path.join(output_dir, "classes.txt") diff --git a/fastlabel/converters.py b/fastlabel/converters.py index b40379f..a99b7a6 100644 --- a/fastlabel/converters.py +++ b/fastlabel/converters.py @@ -67,6 +67,7 @@ def get_annotation_points(anno, _): return _get_annotation_points_for_image_annotation(anno) for index, task_image in enumerate(task_images, 1): + images.append(task_image) params = [ { "annotation_value": 
annotation["value"], @@ -96,8 +97,6 @@ def get_annotation_points(anno, _): image_annotation["id"] = annotation_id annotations.append(image_annotation) - images.append(task_image) - return { "images": images, "categories": categories, @@ -463,15 +462,16 @@ def get_annotation_points(anno, _): image_anno_dicts = executor.map(__get_yolo_annotation, params) filtered_image_anno_dicts = list(filter(None, image_anno_dicts)) - if len(filtered_image_anno_dicts) <= 0: - continue - image_anno_rows = [ - " ".join(anno) - for anno in sorted(filtered_image_anno_dicts, key=itemgetter(0)) - if anno - ] - anno = {"filename": image_file_name, "object": image_anno_rows} + anno = {"filename": image_file_name} + + if len(filtered_image_anno_dicts) > 0: + anno["object"] = [ + " ".join(anno) + for anno in sorted(filtered_image_anno_dicts, key=itemgetter(0)) + if anno + ] + annos.append(anno) categories = map(lambda val: {"name": val}, sorted(classes)) @@ -563,8 +563,6 @@ def get_annotation_points(anno, _): pascalvoc_objs = executor.map(__get_pascalvoc_obj, params) filtered_pascalvoc_objs = list(filter(None, pascalvoc_objs)) - if len(filtered_pascalvoc_objs) <= 0: - continue voc = { "annotation": { @@ -575,11 +573,14 @@ def get_annotation_points(anno, _): "depth": 3, }, "segmented": 0, - "object": list( - sorted(filtered_pascalvoc_objs, key=itemgetter("name")) - ), } } + + if len(filtered_pascalvoc_objs) > 0: + voc["annotation"]["object"] = list( + sorted(filtered_pascalvoc_objs, key=itemgetter("name")) + ) + pascalvoc.append(voc) return pascalvoc From 0b44e6fd676a659d11182db79cc6ab76a9bfe26a Mon Sep 17 00:00:00 2001 From: Yoshihiro Fujimoto Date: Fri, 17 Mar 2023 15:17:21 +0900 Subject: [PATCH 10/10] docs: Change description to match method signature --- README.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0d2796a..78c4743 100644 --- a/README.md +++ b/README.md @@ -2069,14 +2069,15 @@ Support the following 
annotation types. Get tasks and export as a [COCO format](https://cocodataset.org/#format-data) file. ```python -tasks = client.get_image_tasks(project="YOUR_PROJECT_SLUG") -client.export_coco(tasks) +project_slug = "YOUR_PROJECT_SLUG" +tasks = client.get_image_tasks(project=project_slug) +client.export_coco(project=project_slug, tasks=tasks) ``` Export with specifying output directory and file name. ```python -client.export_coco(tasks=tasks, output_dir="YOUR_DIRECTROY", output_file_name="YOUR_FILE_NAME") +client.export_coco(project="YOUR_PROJECT_SLUG", tasks=tasks, output_dir="YOUR_DIRECTROY", output_file_name="YOUR_FILE_NAME") ``` If you would like to export pose estimation type annotations, please pass annotations. @@ -2085,7 +2086,7 @@ If you would like to export pose estimation type annotations, please pass annota project_slug = "YOUR_PROJECT_SLUG" tasks = client.get_image_tasks(project=project_slug) annotations = client.get_annotations(project=project_slug) -client.export_coco(tasks=tasks, annotations=annotations, output_dir="YOUR_DIRECTROY", output_file_name="YOUR_FILE_NAME") +client.export_coco(project=project_slug, tasks=tasks, annotations=annotations, output_dir="YOUR_DIRECTROY", output_file_name="YOUR_FILE_NAME") ``` ### FastLabel To YOLO @@ -2098,8 +2099,9 @@ Support the following annotation types. Get tasks and export as YOLO format files. 
```python -tasks = client.get_image_tasks(project="YOUR_PROJECT_SLUG") -client.export_yolo(tasks, output_dir="YOUR_DIRECTROY") +project_slug = "YOUR_PROJECT_SLUG" +tasks = client.get_image_tasks(project=project_slug) +client.export_yolo(project=project_slug, tasks=tasks, output_dir="YOUR_DIRECTROY") ``` Get tasks and export as YOLO format files with classes.txt @@ -2110,7 +2112,7 @@ project_slug = "YOUR_PROJECT_SLUG" tasks = client.get_image_tasks(project=project_slug) annotations = client.get_annotations(project=project_slug) classes = list(map(lambda annotation: annotation["value"], annotations)) -client.export_yolo(tasks=tasks, classes=classes, output_dir="YOUR_DIRECTROY") +client.export_yolo(project=project_slug, tasks=tasks, classes=classes, output_dir="YOUR_DIRECTROY") ``` ### FastLabel To Pascal VOC @@ -2123,8 +2125,9 @@ Support the following annotation types. Get tasks and export as Pascal VOC format files. ```python -tasks = client.get_image_tasks(project="YOUR_PROJECT_SLUG") -client.export_pascalvoc(tasks) +project_slug = "YOUR_PROJECT_SLUG" +tasks = client.get_image_tasks(project=project_slug) +client.export_pascalvoc(project=project_slug, tasks=tasks) ``` ### FastLabel To labelme @@ -2388,7 +2391,6 @@ for image_file_path in glob.iglob(os.path.join(input_dataset_path, "**/**.jpg"), > Please check const.COLOR_PALLETE for index colors. - ## Execute endpoint Create the endpoint from the screen at first.