diff --git a/README.md b/README.md index 29a251b..56210d3 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ _If you are using FastLabel prototype, please install version 0.2.2._ - [Pascal VOC](#pascal-voc) - [labelme](#labelme) - [Segmentation](#segmentation) +- [Converter to FastLabel format](#converter-to-fastlabel-format) ## Installation @@ -942,6 +943,303 @@ tasks = client.get_image_tasks(project="YOUR_PROJECT_SLUG") client.export_semantic_segmentation(tasks) ``` +## Converter to FastLabel format + +### Response + +Example of converted annotations. + +```python +{ + 'sample1.jpg': [ + { + 'points': [ + 100, + 100, + 200, + 200 + ], + 'type': 'bbox', + 'value': 'cat' + } + ], + 'sample2.jpg': [ + { + 'points': [ + 100, + 100, + 200, + 200 + ], + 'type': 'bbox', + 'value': 'cat' + } + ] +} +``` + +For YOLO, Pascal VOC, and labelme, when the dataset folder is nested, each key is the image path relative to the specified dataset folder. + +``` +dataset +├── sample1.jpg +├── sample1.txt +└── sample_dir + ├── sample2.jpg + └── sample2.txt +``` + +```python +{ + 'sample1.jpg': [ + { + 'points': [ + 100, + 100, + 200, + 200 + ], + 'type': 'bbox', + 'value': 'cat' + } + ], + 'sample_dir/sample2.jpg': [ + { + 'points': [ + 100, + 100, + 200, + 200 + ], + 'type': 'bbox', + 'value': 'cat' + } + ] +} +``` + +### COCO + +Supports the bbox and polygon annotation types. + +Convert an annotation file in [COCO format](https://cocodataset.org/#format-data) to FastLabel format and create a task. + +file_path: COCO annotation JSON file path + +```python +annotations_map = client.convert_coco_to_fastlabel(file_path="./sample.json") +task_id = client.create_image_task( + project="YOUR_PROJECT_SLUG", + name="sample.jpg", + file_path="./sample.jpg", + annotations=annotations_map.get("sample.jpg") +) +``` + +Example of converting annotations to create multiple tasks. + +Given the following tree structure: + +``` +dataset +├── annotation.json +├── sample1.jpg +└── sample2.jpg +``` + +Example source code. 
+ +```python +import fastlabel + +project = "YOUR_PROJECT_SLUG" +input_file_path = "./dataset/annotation.json" +input_dataset_path = "./dataset/" + +annotations_map = client.convert_coco_to_fastlabel(file_path=input_file_path) +for image_file_path in glob.iglob(os.path.join(input_dataset_path, "**/**.jpg"), recursive=True): + time.sleep(1) + name = image_file_path.replace(os.path.join(*[input_dataset_path, ""]), "") + file_path = image_file_path + annotations = annotations_map.get(name) if annotations_map.get(name) is not None else [] + task_id = client.create_image_task( + project=project, + name=name, + file_path=file_path, + annotations=annotations + ) +``` + +### YOLO + +Supports the bbox annotation type. + +Convert annotation files in YOLO format to FastLabel format and create a task. + +classes_file_path: YOLO classes text file path +dataset_folder_path: Folder path containing the YOLO images and annotation files + +```python +annotations_map = client.convert_yolo_to_fastlabel( + classes_file_path="./classes.txt", + dataset_folder_path="./dataset/" +) +task_id = client.create_image_task( + project="YOUR_PROJECT_SLUG", + name="sample.jpg", + file_path="./dataset/sample.jpg", + annotations=annotations_map.get("sample.jpg") +) +``` + +Example of converting annotations to create multiple tasks. + +Given the following tree structure: + +``` +yolo +├── classes.txt +└── dataset + ├── sample1.jpg + ├── sample1.txt + ├── sample2.jpg + └── sample2.txt +``` + +Example source code. 
+ +```python +import fastlabel + +project = "YOUR_PROJECT_SLUG" +input_file_path = "./classes.txt" +input_dataset_path = "./dataset/" +annotations_map = client.convert_yolo_to_fastlabel( + classes_file_path=input_file_path, + dataset_folder_path=input_dataset_path +) +for image_file_path in glob.iglob(os.path.join(input_dataset_path, "**/**.jpg"), recursive=True): + time.sleep(1) + name = image_file_path.replace(os.path.join(*[input_dataset_path, ""]), "") + file_path = image_file_path + annotations = annotations_map.get(name) if annotations_map.get(name) is not None else [] + task_id = client.create_image_task( + project=project, + name=name, + file_path=file_path, + annotations=annotations + ) +``` + +### Pascal VOC + +Supports the bbox annotation type. + +Convert annotation files in Pascal VOC format to FastLabel format and create a task. + +folder_path: Folder path containing the Pascal VOC format annotation files + +```python +annotations_map = client.convert_pascalvoc_to_fastlabel(folder_path="./dataset/") +task_id = client.create_image_task( + project="YOUR_PROJECT_SLUG", + name="sample.jpg", + file_path="./dataset/sample.jpg", + annotations=annotations_map.get("sample.jpg") +) +``` + +Example of converting annotations to create multiple tasks. + +Given the following tree structure: + +``` +dataset +├── sample1.jpg +├── sample1.xml +├── sample2.jpg +└── sample2.xml +``` + +Example source code. 
+ +```python +import fastlabel + +project = "YOUR_PROJECT_SLUG" +input_dataset_path = "./dataset/" + +annotations_map = client.convert_pascalvoc_to_fastlabel(folder_path=input_dataset_path) +for image_file_path in glob.iglob(os.path.join(input_dataset_path, "**/**.jpg"), recursive=True): + time.sleep(1) + name = image_file_path.replace(os.path.join(*[input_dataset_path, ""]), "") + file_path = image_file_path + annotations = annotations_map.get(name) if annotations_map.get(name) is not None else [] + task_id = client.create_image_task( + project=project, + name=name, + file_path=file_path, + annotations=annotations + ) +``` + +### labelme + +Supports the following annotation types: + +- bbox +- polygon +- points +- line + +Convert annotation files in labelme format to FastLabel format and create a task. + +folder_path: Folder path containing the labelme format annotation files + +```python +annotations_map = client.convert_labelme_to_fastlabel(folder_path="./dataset/") +task_id = client.create_image_task( + project="YOUR_PROJECT_SLUG", + name="sample.jpg", + file_path="./sample.jpg", + annotations=annotations_map.get("sample.jpg") +) +``` + +Example of converting annotations to create multiple tasks. + +Given the following tree structure: + +``` +dataset +├── sample1.jpg +├── sample1.json +├── sample2.jpg +└── sample2.json +``` + +Example source code. 
def convert_coco_to_fastlabel(self, file_path: str) -> dict:
    """
    Convert a COCO format annotation file to FastLabel format.

    file_path is the path of a COCO format annotation JSON file. (Required)

    The file is parsed as JSON and handed to
    converters.execute_coco_to_fastlabel, whose result is returned as is:
    a dict in which the key is the image file name and the value is a list
    of annotations in FastLabel format.

    output format example.
    {
        'sample1.jpg': [
            {
                'points': [
                    100,
                    100,
                    200,
                    200
                ],
                'type': 'bbox',
                'value': 'cat'
            }
        ],
        'sample2.jpg': [
            {
                'points': [
                    100,
                    100,
                    200,
                    200
                ],
                'type': 'bbox',
                'value': 'cat'
            }
        ]
    }

    Raises json.JSONDecodeError when the file content is not valid JSON.
    """
    # Parse with json rather than eval(): eval() would execute any Python
    # expression stored in the file and raises NameError on the JSON
    # literals true / false / null that valid COCO files may contain.
    # JSON text is UTF-8 by specification, so do not rely on the locale
    # default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        coco = json.load(f)
    return converters.execute_coco_to_fastlabel(coco)
def convert_yolo_to_fastlabel(
    self, classes_file_path: str, dataset_folder_path: str
) -> dict:
    """
    Build FastLabel annotations from a YOLO format dataset.

    classes_file_path is the YOLO format class file. (Required)
    dataset_folder_path is the folder that contains the image files and the
    YOLO format annotation files with the txt extension. (Required)

    The class names, the image sizes and the raw YOLO rows are read by the
    private helpers of this class and passed, together with the dataset
    folder path (with a trailing separator), to
    converters.execute_yolo_to_fastlabel. Its result is returned unchanged:
    a dict keyed by image path relative to the dataset folder, whose values
    are lists of annotations in FastLabel format.

    [tree structure]
    dataset
    ├── sample1.jpg
    ├── sample1.txt
    └── sample_dir
        ├── sample2.jpg
        └── sample2.txt

    [output]
    {
        'sample1.jpg': [
            {'points': [100, 100, 200, 200], 'type': 'bbox', 'value': 'cat'}
        ],
        'sample_dir/sample2.jpg': [
            {'points': [100, 100, 200, 200], 'type': 'bbox', 'value': 'cat'}
        ]
    }
    """
    dataset_root = dataset_folder_path
    # Tuple elements are evaluated left to right, so the helpers run in the
    # order: classes, image sizes, annotations.
    yolo_inputs = (
        self.__get_yolo_format_classes(classes_file_path),
        self.__get_yolo_image_sizes(dataset_root),
        self.__get_yolo_format_annotations(dataset_root),
    )
    # Joining with "" appends a trailing path separator when one is missing.
    return converters.execute_yolo_to_fastlabel(
        *yolo_inputs, os.path.join(dataset_root, "")
    )
def execute_coco_to_fastlabel(coco: dict) -> dict:
    """
    Convert a parsed COCO annotation dict to FastLabel annotations.

    coco must contain the "images", "categories" and "annotations" lists.

    Returns a dict in which the key is the image file name and the value is
    the list of FastLabel annotations ("value", "points", "type") of that
    image. Every image listed in "images" gets an entry, an empty list when
    it has no usable annotation.

    Only the first element of "segmentation" is used: 4 values are treated
    as a bbox, more than 4 values as a polygon. Annotations whose category
    name is empty, or whose "segmentation" is not a non-empty list
    (e.g. RLE masks), are skipped.

    Raises KeyError when a required key is missing or when an annotation
    refers to an unknown category id.
    """
    image_names = {image["id"]: image["file_name"] for image in coco["images"]}
    category_names = {
        category["id"]: category["name"] for category in coco["categories"]
    }

    # Group the annotations by image once instead of re-filtering the whole
    # annotation list for every image.
    annotations_by_image = {}
    for coco_annotation in coco["annotations"]:
        annotations_by_image.setdefault(coco_annotation["image_id"], []).append(
            coco_annotation
        )

    results = {}
    for image_id, file_name in image_names.items():
        annotations = []
        for coco_annotation in annotations_by_image.get(image_id, []):
            category_name = category_names[coco_annotation["category_id"]]
            if not category_name:
                # Skip only this annotation; returning here would throw away
                # every image converted so far and hand None to the caller.
                continue

            segmentation = coco_annotation["segmentation"]
            if not isinstance(segmentation, list) or not segmentation:
                # RLE masks (dict) and empty segmentations cannot be turned
                # into a bbox or polygon.
                continue

            points = segmentation[0]
            annotation_type = ""
            if len(points) == 4:
                annotation_type = AnnotationType.bbox.value
            elif len(points) > 4:
                annotation_type = AnnotationType.polygon.value
            annotations.append(
                {
                    "value": category_name,
                    "points": points,
                    "type": annotation_type,
                }
            )
        results[file_name] = annotations
    return results
def execute_yolo_to_fastlabel(
    classes: dict,
    image_sizes: dict,
    yolo_annotations: dict,
    dataset_folder_path: str = None,
) -> dict:
    """
    Convert YOLO annotations to FastLabel bbox annotations.

    classes maps str(class id) to the class name.
    image_sizes maps a key to {"image_file_path": ..., "size": [width, height]}.
    yolo_annotations maps the same kind of key to a list of rows
    [class_id, center_x_ratio, center_y_ratio, width_ratio, height_ratio].
    dataset_folder_path, when given, is removed from the image file path to
    build the result key.

    Returns a dict in which the key is the image file path and the value is
    the list of FastLabel annotations with "points" [x1, y1, x2, y2] in
    pixels.

    Keys of yolo_annotations that have no entry in image_sizes are ignored,
    and blank rows are skipped.

    Raises ValueError when a non-blank row does not have exactly 5 fields
    and KeyError when a class id is not present in classes.
    """
    results = {}
    for yolo_anno_key, rows in yolo_annotations.items():
        image_info = image_sizes.get(yolo_anno_key)
        if image_info is None:
            # A txt file without an image of the same name (for example a
            # classes.txt kept inside the dataset folder) is not an
            # annotation file.
            continue
        image_width, image_height = image_info["size"]

        annotations = []
        for row in rows:
            # A blank line arrives as [""] and repeated separators leave ""
            # fields behind; drop them before unpacking.
            fields = [field for field in row if field != ""]
            if not fields:
                continue
            (
                yolo_class_id,
                yolo_center_x_ratio,
                yolo_center_y_ratio,
                yolo_anno_width_ratio,
                yolo_anno_height_ratio,
            ) = fields

            class_name = classes[str(yolo_class_id)]

            # YOLO stores centre and size as ratios of the image size.
            center_x = float(image_width) * float(yolo_center_x_ratio)
            center_y = float(image_height) * float(yolo_center_y_ratio)
            anno_width = float(image_width) * float(yolo_anno_width_ratio)
            anno_height = float(image_height) * float(yolo_anno_height_ratio)

            annotations.append(
                {
                    "value": class_name,
                    "points": [
                        center_x - (anno_width / 2),  # x1
                        center_y - (anno_height / 2),  # y1
                        center_x + (anno_width / 2),  # x2
                        center_y + (anno_height / 2),  # y2
                    ],
                    "type": AnnotationType.bbox.value,
                }
            )

        file_path = image_info["image_file_path"]
        if dataset_folder_path:
            # Joining with "" guarantees a trailing separator, so the key
            # becomes the path relative to the dataset folder.
            file_path = file_path.replace(os.path.join(dataset_folder_path, ""), "")
        results[file_path] = annotations

    return results