## Importing Libraries

In [1]:
from pathlib import Path
import shutil

from lxml import etree
from pydantic import BaseModel

## Defining Data and Preprocessor Classes

In [2]:
class PascalBBoxModel(BaseModel):
    """Bounding box described by its minimum and maximum x/y coordinates.

    Populated from the ``xmin``/``ymin``/``xmax``/``ymax`` children of a
    ``bndbox`` element when an annotation file is parsed.
    """
    x_min: int
    x_max: int
    y_min: int
    y_max: int


class PascalObjectModel(BaseModel):
    """One annotated object: its class name and its bounding box."""
    name: str  # text of the object's ``name`` element
    bbox: PascalBBoxModel


class PascalAnnotationModel(BaseModel):
    """Contents of one annotation XML file.

    Holds the image file name, the image size taken from the ``size``
    element, and every ``object`` entry found in the file.
    """
    filename: str
    image_width: int
    image_height: int
    objects: list[PascalObjectModel]


class YOLOBBoxModel(BaseModel):
    """Bounding box described by its centre point and its size.

    The preprocessor fills these fields with values divided by the image
    width (x, width) or image height (y, height).
    """
    x_center: float
    y_center: float
    width: float
    height: float

    def __str__(self) -> str:
        """Return the four fields, in declaration order, space-separated."""
        fields = (self.x_center, self.y_center, self.width, self.height)
        return " ".join(str(value) for value in fields)


class YOLOObjectModel(BaseModel):
    """One labelled object: a class index followed by its bounding box."""
    index: int
    bbox: YOLOBBoxModel

    def __str__(self) -> str:
        """Return ``"<index> <bbox>"``, the text of one label-file line."""
        return " ".join((str(self.index), str(self.bbox)))


class YOLOAnnotationModel(BaseModel):
    """Converted form of one annotation file.

    ``filename`` and the image size are carried over unchanged from the
    Pascal annotation; ``objects`` holds the converted entries.
    """
    filename: str
    image_width: int
    image_height: int
    objects: list[YOLOObjectModel]

In [3]:
class PreProcessor:
    """Convert Pascal VOC XML annotations into a YOLO-style dataset.

    For every ``.xml`` annotation the boxes are converted to centre/size
    form expressed as fractions of the image size, a ``.txt`` label file is
    written, and the image named inside the annotation is copied alongside
    it.  Files are assigned to the ``train``, ``val`` and ``test``
    destinations by their position in the sorted list of annotation files.

    All state lives on the class: ``class_names`` grows as new object names
    are encountered and is shared by every call to :meth:`preprocess`.
    """

    # Object names in order of first appearance.  A name's position in this
    # list is the class index written to the label files.
    class_names: list[str] = []
    # Root directory of each dataset split.
    destination_dir_paths = {
        "train": Path("./yolov7/train/").resolve(),
        "val": Path("./yolov7/val/").resolve(),
        "test": Path("./yolov7/test/").resolve(),
    }
    # Sub-directory names created inside every split directory.
    subdir_names = {
        "image": "images",
        "annotation": "labels"
    }

    @classmethod
    def preprocess(cls, src_annotations_dir_path: Path,
                   src_images_dir_path: Path,
                   train_count: int = 700,
                   val_count: int = 100) -> None:
        """Convert every annotation in a directory and copy its image.

        Args:
            src_annotations_dir_path: Directory containing the ``.xml``
                annotation files; other files are ignored.
            src_images_dir_path: Directory containing the images the
                annotations refer to.
            train_count: Number of leading annotation files sent to the
                ``train`` split.
            val_count: Number of files following the training files that
                are sent to the ``val`` split.  All remaining files go to
                ``test``.

        Raises:
            ValueError: If ``train_count`` or ``val_count`` is negative, or
                if an annotation file is missing a required element.
        """
        if train_count < 0 or val_count < 0:
            raise ValueError(
                "train_count and val_count must be non-negative, got "
                f"{train_count} and {val_count}")
        for dir_path in cls.destination_dir_paths.values():
            dir_path.mkdir(parents=True, exist_ok=True)
            for subdir_name in cls.subdir_names.values():
                (dir_path / subdir_name).mkdir(parents=True, exist_ok=True)
        # iterdir() yields entries in arbitrary order; sorting makes both the
        # split membership and the class-index assignment reproducible.
        # No shuffling is performed.
        annotation_file_paths = sorted(
            path for path in src_annotations_dir_path.iterdir()
            if path.suffix == ".xml")
        val_end = train_count + val_count
        for position, annotation_file_path in enumerate(
                annotation_file_paths):
            if position < train_count:
                split_name = "train"
            elif position < val_end:
                split_name = "val"
            else:
                split_name = "test"
            cls.__preprocess_image_and_annotation(
                src_annotation_file_path=annotation_file_path,
                src_images_dir_path=src_images_dir_path,
                dest_dir_path=cls.destination_dir_paths[split_name])

    @classmethod
    def __preprocess_image_and_annotation(
            cls, src_annotation_file_path: Path, src_images_dir_path: Path,
            dest_dir_path: Path) -> None:
        """Convert one annotation file and copy the image it names.

        The label file is named after the annotation file's stem, while the
        image keeps the file name recorded inside the annotation.
        NOTE(review): presumably those two stems always match; confirm
        against the dataset.
        """
        pascal_annotation = cls.__parse_pascal_annotation_file(
            pascal_annotation_file_path=src_annotation_file_path)
        yolo_annotation = cls.__convert_pascal_annotation_to_yolo_annotation(
            pascal_annotation=pascal_annotation)
        image_name = yolo_annotation.filename
        label_name = f"{src_annotation_file_path.stem}.txt"
        dest_image_file_path = \
            dest_dir_path / cls.subdir_names["image"] / image_name
        dest_annotation_file_path = \
            dest_dir_path / cls.subdir_names["annotation"] / label_name
        # Copy the image first so that a missing image does not leave a
        # label file behind without its picture.
        cls.__copy_image_file(
            src_image_file_path=src_images_dir_path / image_name,
            dest_image_file_path=dest_image_file_path)
        cls.__write_yolo_annotation_file(
            yolo_annotation=yolo_annotation,
            yolo_annotation_file_path=dest_annotation_file_path)

    @classmethod
    def __copy_image_file(cls, src_image_file_path: Path,
                          dest_image_file_path: Path) -> None:
        """Copy one image file with ``shutil.copy2``."""
        shutil.copy2(src=src_image_file_path, dst=dest_image_file_path)

    @classmethod
    def __write_yolo_annotation_file(
            cls, yolo_annotation: YOLOAnnotationModel,
            yolo_annotation_file_path: Path) -> None:
        """Write one line per object, with no trailing newline."""
        lines = [str(yolo_object) for yolo_object in yolo_annotation.objects]
        yolo_annotation_file_path.write_text(
            "\n".join(lines), encoding="utf-8")

    @staticmethod
    def __read_required_text(node, path: str, source: Path) -> str:
        """Return the text of the element at ``path`` below ``node``.

        Raises:
            ValueError: If the element is absent or has no text, naming the
                offending file and path.
        """
        element = node.find(path)
        if element is None or element.text is None:
            raise ValueError(
                f"{source}: missing or empty element {path!r}")
        return element.text

    @classmethod
    def __parse_pascal_annotation_file(cls, pascal_annotation_file_path: Path
                                       ) -> PascalAnnotationModel:
        """Read one annotation XML file into a ``PascalAnnotationModel``.

        Raises:
            ValueError: If a required element is missing or empty, or if a
                numeric element does not hold an integer.
        """
        source = pascal_annotation_file_path
        tree = etree.parse(pascal_annotation_file_path)
        read = cls.__read_required_text
        filename = read(tree, ".//filename", source)
        image_width = int(read(tree, ".//size/width", source))
        image_height = int(read(tree, ".//size/height", source))
        pascal_objects = []
        for object_node in tree.findall(".//object"):
            name = read(object_node, "name", source)
            bbox = PascalBBoxModel(
                x_min=int(read(object_node, ".//bndbox/xmin", source)),
                x_max=int(read(object_node, ".//bndbox/xmax", source)),
                y_min=int(read(object_node, ".//bndbox/ymin", source)),
                y_max=int(read(object_node, ".//bndbox/ymax", source)))
            pascal_objects.append(PascalObjectModel(name=name, bbox=bbox))
        return PascalAnnotationModel(
            filename=filename, image_width=image_width,
            image_height=image_height, objects=pascal_objects)

    @classmethod
    def __convert_pascal_annotation_to_yolo_annotation(
            cls, pascal_annotation: PascalAnnotationModel
            ) -> YOLOAnnotationModel:
        """Convert every object of an annotation, keeping name and size."""
        yolo_objects = [
            cls.__convert_pascal_object_to_yolo_object(
                pascal_object=pascal_object,
                image_width=pascal_annotation.image_width,
                image_height=pascal_annotation.image_height)
            for pascal_object in pascal_annotation.objects
        ]
        return YOLOAnnotationModel(
            filename=pascal_annotation.filename,
            image_width=pascal_annotation.image_width,
            image_height=pascal_annotation.image_height,
            objects=yolo_objects)

    @classmethod
    def __convert_pascal_object_to_yolo_object(
            cls, pascal_object: PascalObjectModel, image_width: int,
            image_height: int) -> YOLOObjectModel:
        """Replace the object's name by its class index and convert its box.

        Looking up the index registers the name in ``class_names`` when it
        has not been seen before.
        """
        class_index = cls.__get_class_index(class_name=pascal_object.name)
        yolo_bbox = cls.__convert_pascal_bbox_to_yolo_bbox(
            pascal_bbox=pascal_object.bbox,
            image_width=image_width,
            image_height=image_height)
        return YOLOObjectModel(index=class_index, bbox=yolo_bbox)

    @classmethod
    def __convert_pascal_bbox_to_yolo_bbox(
            cls, pascal_bbox: PascalBBoxModel,
            image_width: int, image_height: int) -> YOLOBBoxModel:
        """Convert a min/max box to centre/size form relative to the image.

        Raises:
            ValueError: If either image dimension is not positive, since
                the box cannot then be expressed relative to the image.
        """
        if image_width <= 0 or image_height <= 0:
            raise ValueError(
                "image dimensions must be positive, got "
                f"{image_width}x{image_height}")
        x_center = (pascal_bbox.x_max + pascal_bbox.x_min) / 2 / image_width
        y_center = (pascal_bbox.y_max + pascal_bbox.y_min) / 2 / image_height
        width = (pascal_bbox.x_max - pascal_bbox.x_min) / image_width
        height = (pascal_bbox.y_max - pascal_bbox.y_min) / image_height
        return YOLOBBoxModel(x_center=x_center, y_center=y_center,
                             width=width, height=height)

    @classmethod
    def __get_class_index(cls, class_name: str) -> int:
        """Return the index of ``class_name``, registering it if unseen."""
        try:
            return cls.class_names.index(class_name)
        except ValueError:
            # First occurrence: the new name takes the next free index.
            cls.class_names.append(class_name)
            return len(cls.class_names) - 1


## Setting Global Parameters

In [4]:
# Locations of the raw annotation files and images, resolved to absolute
# paths relative to the current working directory.
src_annotations_dir_path = Path("./raw_data/annotations/").resolve()
src_images_dir_path = Path("./raw_data/images/").resolve()

In [5]:
# Convert every annotation and copy its image into the train/val/test
# directories configured on PreProcessor.
PreProcessor.preprocess(src_annotations_dir_path=src_annotations_dir_path,
                        src_images_dir_path=src_images_dir_path)

In [6]:
# Show the object names collected during preprocessing; a name's position in
# this list is the class index written to the label files.
PreProcessor.class_names

['with_mask', 'without_mask', 'mask_weared_incorrect']