In [1]:
import cv2
import numpy as np
from pathlib import Path
from common_tools import plt_show
# Root directory of the dataset; later cells derive their sub-paths from it.
# NOTE(review): hard-coded absolute, machine-specific path — adjust before running elsewhere.
root_folder = Path("/home/rahul/chalk_following_toy_neural_network_training/dataset")

# Dataset type conversion

In [2]:
import shutil
# Source folders inside the dataset root: the input images and the
# label images (*.png) they are paired with.
images_path = root_folder.joinpath("images")
label_path = root_folder.joinpath("segmentations")

In [3]:
def edging(
    img: np.ndarray,
    dilation_size: int = 5,
    epsilon_ratio: float = 0.001,
) -> np.ndarray:
    """Return the normalised outline of the largest region in a grayscale label image.

    The image is dilated with an elliptical kernel, its external contours are
    extracted, the contour with the largest area is kept and simplified with
    ``cv2.approxPolyDP``, and the resulting points are divided by the image
    width and height.

    Adapted from:
    https://github.com/orgs/ultralytics/discussions/8528#discussioncomment-8868637

    Args:
        img: Label image; only its first two dimensions (height, width) are
            used for normalisation. ``cv2.findContours`` expects an 8-bit
            single-channel image.
        dilation_size: Radius of the elliptical dilation kernel; the kernel
            side length is ``2 * dilation_size + 1``. Defaults to 5, the value
            that was previously hard-coded.
        epsilon_ratio: Fraction of the contour perimeter used as the
            ``approxPolyDP`` tolerance. Defaults to 0.001, the value that was
            previously hard-coded.

    Returns:
        Array of shape ``(N, 2)`` holding ``(x / width, y / height)`` for each
        point of the simplified contour. When no contour is found, an empty
        array of shape ``(0, 2)`` is returned, so ``len(result) == 0`` still
        signals "nothing found" and the return type is always an ndarray.
    """
    # Dilate the labels
    kernel_side = 2 * dilation_size + 1
    element = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE,
        (kernel_side, kernel_side),
        (dilation_size, dilation_size),
    )
    dilated = cv2.dilate(img, element)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Keep the return type consistent with the annotation (was a bare list).
        return np.empty((0, 2), dtype=np.float64)

    # Only the contour with the largest area is kept; any others are dropped.
    contour = max(contours, key=cv2.contourArea)

    # Simplify contour; the tolerance scales with the contour's perimeter.
    epsilon = epsilon_ratio * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Convert to normalized coordinates. shape[:2] avoids an unpacking error
    # if the array carries extra trailing dimensions.
    height, width = img.shape[:2]
    return approx.reshape(-1, 2) / np.array([width, height], dtype=np.float64)

def contour_to_str(contour, class_index: int = 0) -> str:
    """Format a contour as one Ultralytics YOLO segmentation label row.

    Args:
        contour: Array-like of points, shaped ``(N, 2)`` or OpenCV-style
            ``(N, 1, 2)``.
        class_index: Class id written at the start of the row. Defaults to 0,
            the value that was previously hard-coded.

    Returns:
        ``"<class-index> <x1> <y1> ... <xn> <yn>"``, or an empty string when
        the contour contains no points (so no malformed row is produced).
    """
    # reshape rather than squeeze: squeeze() collapses a single-point contour
    # of shape (1, 2) to (2,), which breaks the (x, y) unpacking below.
    points = np.asarray(contour).reshape(-1, 2)
    if len(points) == 0:
        return ""
    return f"{class_index} " + " ".join(f"{x} {y}" for x, y in points)

***Create a new folder in Ultralytics YOLO format:***

Taken from [here](https://docs.ultralytics.com/datasets/segment/)
The dataset label format used for training YOLO segmentation models is as follows:

One text file per image: Each image in the dataset has a corresponding text file with the same name as the image file and the ".txt" extension.
One row per object: Each row in the text file corresponds to one object instance in the image.
Object information per row: Each row contains the following information about the object instance:
Object class index: An integer representing the class of the object (e.g., 0 for person, 1 for car, etc.).
Object bounding coordinates: The bounding coordinates around the mask area, normalized to be between 0 and 1.
The format for a single row in the segmentation dataset file is as follows:


<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>
In this format, <class-index> is the index of the class for the object, and <x1> <y1> <x2> <y2> ... <xn> <yn> are the bounding coordinates of the object's segmentation mask. The coordinates are separated by spaces.



In [11]:
# Target folder that will hold the generated label .txt files and the copied images.
yolo_dataset_dir = root_folder.joinpath("yolo_format", "temp", "chalk")
yolo_dataset_dir.mkdir(exist_ok=True, parents=True)

In [12]:
# Populate the directory with one YOLO label file (<label stem>.txt) per label image.

for lbl_path in label_path.rglob("*.png"):

    label_img = cv2.imread(str(lbl_path))
    if label_img is None:
        # cv2.imread returns None instead of raising when a file cannot be read;
        # fail with the offending path rather than an opaque TypeError below.
        raise ValueError(f"Could not read label image: {lbl_path}")

    # Channel index 2 of the loaded image (red in OpenCV's default BGR order)
    # is the one used as the label mask.
    contour = edging(label_img[:, :, 2])

    # A label image without any contour still gets an (empty) label file.
    label_text = contour_to_str(contour) if len(contour) > 0 else ""
    (yolo_dataset_dir / (lbl_path.stem + ".txt")).write_text(label_text)

In [13]:
# Place a copy of every entry of the images folder next to the generated
# label files in the YOLO dataset directory.

for src_image in images_path.iterdir():
    shutil.copy(src_image, yolo_dataset_dir)

The YAML file will be created in the next section.

# Train Test Val split

In [20]:
# Path of the dataset generated in YOLO format: the parent of the "chalk"
# folder, i.e. the directory that contains one sub-folder per class.
# Derived from root_folder instead of repeating the absolute path, so the two
# cannot drift apart. Note that this rebinds the name yolo_dataset_dir.
yolo_dataset_dir = root_folder / "yolo_format" / "temp"

In [17]:
import splitfolders


# Split with a ratio (train / val / test = 80 / 10 / 10).
# To only split into training and validation set, set a tuple to `ratio`, i.e, `(.8, .2)`.
# group_prefix=2 keeps every image together with its label .txt (two files per
# sample). With group_prefix=None each file is assigned to a split on its own,
# so an image and its label can end up in different splits.
# Every file must then belong to a pair; splitfolders raises otherwise.
splitfolders.ratio(
    input=yolo_dataset_dir,
    output=yolo_dataset_dir.parent / "dataset",
    seed=1337,
    ratio=(.8, .1, .1),
    group_prefix=2,
    move=False,
)

Copying files: 1944 files [00:00, 2867.76 files/s]
