# Generate YOLO dataset

In [1]:
from pathlib import Path

In [2]:
# Project root: two directory levels above the kernel's current working directory.
# NOTE(review): this depends on where the kernel was started — presumably the
# notebook's own folder; confirm before running from elsewhere.
HOME = Path.cwd().parent.parent
HOME

PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader')

In [3]:
# Folder with the annotation files (presumably YOLO keypoint labels, per the folder name).
annotations_dir = HOME/"annotations/keypoints/yolo"
# Displayed as a quick check that the folder is present before listing it.
annotations_dir.exists()

True

In [4]:
# Every entry of the annotations folder, unfiltered; iterdir() yields them in
# arbitrary (filesystem-dependent) order.
paths = list(annotations_dir.iterdir())
paths[:5]

[PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/annotations/keypoints/yolo/074eddc202491b98ef41eb4b098561af_jpeg.rf.80951684464b2c2733fd7577e30a6f1f.txt'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/annotations/keypoints/yolo/2eafac15546e9c5cd8538695b90570b4_jpeg.rf.6483e1264459d9432fb5aae035bb25b8.txt'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/annotations/keypoints/yolo/0b4fe808d14279d05af251ae3cc97ece_jpg.rf.71c1d8794ed89ff05cd022159ee57d3b.txt'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/annotations/keypoints/yolo/269e57cbfd1ab558bb5c293f341df292_jpeg.rf.7a60b73403ec7eae0dba92b41121fa49.txt'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/annotations/keypoints/yolo/0dfd35bfcf3c5d0cb81e30e5a982a1a2_jpeg.rf.ca05e80a562fbb51d64220730e531352.txt')]

In [5]:
# Number of annotation entries found.
len(paths)

1200

In [6]:
from random import Random, sample, shuffle

In [7]:
# Fixed seed so the train/val split is identical on every run.
SPLIT_SEED = 42
# Sort before shuffling: `paths` comes from iterdir(), whose order is arbitrary,
# so a seed alone would not pin the permutation. sorted() also returns a new
# list, leaving `paths` itself untouched.
shuffled_annotation_paths = sorted(paths)
# A dedicated generator keeps this cell idempotent regardless of what other
# cells have drawn from the global random state.
Random(SPLIT_SEED).shuffle(shuffled_annotation_paths)

In [8]:
# Folder with the source images, annotated and un-annotated alike.
original_dataset_dir = HOME/"datasets/original"

In [9]:
# Peek at the first few entries of the source image folder.
list(original_dataset_dir.iterdir())[:5]

[PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/51bd4725d65b0175b0b4447f4b8b630d_jpeg.rf.cf17e4d4390813c1cec05df0572ffe49.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/9387b9d621e5627c0794057d236d4b25_jpeg.rf.e776501d01c4319111a4d596adea40d0.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/24899b8291ff742a2f397962fbbfd16d_jpeg.rf.1dfedd93fbf7f93a7ec5e736c3eaabcf.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/61073e3fd3eb7b1d90161e84e13fd112_jpeg.rf.f04b5d8cf09d0bac7262f8a1037b9495.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/4307aa316c050e1fd9cee80044ce2305_jpeg.rf.f0d9e00bca600044d840f1e3a571177c.jpg')]

In [10]:
# Stems of the annotation files; an image counts as annotated when its stem matches one.
annotated_image_names = [p.stem for p in paths]
# Set copy for O(1) membership tests (the list would be scanned once per image).
annotated_stems = set(annotated_image_names)
# NOTE: despite the name, this holds the full image Paths of every source
# image that has no annotation file with a matching stem.
unseen_image_names = [
    img_path for img_path in original_dataset_dir.iterdir()
    if img_path.stem not in annotated_stems
    ]
len(unseen_image_names)

5953

In [11]:
# Note: despite the name, the entries are full image Paths, not bare file names.
unseen_image_names[:3]

[PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/51bd4725d65b0175b0b4447f4b8b630d_jpeg.rf.cf17e4d4390813c1cec05df0572ffe49.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/9387b9d621e5627c0794057d236d4b25_jpeg.rf.e776501d01c4319111a4d596adea40d0.jpg'),
 PosixPath('/home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/original/61073e3fd3eb7b1d90161e84e13fd112_jpeg.rf.f04b5d8cf09d0bac7262f8a1037b9495.jpg')]

In [12]:
# Number of shuffled annotations assigned to the training split; the remainder
# becomes the validation split.
TRAINING_IMAGES_NUM = 1000

In [13]:
# Size of the un-annotated test sample and the seed that pins which images are drawn.
TEST_IMAGES_NUM = 100
TEST_SAMPLE_SEED = 42

# The first TRAINING_IMAGES_NUM shuffled annotations go to train, the rest to val.
annotation_paths_train = shuffled_annotation_paths[:TRAINING_IMAGES_NUM]
annotation_paths_val = shuffled_annotation_paths[TRAINING_IMAGES_NUM:]

# Test images are drawn from the un-annotated pool. Sorting first removes the
# dependence on directory listing order, and a dedicated seeded generator makes
# re-running this cell pick exactly the same images.
# sample() raises ValueError if fewer than TEST_IMAGES_NUM images are available.
image_paths_test = Random(TEST_SAMPLE_SEED).sample(
    sorted(unseen_image_names), TEST_IMAGES_NUM
)

In [14]:
# Distinct file extensions present in the source image folder.
{image_file.suffix for image_file in original_dataset_dir.iterdir()}

{'.jpg'}

In [15]:
import shutil

In [16]:
def reset_yolo_dataset_structure(dataset_dir: "Path | str"):
    """
    Clears a YOLO dataset directory and resets it to the default empty structure.

    Everything under ``dataset_dir`` is deleted. The ``train`` and ``val``
    splits are then recreated with ``images`` and ``labels`` sub-folders, and
    the ``test`` split with ``images`` only.

    Args:
        dataset_dir (Path | str): Path to the root dataset directory. Plain
            strings are accepted and converted to ``Path``.

    Raises:
        OSError: Propagated from ``shutil.rmtree`` or ``Path.mkdir``, e.g. when
            ``dataset_dir`` exists but is a regular file and the sub-folders
            therefore cannot be created.
    """
    # Coerce up front: the `/` joins below only work on Path objects.
    dataset_dir = Path(dataset_dir)

    # Define the expected structure
    structure = [
        dataset_dir / 'train' / 'images',
        dataset_dir / 'train' / 'labels',
        dataset_dir / 'val' / 'images',
        dataset_dir / 'val' / 'labels',
        dataset_dir / 'test' / 'images',
    ]

    # Remove the dataset directory if it exists (is_dir() is False for a
    # missing path, so no separate exists() check is needed)
    if dataset_dir.is_dir():
        shutil.rmtree(dataset_dir)

    # Recreate the directory structure
    for path in structure:
        path.mkdir(parents=True, exist_ok=True)

    print(f"YOLO dataset directory reset at: {dataset_dir}")

In [17]:
# Root of the generated YOLO dataset.
dataset_path = HOME/"datasets/yolo"
# WARNING: deletes everything already under dataset_path before recreating the
# empty train/val/test folders.
reset_yolo_dataset_structure(dataset_path)

YOLO dataset directory reset at: /home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/yolo


In [18]:
# Split name -> source paths for that split. "train" and "val" hold annotation
# file paths; "test" holds image paths directly (it has no annotations).
annotation_groups_map = {
    "train": annotation_paths_train,
    "val": annotation_paths_val,
    "test": image_paths_test,
}

In [19]:
# Populate the split folders. Labelled splits get each annotation file plus the
# image with the same stem; the test split gets images only.
# The loop variables are deliberately NOT called `paths`/`path`: rebinding the
# notebook-level `paths` (the annotation list) would corrupt earlier cells on re-run.
for group, group_paths in annotation_groups_map.items():
    image_group_path = dataset_path / group / "images"
    label_group_path = dataset_path / group / "labels"
    for source_path in group_paths:
        if group.lower() == "test":
            # Test entries are already image paths.
            shutil.copy2(source_path, image_group_path)
        else:
            # Annotation entry: look up the image sharing its stem.
            image_path = original_dataset_dir / f"{source_path.stem}.jpg"
            shutil.copy2(image_path, image_group_path)
            shutil.copy2(source_path, label_group_path)

In [21]:
import os

In [22]:
# Sanity check: report how many images and labels ended up in each split folder.
for root, dirnames, filenames in os.walk(dataset_path):
    # A directory with an images/ child is treated as a split root.
    if "images" not in dirnames:
        continue
    root_path = Path(root)
    print("[ROOT] ", root)
    images_num = sum(1 for _ in (root_path / "images").iterdir())
    # Count labels wherever a labels/ folder actually exists, instead of
    # special-casing the split name; a split without labels/ reports 0.
    labels_num = (
        sum(1 for _ in (root_path / "labels").iterdir())
        if "labels" in dirnames
        else 0
    )
    print("[IMAGES] ", images_num)
    print("[LABELS] ", labels_num)

[ROOT]  /home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/yolo/train
[IMAGES]  1000
[LABELS]  1000
[ROOT]  /home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/yolo/val
[IMAGES]  200
[LABELS]  200
[ROOT]  /home/ubuntu_wsl/computer_vision/analog_watch_reader/datasets/yolo/test
[IMAGES]  100
[LABELS]  0
