In [1]:
# asthma/notebooks/yolo_train_val_test_split.ipynb
import os
import sys

# Step up one directory so that all paths below are relative to the parent of
# the folder the kernel started in. REPO_PATH only exists after this cell has
# run once, so the guard keeps a re-run from climbing one level higher each
# time (a fresh kernel behaves exactly as before).
if "REPO_PATH" not in globals():
    os.chdir("./..")
# Working directory with forward slashes, used as the base of every path below.
REPO_PATH = os.getcwd().replace("\\", "/")
# Make modules under the working directory importable, without piling up
# duplicate entries when the cell is re-run.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

import shutil
import yaml
import glob
import random
from tqdm import trange

In [2]:
# Root of the un-split dataset; later cells read its images/ and labels/ sub-folders.
YOLO_FULL_DATA_PATH = f"{REPO_PATH}/data/full_dataset"
# Root under which the <split>/images and <split>/labels folders are written.
YOLO_DATA_PATH = f"{REPO_PATH}/data"
# Name of a class folder to leave out of the split, or None to keep every class.
CLASS_TO_IGNORE_DURING_TRAINING = None # "background"
# Identity comparison is the idiomatic (PEP 8) way to test for None.
if CLASS_TO_IGNORE_DURING_TRAINING is not None:
    print(f"Ignoring {CLASS_TO_IGNORE_DURING_TRAINING} class if it is found")

In [3]:
# List the entries of images/ that are directories. os.listdir returns
# entries in arbitrary order and also reports stray files (.DS_Store,
# Thumbs.db, ...), so filter to directories and sort for reproducible output.
_images_root = f"{YOLO_FULL_DATA_PATH}/images"
classes = sorted(
    entry
    for entry in os.listdir(_images_root)
    if os.path.isdir(f"{_images_root}/{entry}")
)
print(f"Found {len(classes)} classes: {classes}")

Found 4 classes: ['background', 'mouth_closed', 'mouth_sealed_on_inhaler', 'remove_cap']


In [4]:
# Collect every image stored as <full dataset>/images/<class>/<file>.jpg,
# normalising Windows separators to forward slashes.
_found_image_paths = [
    filename.replace("\\", "/")
    for filename in glob.glob(f"{YOLO_FULL_DATA_PATH}/images/*/*.jpg")
]

if CLASS_TO_IGNORE_DURING_TRAINING is not None:
    # Compare the whole class-folder name. A glob such as "[!background]*" is a
    # character-class negation: it would drop every folder whose FIRST LETTER is
    # one of b/a/c/k/g/r/o/u/n/d (e.g. "remove_cap"), not just "background".
    _found_image_paths = [
        path
        for path in _found_image_paths
        if path.split("/")[-2] != CLASS_TO_IGNORE_DURING_TRAINING
    ]

# Sort the images once by file name and derive each label path from its image,
# so index i of both lists always refers to the same sample. Sorting the two
# lists independently can misalign them because ".jpg" and ".txt" do not
# compare the same way against other characters in the name.
all_image_paths = sorted(_found_image_paths, key=lambda x: x.split("/")[-1])
all_image_filenames = [path.split("/")[-1] for path in all_image_paths]
# Labels live flat under labels/ and are assumed to share the image's stem;
# splitext swaps only the final extension.
all_label_paths = [
    f"{YOLO_FULL_DATA_PATH}/labels/{os.path.splitext(image_filename)[0]}.txt"
    for image_filename in all_image_filenames
]

In [5]:
# Sampling weights for the train / val / test splits, in that order.
train_val_test_ratio = (0.8, 0.1, 0.1)
_split_names = ("train", "val", "test")

# Fixed seed so the same sorted file list always yields the same assignment.
random.seed(42)
assignation = random.choices(
    _split_names,
    weights=train_val_test_ratio,
    k=len(all_image_paths),
)

# Sanity check: one split name per image and one label path per image.
_n_assigned = len(assignation)
_n_images = len(all_image_paths)
_n_labels = len(all_label_paths)
assert _n_assigned == _n_images == _n_labels, f"The following condition was not satisfied: {_n_assigned} == {_n_images} == {_n_labels}"

In [6]:
# Create the destination folders up front. If the target directory is missing,
# shutil.copy treats the destination as a FILE name, so every copy would
# silently overwrite a single file called "images" / "labels".
for split_name in sorted(set(assignation)):
    os.makedirs(f"{YOLO_DATA_PATH}/{split_name}/images", exist_ok=True)
    os.makedirs(f"{YOLO_DATA_PATH}/{split_name}/labels", exist_ok=True)

# Copy each image and its label into the folder of the split it was assigned to.
for i in trange(len(assignation)):
    image_path = all_image_paths[i]
    label_path = all_label_paths[i]
    assign = assignation[i]
    shutil.copy(image_path, f"{YOLO_DATA_PATH}/{assign}/images")
    shutil.copy(label_path, f"{YOLO_DATA_PATH}/{assign}/labels")

100%|██████████| 1386/1386 [00:02<00:00, 468.45it/s]


# Grab Roboflow Dataset
Source: https://universe.roboflow.com/viviana/open-close-mouth/dataset/1

In [7]:
# Folder holding the downloaded Roboflow export; later cells read its data.yaml
# and the <split>/images and <split>/labels sub-folders beneath it.
ROBOFLOW_DATASET_PATH = f"{REPO_PATH}/data/Open-Close Mouth.v1i.yolov9"

In [8]:
def _load_class_names(yaml_path):
    """Return the ``names`` entry of the YAML file at ``yaml_path``.

    Parse errors (``yaml.YAMLError``) are deliberately left to propagate:
    printing them and carrying on would leave the result undefined (or stale
    from an earlier run) and make the following cells fail in confusing ways.
    """
    with open(yaml_path) as file:
        return yaml.safe_load(file)["names"]


# Class names of the Roboflow dataset.
roboflow_data_meta = _load_class_names(f"{ROBOFLOW_DATASET_PATH}/data.yaml")
print(roboflow_data_meta)

# Class names of the inhaler dataset.
inhaler_data_meta = _load_class_names(f"{REPO_PATH}/data/inhaler.yaml")
print(inhaler_data_meta)

['close', 'open']
{0: 'remove_cap', 1: 'mouth_sealed_on_inhaler', 2: 'mouth_closed', 3: 'mouth_opened', 4: 'holding_inhaler'}


In [9]:
def _class_id(names, class_name):
    """Return the class id that ``names`` associates with ``class_name``.

    ``names`` may be a list (id = position) or a dict mapping id -> name.
    For a dict the KEY is returned; taking the position of the value instead
    is only correct when the keys happen to be 0..n-1 in order.

    Raises:
        ValueError: if ``class_name`` is not present in ``names``.
    """
    pairs = names.items() if isinstance(names, dict) else enumerate(names)
    for class_id, name in pairs:
        if name == class_name:
            return class_id
    raise ValueError(f"{class_name!r} is not in {names}")


roboflow_mouth_closed_id = _class_id(roboflow_data_meta, "close")
roboflow_mouth_open_id = _class_id(roboflow_data_meta, "open")
inhaler_mouth_closed_id = _class_id(inhaler_data_meta, "mouth_closed")
inhaler_mouth_open_id = _class_id(inhaler_data_meta, "mouth_opened")

In [10]:
# Collect every image stored as <roboflow dataset>/<split>/images/<file>.jpg and
# sort once by file name; labels are derived from the sorted images so both
# lists stay aligned index-for-index.
roboflow_image_paths = sorted(
    (
        filename.replace("\\", "/")
        for filename in glob.glob(f"{ROBOFLOW_DATASET_PATH}/*/images/*.jpg")
    ),
    key=lambda x: x.split("/")[-1],
)
roboflow_image_filenames = [path.split("/")[-1] for path in roboflow_image_paths]

roboflow_label_paths = []
for image_filepath, image_filename in zip(roboflow_image_paths, roboflow_image_filenames):
    # Read the split from the folder two levels above the file. A substring
    # test on the whole path would misfire whenever the repository location or
    # the file name itself contains "train", "val" or "test".
    assign = image_filepath.split("/")[-3]
    if assign not in ("train", "valid", "val", "test"):
        raise ValueError(f"Could not find the assignation for {image_filepath}")
    # The label sits next to the image's split folder and shares its stem;
    # splitext swaps only the final extension.
    label_filename = f"{os.path.splitext(image_filename)[0]}.txt"
    roboflow_label_paths.append(f"{ROBOFLOW_DATASET_PATH}/{assign}/labels/{label_filename}")

In [11]:
# Roboflow class id -> inhaler class id.
_roboflow_to_inhaler_id = {
    roboflow_mouth_closed_id: inhaler_mouth_closed_id,
    roboflow_mouth_open_id: inhaler_mouth_open_id,
}

for roboflow_image_path, roboflow_label_path in zip(roboflow_image_paths, roboflow_label_paths):
    image_name = roboflow_image_path.split("/")[-1]
    assign = roboflow_image_path.split("/")[-3]
    # The Roboflow export names its validation folder "valid"; ours is "val".
    if assign == "valid":
        assign = "val"

    # Remap the class id of EVERY annotation line. Rewriting only the first
    # character of the file would leave the Roboflow ids on any further lines,
    # and would crash on an empty label file (an image with no annotations).
    remapped_lines = []
    with open(roboflow_label_path) as f:
        for line in f:
            fields = line.split(maxsplit=1)
            if not fields:
                continue  # blank line
            roboflow_id = int(fields[0])
            if roboflow_id not in _roboflow_to_inhaler_id:
                raise ValueError(f"Found unknown class '{roboflow_id}' in {roboflow_label_path}")
            rest = fields[1].strip() if len(fields) > 1 else ""
            remapped_lines.append(f"{_roboflow_to_inhaler_id[roboflow_id]} {rest}".rstrip())

    # Make sure the destination folders exist; otherwise open() fails and
    # shutil.copy would write a file literally named "images".
    os.makedirs(f"{YOLO_DATA_PATH}/{assign}/labels", exist_ok=True)
    os.makedirs(f"{YOLO_DATA_PATH}/{assign}/images", exist_ok=True)

    # Generate the new label file
    label_name = f"{os.path.splitext(image_name)[0]}.txt"
    with open(f"{YOLO_DATA_PATH}/{assign}/labels/{label_name}", "w") as f:
        f.write("".join(f"{remapped}\n" for remapped in remapped_lines))

    shutil.copy(roboflow_image_path, f"{YOLO_DATA_PATH}/{assign}/images")
