In [1]:
# asthma/notebooks/yolo_train_val_test_split.ipynb
# Setup cell: switches the working directory to the parent folder and records
# it as REPO_PATH, which every dataset path in this notebook is built from.
import os
import sys
# NOTE: this is a *relative* chdir, so re-running this cell in the same kernel
# moves up one more directory each time. Restart the kernel before re-running.
os.chdir("./..")
# Make the new working directory importable.
sys.path.append(os.getcwd())

import shutil
import yaml
import glob
import random  # NOTE(review): not referenced by any cell visible here
from tqdm import trange

# Working directory with Windows backslashes normalised to "/", so the paths
# below can be split on "/" regardless of platform.
REPO_PATH = os.getcwd().replace("\\", "/")

In [2]:
# Dataset locations, all rooted at the repository folder.
# Root under which the per-split image/label copies are written.
YOLO_DATA_PATH = f"{REPO_PATH}/data"
# YAML file whose "names" entry provides the class-id -> class-name table.
INHALER_METADATA_PATH = f"{REPO_PATH}/data/inhaler.yaml"
# Un-split source dataset; its images/ and labels/ sub-folders are read below.
YOLO_FULL_DATA_PATH = f"{REPO_PATH}/data/full_dataset"

In [3]:
# Read the class-id -> class-name table ("names") from the dataset YAML.
# safe_load only builds plain Python objects, which is all this file needs,
# and it is the loader already used for the same file further down.
with open(INHALER_METADATA_PATH, "r") as file:
    inhaler_metadata = yaml.safe_load(file)["names"]

In [4]:
# Collect every .jpg of the full dataset, normalised to "/" separators and
# ordered by file name.
all_image_paths = sorted(
    (path.replace("\\", "/") for path in glob.glob(f"{YOLO_FULL_DATA_PATH}/images/*.jpg")),
    key=lambda path: path.split("/")[-1],
)
all_image_filenames = [path.split("/")[-1] for path in all_image_paths]

# Derive each label path from its image *after* sorting, so that
# all_image_paths[i] and all_label_paths[i] always describe the same frame.
# (Sorting the two lists independently can disagree when a stem contains
# dots, because ".jpg" and ".txt" compare differently against what follows.)
# splitext swaps only the final extension; a blanket str.replace would also
# rewrite ".jpg" or "images" appearing inside the file name itself.
all_label_paths = [
    f"{YOLO_FULL_DATA_PATH}/labels/{os.path.splitext(image_filename)[0]}.txt"
    for image_filename in all_image_filenames
]

In [5]:
# The source video of a frame is taken to be its file name minus the last
# "_"-separated token (assumes frames are named "<video>_<frame>.jpg").
all_video_filenames = ["_".join(filename.split("_")[:-1]) for filename in all_image_filenames]
unique_video_filenames = sorted(set(all_video_filenames))

# The split is made per video, so frames of one video never end up in both
# train and val. Training videos are listed by hand; the rest go to val.
train_video_filenames = [
    '1', '2', '4',
    'IMG_7283', 'IMG_7284', 'IMG_7288', 'IMG_7291', 'IMG_7292',
    'inhaler_cap_hands_negative_2',
    'inhaler_hand_negative_1', 'inhaler_hand_negative_2', 'inhaler_hand_positive',
    'mouth_sealed_negative_1', 'mouth_sealed_negative_2',
    'mouth_sealed_negative_5', 'mouth_sealed_negative_6',
]

# A misspelled entry above would silently move that video into val, so report
# any listed training video that has no frames in the dataset.
missing_train_videos = sorted(set(train_video_filenames) - set(unique_video_filenames))
if missing_train_videos:
    print(f"Warning: train videos with no frames in the dataset: {missing_train_videos}")

# sorted() makes the validation list reproducible; a bare set has no stable order.
val_video_filenames = sorted(set(unique_video_filenames) - set(train_video_filenames))

print(f"Train videos: {train_video_filenames}")
print(f"Val videos: {val_video_filenames}")

Train videos: ['1', '2', '4', 'IMG_7283', 'IMG_7284', 'IMG_7288', 'IMG_7291', 'IMG_7292', 'inhaler_cap_hands_negative_2', 'inhaler_hand_negative_1', 'inhaler_hand_negative_2', 'inhaler_hand_positive', 'mouth_sealed_negative_1', 'mouth_sealed_negative_2', 'mouth_sealed_negative_5', 'mouth_sealed_negative_6']
Val videos: ['inhaler_hand_negative_3', 'WIN_20240401_14_18_05_Pro', 'mouth_sealed_negative_8', 'inhaler_cap_hands_negative_1', 'WIN_20240401_14_20_35_Pro', 'mouth_sealed_negative_3', 'WIN_20240401_14_23_09_Pro', 'inhaler_hand_negative_4', 'WIN_20240401_14_14_22_Pro', 'mouth_sealed_negative_4', 'IMG_7295', 'mouth_sealed_negative_7', 'WIN_20240401_14_27_59_Pro']


In [6]:
# Build, in all_image_paths order, the split ("train" or "val") of every image
# based on the video its file name belongs to.
split_by_video = {name: "val" for name in val_video_filenames}
# Applied second so that "train" takes precedence, like an if/elif on train first.
split_by_video.update({name: "train" for name in train_video_filenames})

assignation = []
for image_path in all_image_paths:
    frame_name = image_path.split("/")[-1]
    video_filename = "_".join(frame_name.split("_")[:-1])
    split_name = split_by_video.get(video_filename)
    if split_name is None:
        raise ValueError(f"Video filename {video_filename} not found in train or val videos")
    assignation.append(split_name)

In [7]:
# Copy every image, and its label file when one exists, into
# <YOLO_DATA_PATH>/<split>/images and <YOLO_DATA_PATH>/<split>/labels.

# Create the destination folders first: shutil.copy treats a non-existent
# destination as a *file* name, so without this every image would be written
# over a single file called "images" (or the copy would fail outright).
for split_name in sorted(set(assignation)):
    os.makedirs(f"{YOLO_DATA_PATH}/{split_name}/images", exist_ok=True)
    os.makedirs(f"{YOLO_DATA_PATH}/{split_name}/labels", exist_ok=True)

for i in trange(len(assignation)):
    image_path = all_image_paths[i]
    label_path = all_label_paths[i]
    assign = assignation[i]
    shutil.copy(image_path, f"{YOLO_DATA_PATH}/{assign}/images")
    # Images without a label file are copied on their own.
    if os.path.exists(label_path):
        shutil.copy(label_path, f"{YOLO_DATA_PATH}/{assign}/labels")

100%|██████████| 3297/3297 [00:06<00:00, 485.78it/s]


### Finding the number of bounding boxes for each class in this new dataset

In [9]:
# Count bounding boxes per class name across every label file that exists.
# Each non-empty label line starts with an integer class id, which is
# translated to its name through inhaler_metadata.
classes_count = {}
for label_path in all_label_paths:
    if not os.path.exists(label_path):
        continue
    with open(label_path, "r") as f:
        for line in f:
            # split() with no argument tolerates tabs / repeated spaces and
            # yields [] for blank lines, which are skipped instead of crashing int().
            fields = line.split()
            if not fields:
                continue
            class_name = inhaler_metadata[int(fields[0])]
            classes_count[class_name] = classes_count.get(class_name, 0) + 1

print(classes_count)

{'inhaler_hand': 1845, 'cap_hand': 828, 'mouth_closed': 2665, 'mouth_opened': 666, 'mouth_sealed_on_inhaler': 544}


# Grab Roboflow Dataset
Source: https://universe.roboflow.com/viviana/open-close-mouth/dataset/1

In [10]:
# Folder of the downloaded Roboflow dataset; the cells below read its
# data.yaml and its <split>/images and <split>/labels sub-folders.
ROBOFLOW_DATASET_PATH = f"{REPO_PATH}/data/Open-Close Mouth.v1i.yolov9"

In [11]:
def _load_class_names(yaml_path):
    """Return the ``names`` entry of a dataset YAML file.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        Whatever the file stores under ``names`` (a list for the Roboflow
        export, a dict keyed by class id for the inhaler dataset).

    Raises:
        yaml.YAMLError: If the file is not valid YAML. The error is left to
            propagate: swallowing it would leave the variables below undefined
            (or stale from an earlier run) and fail later in a confusing way.
        KeyError: If the file has no ``names`` entry.
    """
    with open(yaml_path) as file:
        return yaml.safe_load(file)["names"]


roboflow_data_meta = _load_class_names(f"{ROBOFLOW_DATASET_PATH}/data.yaml")
print(roboflow_data_meta)

# Same file as the one loaded into inhaler_metadata; reuse the shared constant.
inhaler_data_meta = _load_class_names(INHALER_METADATA_PATH)
print(inhaler_data_meta)

['close', 'open']
{0: 'inhaler_hand', 1: 'mouth_sealed_on_inhaler', 2: 'mouth_closed', 3: 'cap_hand', 4: 'mouth_opened'}


In [12]:
def _class_id(names, class_name):
    """Return the class id that ``names`` assigns to ``class_name``.

    Args:
        names: Either a dict mapping class id -> class name, or a sequence
            of class names whose position is the class id.
        class_name: The name to look up.

    Returns:
        The dict key (for a dict) or the index (for a sequence).

    Raises:
        ValueError: If ``class_name`` is not present.
    """
    if isinstance(names, dict):
        # Return the actual key. Taking the position within values() is only
        # correct when the keys happen to be 0..n-1 in insertion order.
        for class_id, name in names.items():
            if name == class_name:
                return class_id
        raise ValueError(f"{class_name!r} is not in the class names")
    return list(names).index(class_name)


roboflow_mouth_closed_id = _class_id(roboflow_data_meta, "close")
roboflow_mouth_open_id = _class_id(roboflow_data_meta, "open")
inhaler_mouth_closed_id = _class_id(inhaler_data_meta, "mouth_closed")
inhaler_mouth_open_id = _class_id(inhaler_data_meta, "mouth_opened")

In [13]:
# Collect every Roboflow .jpg (<dataset>/<split>/images/<name>.jpg), normalised
# to "/" separators and ordered by file name.
roboflow_image_paths = sorted(
    (path.replace("\\", "/") for path in glob.glob(f"{ROBOFLOW_DATASET_PATH}/*/images/*.jpg")),
    key=lambda path: path.split("/")[-1],
)
roboflow_image_filenames = [path.split("/")[-1] for path in roboflow_image_paths]

# Derive each label path from its image *after* sorting so both lists stay
# index-aligned.
roboflow_label_paths = []
for image_filepath in roboflow_image_paths:
    # The split is the folder two levels above the file. Searching the whole
    # path for "train"/"val"/"test" would also match those letters anywhere in
    # the repository path or in the image file name.
    assign = image_filepath.split("/")[-3]
    if assign not in ("train", "valid", "test"):
        raise ValueError(f"Could not find the assignation for {image_filepath}")
    # splitext swaps only the final extension, unlike a blanket str.replace.
    label_filename = os.path.splitext(image_filepath.split("/")[-1])[0] + ".txt"
    roboflow_label_paths.append(f"{ROBOFLOW_DATASET_PATH}/{assign}/labels/{label_filename}")

In [14]:
# Count the number of mouth opened and mouth closed in Roboflow Dataset
# Roboflow class ids are reported under the inhaler dataset's class names;
# any other id is counted under its raw integer value.
roboflow_class_names = {
    roboflow_mouth_closed_id: "mouth_closed",
    roboflow_mouth_open_id: "mouth_opened",
}
roboflow_classes_count = {}
for label_path in roboflow_label_paths:
    with open(label_path, "r") as f:
        for line in f:
            # split() yields [] for blank lines, which are skipped instead of
            # crashing int(); it also tolerates tabs / repeated spaces.
            fields = line.split()
            if not fields:
                continue
            roboflow_id = int(fields[0])
            class_key = roboflow_class_names.get(roboflow_id, roboflow_id)
            roboflow_classes_count[class_key] = roboflow_classes_count.get(class_key, 0) + 1
print(roboflow_classes_count)

{'mouth_closed': 727, 'mouth_opened': 721}


In [15]:
# Merge the Roboflow dataset into the YOLO split folders: every label file is
# rewritten with the inhaler dataset's class ids and its image is copied
# alongside. Roboflow's "valid" split goes into the "val" folder.
roboflow_to_inhaler_id = {
    roboflow_mouth_closed_id: inhaler_mouth_closed_id,
    roboflow_mouth_open_id: inhaler_mouth_open_id,
}

for roboflow_image_path, roboflow_label_path in zip(roboflow_image_paths, roboflow_label_paths):
    image_name = roboflow_image_path.split("/")[-1]
    assign = roboflow_image_path.split("/")[-3]
    if assign == "valid":
        assign = "val"

    # Remap the class id of EVERY box. Rewriting only the first character of
    # the file would leave later boxes with Roboflow ids, which mean different
    # classes in the inhaler dataset, and would break on multi-digit ids.
    remapped_lines = []
    with open(roboflow_label_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # blank line
            roboflow_id = int(fields[0])
            if roboflow_id not in roboflow_to_inhaler_id:
                raise ValueError(f"Found unknown class '{roboflow_id}' in {roboflow_label_path}")
            fields[0] = str(roboflow_to_inhaler_id[roboflow_id])
            remapped_lines.append(" ".join(fields))

    # Make sure the destination folders exist (a split folder such as "test"
    # may not have been created yet); shutil.copy would otherwise write a
    # file named "images" instead of copying into a folder.
    labels_dir = f"{YOLO_DATA_PATH}/{assign}/labels"
    images_dir = f"{YOLO_DATA_PATH}/{assign}/images"
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    # Generate the new label file (an empty file when the image has no boxes).
    label_name = os.path.splitext(image_name)[0] + ".txt"
    with open(f"{labels_dir}/{label_name}", "w") as f:
        f.write("".join(f"{remapped_line}\n" for remapped_line in remapped_lines))

    shutil.copy(roboflow_image_path, images_dir)
