## sampling

In [None]:
import os
import shutil
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def sample_and_copy_files_from_subdir(root, subdirs, source_dir, target_dir, num_samples_subdir, num_samples_images):
    sampled_subdirs = random.sample(subdirs, min(num_samples_subdir, len(subdirs)))

    for subdir in sampled_subdirs:
        subdir_path = os.path.join(root, subdir)
        files = os.listdir(subdir_path)
        image_files = [file for file in files if file.lower().endswith(('jpg', 'jpeg', 'png'))]
        xml_files = [file for file in files if file.lower().endswith('xml')]

        # Ensure each image has a corresponding XML file
        image_xml_pairs = [(img, img.replace(os.path.splitext(img)[1], '.xml')) for img in image_files if img.replace(os.path.splitext(img)[1], '.xml') in xml_files]

        # Sample the pairs
        sampled_pairs = random.sample(image_xml_pairs, min(num_samples_images, len(image_xml_pairs)))

        # Create the corresponding target directory
        relative_path = os.path.relpath(subdir_path, source_dir)
        target_subdir = os.path.join(target_dir, relative_path)
        os.makedirs(target_subdir, exist_ok=True)

        # Copy the sampled files
        for img, xml in sampled_pairs:
            shutil.copy(os.path.join(subdir_path, img), os.path.join(target_subdir, img))
            shutil.copy(os.path.join(subdir_path, xml), os.path.join(target_subdir, xml))

def sample_and_copy_files(source_dir, target_dir, num_samples_subdir=10, num_samples_images=50):
    with ThreadPoolExecutor() as executor:
        futures = []
        for root, dirs, _ in os.walk(source_dir):
            if root == source_dir:
                for ps_dir in dirs:
                    ps_path = os.path.join(root, ps_dir)
                    ps_subdirs = [d for d in os.listdir(ps_path) if os.path.isdir(os.path.join(ps_path, d))]
                    futures.append(executor.submit(sample_and_copy_files_from_subdir, ps_path, ps_subdirs, source_dir, target_dir, num_samples_subdir, num_samples_images))

        for future in as_completed(futures):
            future.result()

if __name__ == "__main__":
    source_dir = r"C:\Users\ybr5070\Desktop\all_frames"  # Replace with the path to your source directory
    target_dir = r"C:\Users\ybr5070\Desktop\sampled"  # Replace with the path to your target directory
    sample_and_copy_files(source_dir, target_dir)


## split into train, val,test

In [None]:
import os
import shutil
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def split_and_copy_folders(base_folder, train_folder, test_folder, val_folder, train_pct=0.7, test_pct=0.15):
    all_subfolders = []
    for root, dirs, _ in os.walk(base_folder):
        if root != base_folder:
            for subdir in dirs:
                all_subfolders.append(os.path.join(root, subdir))

    random.shuffle(all_subfolders)

    total_subfolders = len(all_subfolders)
    train_end = int(total_subfolders * train_pct)
    test_end = train_end + int(total_subfolders * test_pct)

    train_subfolders = all_subfolders[:train_end]
    test_subfolders = all_subfolders[train_end:test_end]
    val_subfolders = all_subfolders[test_end:]

    def copy_subfolders(subfolders, destination):
        for subfolder in subfolders:
            subfolder_name = os.path.basename(subfolder)
            dst_path = os.path.join(destination, subfolder_name)
            try:
                shutil.copytree(subfolder, dst_path)
                print(f"Copied {subfolder_name} to {destination}")
            except Exception as e:
                print(f"Error copying {subfolder_name} to {destination}: {e}")

    with ThreadPoolExecutor() as executor:
        futures = []
        futures.append(executor.submit(copy_subfolders, train_subfolders, train_folder))
        futures.append(executor.submit(copy_subfolders, test_subfolders, test_folder))
        futures.append(executor.submit(copy_subfolders, val_subfolders, val_folder))

        for future in as_completed(futures):
            future.result()

# Example usage
base_folder = r"C:\Users\ybr5070\Desktop\sampled"
train_folder = r"C:\Users\ybr5070\Desktop\split\train"
test_folder = r"C:\Users\ybr5070\Desktop\split\test"
val_folder = r"C:\Users\ybr5070\Desktop\split\val"

split_and_copy_folders(base_folder, train_folder, test_folder, val_folder)


# Transformations on train set

In [1]:
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import os
import glob
import logging
from concurrent.futures import ThreadPoolExecutor


# Function to read the image frame and the corresponding XML file
def read_frame_and_xml(frame_path, xml_path):
    frame = cv2.imread(frame_path)
    tree = ET.parse(xml_path)
    root = tree.getroot()
    return frame, tree, root

# Image transformation functions
def y_reflection(frame):
    return cv2.flip(frame, 1)

def gaussian_blur(frame, kernel_size=(5, 5), sigmaX=0):
    return cv2.GaussianBlur(frame, kernel_size, sigmaX)

def adjust_brightness(frame, value=30):
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    lim = 255 - value
    v[v > lim] = 255
    v[v <= lim] += value
    final_hsv = cv2.merge((h, s, v))
    frame = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return frame

def change_orientation_to_portrait(frame):
    if frame.shape[1] > frame.shape[0]:
        frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
    return frame

def crop_and_rotate(frame, angle=10):
    height, width = frame.shape[:2]
    center = (width // 2, height // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    abs_cos = abs(M[0, 0])
    abs_sin = abs(M[0, 1])

    bound_w = int(height * abs_sin + width * abs_cos)
    bound_h = int(height * abs_cos + width * abs_sin)

    M[0, 2] += bound_w / 2 - center[0]
    M[1, 2] += bound_h / 2 - center[1]

    rotated = cv2.warpAffine(frame, M, (bound_w, bound_h))
    return rotated

# XML transformation functions
def update_xml_for_y_reflection(root, image_width):
    for obj in root.iter('object'):
        bndbox = obj.find('bndbox')
        xmin = image_width - int(bndbox.find('xmax').text)
        xmax = image_width - int(bndbox.find('xmin').text)
        bndbox.find('xmin').text = str(xmin)
        bndbox.find('xmax').text = str(xmax)
    return root

def rotate_point(x, y, M):
    point = np.array([[x], [y], [1]])
    transformed_point = np.dot(M, point)
    return int(transformed_point[0]), int(transformed_point[1])

def update_xml_for_portrait(root, original_width, original_height):
    for obj in root.iter('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        new_xmin = original_height - ymax
        new_ymin = xmin
        new_xmax = original_height - ymin
        new_ymax = xmax

        bndbox.find('xmin').text = str(new_xmin)
        bndbox.find('ymin').text = str(new_ymin)
        bndbox.find('xmax').text = str(new_xmax)
        bndbox.find('ymax').text = str(new_ymax)

    return root

def update_xml_for_rotation(root, angle, image_width, image_height):
    center_x, center_y = image_width // 2, image_height // 2
    M = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)

    abs_cos = abs(M[0, 0])
    abs_sin = abs(M[0, 1])
    bound_w = int(image_height * abs_sin + image_width * abs_cos)
    bound_h = int(image_height * abs_cos + image_width * abs_sin)

    M[0, 2] += bound_w / 2 - center_x
    M[1, 2] += bound_h / 2 - center_y

    for obj in root.iter('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        rotated_corners = [rotate_point(x, y, M) for x, y in [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]]

        xs = [x for x, _ in rotated_corners]
        ys = [y for _, y in rotated_corners]
        new_xmin, new_xmax = min(xs), max(xs)
        new_ymin, new_ymax = min(ys), max(ys)

        bndbox.find('xmin').text = str(new_xmin)
        bndbox.find('ymin').text = str(new_ymin)
        bndbox.find('xmax').text = str(new_xmax)
        bndbox.find('ymax').text = str(new_ymax)

    return root

def process_frame_and_xml(frame_path, xml_path, output_path):
    filename = os.path.splitext(os.path.basename(frame_path))[0]
    transformations = {
        'y_reflected': (y_reflection, update_xml_for_y_reflection),
        'blurred': (gaussian_blur, None),
        'brightness_adjusted': (adjust_brightness, None),
        'portrait': (change_orientation_to_portrait, update_xml_for_portrait),
        'rotated': (lambda f: crop_and_rotate(f, angle=10), update_xml_for_rotation)
    }

    for trans_name, (trans_func, xml_func) in transformations.items():
        try:
            original_frame, tree, root = read_frame_and_xml(frame_path, xml_path)
            transformed_frame = trans_func(original_frame)
            new_root = ET.ElementTree(root).getroot() if xml_func else root

            if xml_func:
                if trans_name == 'rotated':
                    new_root = xml_func(new_root, 10, original_frame.shape[1], original_frame.shape[0])
                elif trans_name == 'portrait':
                    new_root = xml_func(new_root, original_frame.shape[1], original_frame.shape[0])
                elif trans_name == 'y_reflected':
                    new_root = xml_func(new_root, original_frame.shape[1])

            new_tree = ET.ElementTree(new_root)
            new_tree.write(os.path.join(output_path, f'{filename}_{trans_name}.xml'))
            cv2.imwrite(os.path.join(output_path, f'{filename}_{trans_name}.jpg'), transformed_frame)
            print(f"Processed {filename} with transformation {trans_name}")
        except Exception as e:
            print(f"Error processing {filename} with transformation {trans_name}: {e}")

def process_directory(input_dir, output_dir):
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = []
        for subject_folder in os.listdir(input_dir):
            subject_path = os.path.join(input_dir, subject_folder)
            if os.path.isdir(subject_path):
                output_subject_path = os.path.join(output_dir, subject_folder)
                os.makedirs(output_subject_path, exist_ok=True)

                frame_paths = glob.glob(os.path.join(subject_path, '*.jpg'))
                xml_paths = {os.path.splitext(os.path.basename(p))[0]: p for p in glob.glob(os.path.join(subject_path, '*.xml'))}

                for frame_path in frame_paths:
                    frame_basename = os.path.splitext(os.path.basename(frame_path))[0]
                    xml_path = xml_paths.get(frame_basename)
                    if xml_path:
                        future = executor.submit(process_frame_and_xml, frame_path, xml_path, output_subject_path)
                        futures.append(future)
                    else:
                        print(f"Skipping frame {frame_basename} due to missing XML file")

        for future in futures:
            future.result()

    print("All directories processed!")

# Example usage
train_dir =r"C:\Users\ybr5070\Desktop\split\train"  # Replace with your actual train directory
output_dir = r"C:\Users\ybr5070\Desktop\split\train_trans" # Replace with your actual output directory

process_directory(train_dir, output_dir)


Processed frame10147 with transformation y_reflectedProcessed frame10012 with transformation y_reflected
Processed frame1408 with transformation y_reflected

Processed frame1671 with transformation y_reflected
Processed frame231 with transformation y_reflected
Processed frame2465 with transformation y_reflected
Processed frame2284 with transformation y_reflected
Processed frame2427 with transformation y_reflected
Processed frame1963 with transformation y_reflected
Processed frame2097 with transformation y_reflected
Processed frame2812 with transformation y_reflected
Processed frame4904 with transformation y_reflected
Processed frame4283 with transformation y_reflected
Processed frame2532 with transformation y_reflected
Processed frame3450 with transformation y_reflected
Processed frame4034 with transformation y_reflected
Processed frame3260 with transformation y_reflected
Processed frame4597 with transformation y_reflected
Processed frame2827 with transformation y_reflected
Processed f

# resize to 510,300

In [1]:
import os
from PIL import Image
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor, as_completed

def resize_image(image_path, output_path, target_size):
    try:
        with Image.open(image_path) as img:
            original_size = img.size
            img = img.resize(target_size, Image.LANCZOS)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            img.save(output_path)
        print(f"Resized image saved to {output_path}")
        return original_size
    except Exception as e:
        print(f"Error resizing image {image_path}: {e}")
        return None

def resize_xml(xml_path, output_path, original_size, target_size):
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        
        for size in root.findall('size'):
            size.find('width').text = str(target_size[0])
            size.find('height').text = str(target_size[1])

        scale_x = target_size[0] / original_size[0]
        scale_y = target_size[1] / original_size[1]

        for obj in root.findall('object'):
            for box in obj.findall('bndbox'):
                box.find('xmin').text = str(int(float(box.find('xmin').text) * scale_x))
                box.find('ymin').text = str(int(float(box.find('ymin').text) * scale_y))
                box.find('xmax').text = str(int(float(box.find('xmax').text) * scale_x))
                box.find('ymax').text = str(int(float(box.find('ymax').text) * scale_y))

        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        tree.write(output_path)
        print(f"Resized XML saved to {output_path}")
    except Exception as e:
        print(f"Error resizing XML {xml_path}: {e}")

def process_files(root, files, source_base_dir, output_base_dir, target_size):
    for file in files:
        if file.lower().endswith(('jpg', 'jpeg', 'png')):
            image_path = os.path.join(root, file)
            relative_path = os.path.relpath(image_path, source_base_dir)
            output_image_path = os.path.join(output_base_dir, relative_path)

            xml_path = image_path.replace(os.path.splitext(file)[1], '.xml')
            output_xml_path = output_image_path.replace(os.path.splitext(file)[1], '.xml')

            original_size = resize_image(image_path, output_image_path, target_size)
            if original_size and os.path.exists(xml_path):
                resize_xml(xml_path, output_xml_path, original_size, target_size)

def resize_files_in_directory(source_base_dir, output_base_dir, target_size=(510,510)):
    with ThreadPoolExecutor() as executor:
        futures = []
        for root, dirs, files in os.walk(source_base_dir):
            futures.append(executor.submit(process_files, root, files, source_base_dir, output_base_dir, target_size))

        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing files: {e}")

    print("All files processed!")

# Run the resizing process
source_base_dir = r"C:\Users\ybr5070\Desktop\split"  # Replace with the path to your new sampled directory
output_base_dir = r"C:\Users\ybr5070\Desktop\split_resized"  # Replace with the path to your resized output directory
resize_files_in_directory(source_base_dir, output_base_dir)

Resized image saved to C:\Users\ybr5070\Desktop\split_resized\test\R01_072_V4_PS2_fixed\frame10267.jpg
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_001_V2_PS1_fixed\frame10012.jpg
Resized XML saved to C:\Users\ybr5070\Desktop\split_resized\test\R01_072_V4_PS2_fixed\frame10267.xml
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_006_V4_PS1_fixed\frame1037.jpg
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\test\R01_069_V2_PS1_fixed\frame1114.jpg
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\test\R01_127_V3_PS2_fixed\frame1162.jpg
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_038_V3_PS1_fixed\frame1065.jpg
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_009_V4_PS4_fixed\frame1015.jpg
Resized XML saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_001_V2_PS1_fixed\frame10012.xml
Resized image saved to C:\Users\ybr5070\Desktop\split_resized\train\R01_005_V

# convert xml to yolo files
remember to add classes.txt everywhere

In [2]:
import os
import shutil
import concurrent.futures
import xml.etree.ElementTree as ET

def convert_xml_to_yolo(xml_file, dest_dir):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    yolo_annotations = []

    for obj in root.findall('object'):
        name = obj.find('name').text
        if name == 'child_face':  # Assuming class 'child_face' has an ID of 15
            class_id = 15
        else:
            continue

        xmlbox = obj.find('bndbox')
        xmin = int(xmlbox.find('xmin').text)
        xmax = int(xmlbox.find('xmax').text)
        ymin = int(xmlbox.find('ymin').text)
        ymax = int(xmlbox.find('ymax').text)

        x_center = (xmin + xmax) / 2.0 / width
        y_center = (ymin + ymax) / 2.0 / height
        w = (xmax - xmin) / float(width)
        h = (ymax - ymin) / float(height)

        yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

    yolo_file = os.path.join(dest_dir, os.path.splitext(os.path.basename(xml_file))[0] + '.txt')
    ensure_directory_exists(dest_dir)
    with open(yolo_file, 'w') as f:
        f.write("\n".join(yolo_annotations))

def ensure_directory_exists(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def move_file(src_file, dest_dir):
    ensure_directory_exists(dest_dir)
    shutil.copy(src_file, os.path.join(dest_dir, os.path.basename(src_file)))

def process_annotations(src_dir, dest_txt_dir):
    xml_tasks = []

    for root, dirs, files in os.walk(src_dir):
        for file in files:
            if file.endswith(".xml"):
                src_file = os.path.join(root, file)
                dest = os.path.join(dest_txt_dir, os.path.relpath(root, src_dir))
                xml_tasks.append((src_file, dest))
                convert_xml_to_yolo(src_file, dest)

    return xml_tasks

base_dir = r"C:\Users\ybr5070\Desktop\split_resized"
new_base_dir = r"C:\Users\ybr5070\Desktop\yolo_split\labels"
categories = ["train", "train_trans", "val", "test"]

all_txt_tasks = []

# Process XML tasks (conversion and move)
for category in categories:
    src_dir = os.path.join(base_dir, category)
    dest_txt_dir = os.path.join(new_base_dir, category)
    process_annotations(src_dir, dest_txt_dir)


# move images 

In [3]:
import os
import shutil
import concurrent.futures

def ensure_directory_exists(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def move_file(src_file, dest_dir):
    ensure_directory_exists(dest_dir)
    shutil.copy(src_file, os.path.join(dest_dir, os.path.basename(src_file)))

def process_images(src_dir, dest_img_dir):
    img_tasks = []

    for root, dirs, files in os.walk(src_dir):
        for file in files:
            if file.endswith(".jpg"):  # Ensure it only processes .jpg files
                src_file = os.path.join(root, file)
                dest = os.path.join(dest_img_dir, os.path.relpath(root, src_dir))
                img_tasks.append((src_file, dest))

    return img_tasks

def move_files_in_parallel(tasks):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(lambda p: move_file(*p), tasks)

base_dir = r"C:\Users\ybr5070\Desktop\split_resized"
new_base_dir = r"C:\Users\ybr5070\Desktop\yolo_split\images"
categories = ["train", "train_trans", "val", "test"]

all_img_tasks = []

# Process image tasks
for category in categories:
    src_dir = os.path.join(base_dir, category)
    dest_img_dir = os.path.join(new_base_dir, category)
    img_tasks = process_images(src_dir, dest_img_dir)
    all_img_tasks.extend(img_tasks)

# Ensure all directories are created before copying files
for task in all_img_tasks:
    _, dest_dir = task
    ensure_directory_exists(dest_dir)

# Move image files in parallel
move_files_in_parallel(all_img_tasks)

# merge bucket by bucket 

In [4]:
import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed

# Define the base paths
base_path = r"C:\Users\ybr5070\Desktop\yolo_split"
output_path = r"C:\Users\ybr5070\Desktop\yolo_merged_1"

# Ensure the output directories exist
os.makedirs(os.path.join(output_path, "images", "train"), exist_ok=True)
os.makedirs(os.path.join(output_path, "images", "val"), exist_ok=True)
os.makedirs(os.path.join(output_path, "images", "test"), exist_ok=True)
os.makedirs(os.path.join(output_path, "images", "trans"), exist_ok=True)

os.makedirs(os.path.join(output_path, "labels", "train"), exist_ok=True)
os.makedirs(os.path.join(output_path, "labels", "val"), exist_ok=True)
os.makedirs(os.path.join(output_path, "labels", "test"), exist_ok=True)
os.makedirs(os.path.join(output_path, "labels", "trans"), exist_ok=True)

def copy_and_rename_file(image_file, label_file, dest_image_dir, dest_label_dir, index):
    # Copy image and label files to the destination directory with new index
    shutil.copy(image_file, os.path.join(dest_image_dir, f"{index}.jpg"))
    shutil.copy(label_file, os.path.join(dest_label_dir, f"{index}.txt"))

def process_directory(source_image_dir, source_label_dir, dest_image_dir, dest_label_dir, start_index):
    index = start_index
    tasks = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for root, _, files in os.walk(source_image_dir):
            for file in files:
                if file.endswith(".jpg") or file.endswith(".png"):
                    image_file = os.path.join(root, file)
                    label_file = os.path.join(root.replace('images', 'labels'), file.replace('.jpg', '.txt').replace('.png', '.txt'))
                    tasks.append(executor.submit(copy_and_rename_file, image_file, label_file, dest_image_dir, dest_label_dir, index))
                    index += 1
        for task in as_completed(tasks):
            task.result()
    return index

# Process each set
index_train = process_directory(os.path.join(base_path, "images", "train"), os.path.join(base_path, "labels", "train"), os.path.join(output_path, "images", "train"), os.path.join(output_path, "labels", "train"), 0)
index_trans = process_directory(os.path.join(base_path, "images", "trans"), os.path.join(base_path, "labels", "trans"), os.path.join(output_path, "images", "trans"), os.path.join(output_path, "labels", "trans"), 0)
index_val = process_directory(os.path.join(base_path, "images", "val"), os.path.join(base_path, "labels", "val"), os.path.join(output_path, "images", "val"), os.path.join(output_path, "labels", "val"), 0)
index_test = process_directory(os.path.join(base_path, "images", "test"), os.path.join(base_path, "labels", "test"), os.path.join(output_path, "images", "test"), os.path.join(output_path, "labels", "test"), 0)

# Add train_trans to train after trans
index_train_trans = process_directory(os.path.join(base_path, "images", "train_trans"), os.path.join(base_path, "labels", "train_trans"), os.path.join(output_path, "images", "train"), os.path.join(output_path, "labels", "train"), index_train)

print("Files have been successfully renamed and merged.")


Files have been successfully renamed and merged.


# cleanup classes and annotations

In [6]:
import os

# Mapping of class names to class IDs
class_name_to_id = {
    'dog': 0,
    'person': 1,
    'cat': 2,
    'tv': 3,
    'car': 4,
    'meatballs': 5,
    'marinara sauce': 6,
    'tomato soup': 7,
    'chicken noodle soup': 8,
    'french onion soup': 9,
    'chicken breast': 10,
    'ribs': 11,
    'pulled pork': 12,
    'hamburger': 13,
    'cavity': 14,
    'child_face': 15
}

def convert_annotation(file_path, target_class_id, new_class_id):
    """
    Convert annotations for target_class_id to new_class_id and remove others.

    :param file_path: Path to the annotation file
    :param target_class_id: The original class ID for child_face
    :param new_class_id: The new class ID to assign (0)
    """
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()

        with open(file_path, 'w') as f:
            for line in lines:
                parts = line.split()
                try:
                    class_id = int(parts[0])
                except ValueError:
                    class_id = class_name_to_id[parts[0]]

                if class_id == target_class_id:
                    parts[0] = str(new_class_id)
                    f.write(" ".join(parts) + '\n')
                elif class_id == new_class_id:
                    f.write(" ".join(parts) + '\n')
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")

def process_split(split_dir, target_class_id, new_class_id):
    """
    Process all annotation files in the given split directory.

    :param split_dir: Directory containing the annotation files
    :param target_class_id: The original class ID for child_face
    :param new_class_id: The new class ID to assign (0)
    """
    files = [os.path.join(split_dir, f) for f in os.listdir(split_dir) if os.path.isfile(os.path.join(split_dir, f))]
    for file_path in files:
        convert_annotation(file_path, target_class_id, new_class_id)

def main():
    label_dir = r"C:\Users\ybr5070\Desktop\yolo_merged_1\labels"   # Update with your labels directory path
    target_class_name = 'child_face'  # The original class name for child_face
    new_class_id = 0  # The new class ID for child_face
    target_class_id = class_name_to_id[target_class_name]

    for split in ["train", "test", "val"]:
        split_dir = os.path.join(label_dir, split)
        process_split(split_dir, target_class_id, new_class_id)

if __name__ == "__main__":
    main()


Error processing file C:\Users\ybr5070\Desktop\yolo_merged_1\labels\train\classes.txt: 'marinara'
Error processing file C:\Users\ybr5070\Desktop\yolo_merged_1\labels\test\classes.txt: 'marinara'
Error processing file C:\Users\ybr5070\Desktop\yolo_merged_1\labels\val\classes.txt: 'marinara'


# check labels

In [8]:
import os

def check_labels(split_dir):
    """
    Check if all annotations have the label 0 in the given split directory.

    :param split_dir: Directory containing the annotation files
    :return: List of files with incorrect labels
    """
    incorrect_files = []
    files = [os.path.join(split_dir, f) for f in os.listdir(split_dir) if os.path.isfile(os.path.join(split_dir, f))]
    for file_path in files:
        with open(file_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.split()
                class_id = int(parts[0])
                if class_id != 0:
                    incorrect_files.append(file_path)
                    break
    return incorrect_files

def main():
    label_dir = r"C:\Users\ybr5070\Desktop\yolo_merged_1\labels"  # Update with your labels directory path
    incorrect_files = []
    
    for split in ["train", "test", "val"]:
        split_dir = os.path.join(label_dir, split)
        incorrect_files.extend(check_labels(split_dir))
    
    if incorrect_files:
        print("The following files contain labels other than 0:")
        for file in incorrect_files:
            print(file)
    else:
        print("All annotations have the label 0.")

if __name__ == "__main__":
    main()


All annotations have the label 0.
