Since the model will be trained on several different datasets, these datasets need to be preprocessed by changing their class indices, deleting unwanted classes, and merging them into one dataset.  

In [None]:
# Source datasets that are cleaned and then merged into one dataset.
# Every entry carries:
#   name        - folder name of the dataset under "Seperate Datasets/"
#   data        - the splits that are processed for this dataset
#   num_classes - number of classes listed for the dataset
#   classes     - original class index -> class index in the merged dataset
DATASETS = [
    dict(
        name='D1',
        data=['train', 'valid', 'test'],
        num_classes=2,
        classes={
            0: 0,  # Open Eye
            1: 1,  # Closed Eye
        },
    ),
    dict(
        name='D2',
        data=['train', 'valid', 'test'],
        num_classes=2,
        classes={
            0: 2,  # HandsNotOnWheel
            1: 3,  # HandsOnWheel
        },
    ),
    dict(
        name='D3',
        data=['train', 'valid', 'test'],
        num_classes=3,
        classes={
            0: 4,  # Cigarette
            1: 5,  # Phone
            2: 6,  # Seatbelt
        },
    ),
    # Disabled configurations, kept for reference only:
    #   'D3' (train/valid only, NO TEST DATA): {0: 2 (awake), 1: 3 (drowsy)}
    #   'D4' (train/valid/test): {0: 6 (Cigarette), 1: 7 (Phone), 2: 8 (Seatbelt)}
]

In [None]:
# Display name of every class in the merged dataset, keyed by merged class index.
# The position in the list is the class index (0, 1, 2, ...).
CLASSES_NAMES = dict(enumerate([
    'Open Eye',
    'Closed Eye',
    'HandsNotOnWheel',
    'HandsOnWheel',
    'Cigarette',
    'Phone',
    'Seatbelt',
]))

In [None]:
def is_empty_file(file_path):
    """Tell whether a text file contains nothing but whitespace.

    Args:
        file_path: Path of the file to inspect; it is opened in text mode.

    Returns:
        True when every line is blank or whitespace-only (a zero-length file
        included), False as soon as one line has visible content.
    """
    with open(file_path, 'r') as handle:
        # any() stops reading at the first line that survives stripping
        has_content = any(row.strip() for row in handle)
    return not has_content

In [None]:
import os
import shutil  # Import shutil for moving files

# Counters reported at the end of the run
moved_counter = 0         # label files moved to a "deleted" directory
format_error_counter = 0  # label files that contained at least one malformed line


def _remap_label_line(line, class_mapping):
    """Rewrite one label line so it uses the merged-dataset class index.

    Args:
        line: A non-blank line read from a label file (may end with a newline).
        class_mapping: Dict mapping original class index -> new class index.

    Returns:
        The line to write back to the label file, or None when the line does
        not consist of exactly five whitespace-separated fields (class index
        plus four box values) and therefore has to be dropped.
    """
    elements = line.split()
    # Some label files include segmentation info; only 5-field detection
    # lines are kept.
    if len(elements) != 5:
        return None

    object_class = elements[0]
    for old_class, new_class in class_mapping.items():
        if object_class == str(old_class):
            # Drop leading whitespace first so the slice removes exactly the
            # old class token; the remainder of the line is kept untouched.
            return str(new_class) + line.lstrip()[len(object_class):]

    # Class is not listed in the mapping: keep the line as it is
    return line


# Loop through each dataset in the DATASETS list
for dataset in DATASETS:
    print(f"Processing dataset: {dataset['name']}")

    # Iterate over the dataset's data splits
    for data in dataset['data']:
        print(f"Checking {dataset['name']}'s {data} directory...")

        # Define paths to label and image directories
        labels_dir = f"Seperate Datasets/{dataset['name']}/{data}/labels"
        images_dir = f"Seperate Datasets/{dataset['name']}/{data}/images"

        # Create a "deleted" directory if it doesn't exist
        deleted_dir = os.path.join("Seperate Datasets/", dataset['name'], data, "deleted")
        os.makedirs(deleted_dir, exist_ok=True)

        # Iterate through label files in the labels directory
        for filename in os.listdir(labels_dir):
            if not filename.endswith('.txt'):
                continue

            # Images are expected to be .jpg files named after their label file
            corresponding_image_name = filename[:-3] + 'jpg'

            source_label_path = os.path.join(labels_dir, filename)
            source_image_path = os.path.join(images_dir, corresponding_image_name)
            destination_label_path = os.path.join(deleted_dir, filename)
            destination_image_path = os.path.join(deleted_dir, corresponding_image_name)

            # A label without an image is unusable. This is checked before the
            # empty-label case so that a missing image is never passed to
            # shutil.move (which would raise FileNotFoundError).
            if not os.path.exists(source_image_path):
                shutil.move(source_label_path, destination_label_path)
                print(f'Moved: {filename} to "deleted" directory, because no corresponding image was found.')
                moved_counter += 1
                continue

            # An empty label file: move it together with its (existing) image
            if is_empty_file(source_label_path):
                shutil.move(source_label_path, destination_label_path)
                shutil.move(source_image_path, destination_image_path)
                print(f'Moved: {filename} and its corresponding image to "deleted" directory.')
                moved_counter += 1
                continue

            with open(source_label_path, 'r') as f:
                lines = f.readlines()

            # Build the complete new content first so the label file is only
            # rewritten once every line has been processed.
            new_lines = []
            file_has_format_error = False
            for line in lines:
                # Blank lines carry no annotation; drop them silently
                if not line.strip():
                    continue

                new_line = _remap_label_line(line, dataset['classes'])
                if new_line is None:
                    file_has_format_error = True
                    continue
                new_lines.append(new_line)

            # Count each offending file once, matching the summary message
            if file_has_format_error:
                format_error_counter += 1

            with open(source_label_path, 'w') as f:
                f.writelines(new_lines)

        print('===\n')

    # Print a message indicating the completion of dataset processing
    print(f"Finished processing dataset: {dataset['name']}...")
    print('==== ==== ==== ====\n\n')

# Print the totals across all datasets
print(f"Total moved files: {moved_counter}")
print(f"Total files with wrong YoloV8 (Detect) format: {format_error_counter}")

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import random


# Function to read YOLO-style label files and parse the bounding box info
def read_label_file(label_file_path):
    """Parse a YOLO-style label file into a list of bounding-box tuples.

    Args:
        label_file_path: Path of a text file holding one object per line as
            ``class_id center_x center_y box_width box_height``.

    Returns:
        A list of ``(class_id, center_x, center_y, box_width, box_height)``
        tuples in file order, with ``class_id`` as int and the four box values
        as floats. Blank lines are skipped.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            one of its fields is not numeric.
    """
    objects = []
    with open(label_file_path, 'r') as file:
        for line in file:
            elements = line.split()
            # Blank or whitespace-only lines would otherwise fail on elements[0]
            if not elements:
                continue
            class_id = int(elements[0])
            center_x, center_y, box_width, box_height = map(float, elements[1:])
            objects.append((class_id, center_x, center_y, box_width, box_height))

    return objects


# Show up to 20 random training images of every dataset with their boxes drawn
for dataset in DATASETS:
    print(f"Shwoing images from dataset: {dataset['name']}")
    # Directory holding the 'images' and 'labels' folders of the train split
    dataset_dir = f"Seperate Datasets/{dataset['name']}/train"

    # List all image files in the directory
    image_files = [f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.endswith('.jpg')]
    # random.sample raises ValueError when asked for more items than exist
    image_files = random.sample(image_files, min(20, len(image_files)))

    # Loop through each image file and plot its labelled objects
    for image_file in image_files:
        image_path = os.path.join(dataset_dir, 'images', image_file)
        image = cv2.imread(image_path)
        # cv2.imread signals an unreadable file by returning None
        if image is None:
            print(f'Skipping unreadable image: {image_path}')
            continue

        # Image size is the same for every box, so read it once
        height, width = image.shape[:2]

        # Swap only the extension; a '.jpg' elsewhere in the name is left alone
        label_file_path = os.path.join(dataset_dir, 'labels', os.path.splitext(image_file)[0] + '.txt')

        # Images without a label file are shown without boxes
        if os.path.exists(label_file_path):
            for class_id, center_x, center_y, box_width, box_height in read_label_file(label_file_path):
                # Convert normalized YOLO center/size values to pixel corners
                left = int((center_x - box_width / 2) * width)
                top = int((center_y - box_height / 2) * height)
                right = int((center_x + box_width / 2) * width)
                bottom = int((center_y + box_height / 2) * height)

                # Draw bounding box on the image
                cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)

                # Fall back to the raw index for classes without a known name
                class_label = CLASSES_NAMES.get(class_id, str(class_id))
                label_text = f'{class_label}'
                cv2.putText(image, label_text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the image with bounding boxes and labels (OpenCV loads BGR)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title(f'Detected Objects in {image_file}')
        plt.axis('off')
        plt.show()


In [None]:
# Description of the merged dataset.
# 'classes' maps each merged class index to its display name. It has to agree
# with the merged indices assigned by the per-dataset 'classes' mappings in
# DATASETS (currently 0-6); otherwise per-class statistics are reported under
# the wrong names.
FINAL_DATASET = {
    'name': 'FINAL_DATASET',
    'data': ['train', 'valid', 'test'],
    'num_classes': 7,
    'classes': {
        0: 'Open Eye',
        1: 'Closed Eye',
        2: 'HandsNotOnWheel',
        3: 'HandsOnWheel',
        4: 'Cigarette',
        5: 'Phone',
        6: 'Seatbelt',
    }
}

In [None]:
# Merging all datasets into one dataset
import os
import shutil

# Every dataset folder is copied on top of the same destination, so the
# train/valid/test sub-folders of all datasets end up merged.
destination_directory = 'full dataset'

# Folders named "deleted" hold files rejected during cleaning; they must not
# end up in the merged dataset.
skip_deleted = shutil.ignore_patterns('deleted')

# NOTE: files with the same relative path in two datasets overwrite each other.
for dataset in DATASETS:
    print(f"Copying {dataset['name']} files...")
    data_dir = f"Seperate Datasets/{dataset['name']}/"
    shutil.copytree(data_dir, destination_directory, ignore=skip_deleted, dirs_exist_ok=True)

print('Finished copying')

In [None]:
import os

# classes_counters[i] holds the number of labelled objects of merged class i
classes_counters = [0] * FINAL_DATASET['num_classes']

for data in FINAL_DATASET['data']:
    current_dir = os.path.join('full dataset', data, 'labels')
    # Iterate through label files in the labels directory
    for filename in os.listdir(current_dir):
        # Only .txt files are label files; skip anything else in the folder
        if not filename.endswith('.txt'):
            continue

        with open(os.path.join(current_dir, filename), 'r') as f:
            for line in f:
                elements = line.split()
                # Blank lines carry no object and would fail on elements[0]
                if not elements:
                    continue
                # The first field of a label line is the class index
                classes_counters[int(elements[0])] += 1

for class_index, instances in enumerate(classes_counters):
    print(f"Found {instances} instances of class {FINAL_DATASET['classes'][class_index]}")

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import random


# Function to read YOLO-style label files and parse the bounding box info
def read_label_file(label_file_path):
    """Parse a YOLO-style label file into a list of bounding-box tuples.

    Args:
        label_file_path: Path of a text file holding one object per line as
            ``class_id center_x center_y box_width box_height``.

    Returns:
        A list of ``(class_id, center_x, center_y, box_width, box_height)``
        tuples in file order, with ``class_id`` as int and the four box values
        as floats. Blank lines are skipped.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            one of its fields is not numeric.
    """
    objects = []
    with open(label_file_path, 'r') as file:
        for line in file:
            elements = line.split()
            # Blank or whitespace-only lines would otherwise fail on elements[0]
            if not elements:
                continue
            class_id = int(elements[0])
            center_x, center_y, box_width, box_height = map(float, elements[1:])
            objects.append((class_id, center_x, center_y, box_width, box_height))

    return objects


# Show up to 100 random training images of the merged dataset with their boxes drawn
print("Shwoing images from dataset:")
# Directory holding the 'images' and 'labels' folders of the merged train split
dataset_dir = "full dataset/train"

# List all image files in the directory
image_files = [f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.endswith('.jpg')]
# random.sample raises ValueError when asked for more items than exist
image_files = random.sample(image_files, min(100, len(image_files)))

# Loop through each image file and plot its labelled objects
for image_file in image_files:
    image_path = os.path.join(dataset_dir, 'images', image_file)
    image = cv2.imread(image_path)
    # cv2.imread signals an unreadable file by returning None
    if image is None:
        print(f'Skipping unreadable image: {image_path}')
        continue

    # Image size is the same for every box, so read it once
    height, width = image.shape[:2]

    # Swap only the extension; a '.jpg' elsewhere in the name is left alone
    label_file_path = os.path.join(dataset_dir, 'labels', os.path.splitext(image_file)[0] + '.txt')

    # Images without a label file are shown without boxes
    if os.path.exists(label_file_path):
        for class_id, center_x, center_y, box_width, box_height in read_label_file(label_file_path):
            # Convert normalized YOLO center/size values to pixel corners
            left = int((center_x - box_width / 2) * width)
            top = int((center_y - box_height / 2) * height)
            right = int((center_x + box_width / 2) * width)
            bottom = int((center_y + box_height / 2) * height)

            # Draw bounding box on the image
            cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)

            # Fall back to the raw index for classes without a known name
            class_label = CLASSES_NAMES.get(class_id, str(class_id))
            label_text = f'{class_label}'
            cv2.putText(image, label_text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the image with bounding boxes and labels (OpenCV loads BGR)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(f'Detected Objects in {image_file}')
    plt.axis('off')
    plt.show()