## Importing the necessary libraries, and declaring constants. 

In [None]:
import os
import shutil
import random
import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO

In [None]:
# Directory layout used throughout the notebook.
DATASETS_TO_PROCESS_PATH = 'datasets/To Process'
DATASETS_PROCESSED_PATH = 'datasets/Processed'
FINAL_DATASET_PATH = 'datasets/final'

# Class ids of the final (merged) dataset.
CLASS_NUM_OPEN_EYE = 0
CLASS_NUM_CLOSED_EYE = 1
CLASS_NUM_CIGARETTE = 2
CLASS_NUM_PHONE = 3
CLASS_NUM_SEATBELT = 4

# Human-readable name of every final class id. Keyed by the CLASS_NUM_*
# constants so the two tables cannot drift apart (the names for ids 2 and 3
# were previously swapped relative to the constants above).
CLASSES_NAMES = {
    CLASS_NUM_OPEN_EYE: 'Open Eye',
    CLASS_NUM_CLOSED_EYE: 'Closed Eye',
    CLASS_NUM_CIGARETTE: 'Cigarette',
    CLASS_NUM_PHONE: 'Phone',
    CLASS_NUM_SEATBELT: 'Seatbelt',
}

## Declaring a dictionary of datasets, where each dataset has:
* Name (the actual path to the dataset)
* Data (training, testing, and validation)
* Number of Classes
* Initial Classes (the classes the dataset has labels and annotations for)
* Target Classes (mapping original class with a new class)
* Dataset Link


In [None]:
def _describe_dataset(name, splits, num_classes, initial_classes, target_classes, link, **extra_fields):
    """Build the description dictionary of one candidate dataset.

    Keys are inserted in this order: 'name', 'data', 'num_classes', any
    extra fields, 'initial_classes', 'target_classes', 'dataset link'.
    """
    entry = {
        'name': name,
        'data': list(splits),  # fresh list per dataset, never shared
        'num_classes': num_classes,
    }
    entry.update(extra_fields)
    entry['initial_classes'] = dict(initial_classes)
    entry['target_classes'] = dict(target_classes)
    entry['dataset link'] = link
    return entry


_ALL_SPLITS = ('train', 'valid', 'test')

# Every candidate dataset that is previewed before the final selection is made.
# NOTE(review): the 'target_classes' of datasets 3-8 use keys that are not among
# their 'initial_classes' ids - confirm the intended mapping direction.
DATASETS = [
    _describe_dataset(
        '1 - (Awake - Drowsy)', _ALL_SPLITS, 2,
        {0: 'Awake', 1: 'Drowsy'},
        {0: 0, 1: 1},
        'https://universe.roboflow.com/fyp-vzed3/drivers-drowsiness-detection/dataset/1',
    ),
    # This dataset ships without a 'test' split.
    _describe_dataset(
        '2 - (Awake - Drowsy)', ('train', 'valid'), 2,
        {0: 'Awake', 1: 'Drowsy'},
        {0: 0, 1: 1},
        'https://universe.roboflow.com/hufs/dowsy_detectioin/dataset/2',
    ),
    _describe_dataset(
        '3 - (Open Eye - Closed Eye)', _ALL_SPLITS, 2,
        {0: 'Open Eye', 1: 'Closed Eye'},
        {2: 0, 3: 1},
        'https://universe.roboflow.com/553624589-qq-com/eye_detection-zztm1/dataset/1',
    ),
    # 'Yawn' (id 2) is marked by the author as a class to be deleted.
    _describe_dataset(
        '4 - (Open Eye - Closed Eye - Yawn)', _ALL_SPLITS, 3,
        {0: 'Open Eye', 1: 'Closed Eye', 2: 'Yawn'},
        {2: 0, 3: 1},  # Open Eye, Closed Eye
        'https://universe.roboflow.com/ntutee-project/drowsiness-driver/dataset/1',
    ),
    _describe_dataset(
        '5 - (Phone)', _ALL_SPLITS, 1,
        {0: 'Phone'},
        {4: 0},
        'https://universe.roboflow.com/sarah-t/mobilephonedetector/dataset/1',
    ),
    # 'Person' (id 0) is marked by the author as a class to be deleted.
    _describe_dataset(
        '6 - (Person - Cigarette)', _ALL_SPLITS, 2,
        {0: 'Person', 1: 'Cigarette'},
        {5: 0},
        'https://universe.roboflow.com/ensam-swz2h/smoke-detection-88ggt/dataset/1',
    ),
    # NOTE(review): declares 2 classes but lists a single initial class, and
    # carries an empty 'classes' entry - confirm against the dataset itself.
    _describe_dataset(
        '7 - (Seatbelt)', _ALL_SPLITS, 2,
        {0: 'Seatbelt'},
        {6: 0},
        'https://universe.roboflow.com/calisma-wcihp/seatbelt-4i5ly/dataset/1',
        classes={},
    ),
    _describe_dataset(
        '8 - (HandsNotOnWheel - HandsOnWheel)', _ALL_SPLITS, 2,
        {0: 'HandsNotOnWheel', 1: 'HandsOnWheel'},
        {7: 0, 8: 1},
        'https://universe.roboflow.com/kritik-pancholi-gzk5j/hands_on_wheel_detection/dataset/5',
    ),
    _describe_dataset(
        '9 - (Cigarette - Phone - Seatbelt)', _ALL_SPLITS, 3,
        {0: 'Cigarette', 1: 'Phone', 2: 'Seatbelt'},
        {0: 3, 1: 2, 2: 4},
        'https://universe.roboflow.com/jui/driver-behaviors',
    ),
]


## A function to read a YOLOv8 label file and return the class and bounding box of each annotation.

In [None]:
# Function to read YOLO-style label files and parse the bounding box info
def read_label_file(label_file_path):
    """Parse a YOLO detection label file.

    Each non-blank line must hold exactly five whitespace-separated fields:
    ``class_id center_x center_y box_width box_height``.

    Args:
        label_file_path: Path of the ``.txt`` label file to read.

    Returns:
        A list of ``(class_id, center_x, center_y, box_width, box_height)``
        tuples, one per annotation, with ``class_id`` as ``int`` and the
        remaining values as ``float``.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            a field cannot be converted to a number.
    """
    objects = []
    with open(label_file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            elements = line.split()
            # Skip blank and whitespace-only lines (including a trailing one
            # without a newline) instead of failing on elements[0].
            if not elements:
                continue
            if len(elements) != 5:
                raise ValueError(
                    f"{label_file_path}:{line_number}: expected 5 fields "
                    f"(class x y w h), found {len(elements)}"
                )
            class_id = int(elements[0])
            center_x, center_y, box_width, box_height = map(float, elements[1:])
            objects.append((class_id, center_x, center_y, box_width, box_height))

    return objects

## Plotting 2 images per dataset, each with its annotations.

In [None]:
# Preview up to two random training images of every candidate dataset,
# drawing the ground-truth boxes and class names found in their label files.
for dataset in DATASETS:
    print(f"Showing images from dataset: {dataset['name']}.")
    # Define the path to the directory containing label and image files
    dataset_dir = f"{DATASETS_TO_PROCESS_PATH}/{dataset['name']}/train"

    # List of all image files in the directory
    image_files = [f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.endswith('.jpg')]
    # Choose 2 random images; cap the sample size because random.sample raises
    # ValueError when asked for more items than are available.
    image_files = random.sample(image_files, min(2, len(image_files)))

    # Loop through each image file and plot its annotated objects
    for image_file in image_files:
        # Load the image
        image_path = os.path.join(dataset_dir, 'images', image_file)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when the file cannot be decoded
        if image is None:
            print(f'Skipping unreadable image: {image_path}')
            continue
        height, width = image.shape[:2]

        # Label file shares the image's base name; only the extension is swapped
        label_file_path = os.path.join(dataset_dir, 'labels', os.path.splitext(image_file)[0] + '.txt')

        # Check if the label file exists
        if os.path.exists(label_file_path):
            # Loop through the annotated objects and draw bounding boxes with labels
            for class_id, center_x, center_y, box_width, box_height in read_label_file(label_file_path):
                # Convert normalized YOLO (center, size) values to pixel corners
                left = int((center_x - box_width / 2) * width)
                top = int((center_y - box_height / 2) * height)
                right = int((center_x + box_width / 2) * width)
                bottom = int((center_y + box_height / 2) * height)

                # Draw bounding box on the image
                cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)

                # Add class label; fall back to the raw id when the dataset
                # description does not name this class
                class_label = dataset['initial_classes'].get(class_id, f'class {class_id}')
                label_text = f'{class_label}'
                cv2.putText(image, label_text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the image with bounding boxes and labels (OpenCV loads BGR, matplotlib expects RGB)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title(f'Detected Objects in {image_file}')
        plt.axis('off')
        plt.show()

### After reviewing the quality of the available datasets, the listed ones are to be used:

* 3 - (Open Eye - Closed Eye)
* 9 - (Cigarette - Phone - Seatbelt)


#### The updated DATASETS would be:


In [None]:
# Datasets kept after the visual review, with each original class id mapped
# to its id in the final merged dataset (the CLASS_NUM_* constants).
DATASETS = [
    {
        'name': '3 - (Open Eye - Closed Eye)',
        'data': ['train', 'valid', 'test'],
        'num_classes': 2,
        'initial_classes': {
            0: 'Open Eye',
            1: 'Closed Eye',
        },
        'target_classes': {
            0: CLASS_NUM_OPEN_EYE,
            1: CLASS_NUM_CLOSED_EYE,
        },
        'dataset link': 'https://universe.roboflow.com/553624589-qq-com/eye_detection-zztm1/dataset/1'
    },

    {
        'name': '9 - (Cigarette - Phone - Seatbelt)',
        'data': ['train', 'valid', 'test'],
        'num_classes': 3,
        'initial_classes': {
            0: 'Cigarette',
            1: 'Phone',
            2: 'Seatbelt',
        },
        'target_classes': {
            0: CLASS_NUM_CIGARETTE,
            1: CLASS_NUM_PHONE,
            2: CLASS_NUM_SEATBELT,
        },
        # Same link as in the initial description of dataset 9; the previous
        # value was the link of dataset '5 - (Phone)'.
        'dataset link': 'https://universe.roboflow.com/jui/driver-behaviors'
    },
]

## Things to check for in the label files:
* Empty label files.
* Lines that are not in the YOLOv8 object-detection label format.

In [None]:
def is_empty_file(file_path):
    """Return True when the file holds nothing but whitespace, False otherwise."""
    with open(file_path, 'r') as handle:
        # any() stops at the first line that still has content after stripping
        has_content = any(text.strip() for text in handle)
    return not has_content

In [None]:
def _move_if_present(source_path, destination_path):
    """Move a file if it exists; return False (and do nothing) when it is missing."""
    if not os.path.exists(source_path):
        return False
    shutil.move(source_path, destination_path)
    return True


def _remap_label_lines(lines, target_classes):
    """Translate the class id of every detection line through target_classes.

    Args:
        lines: Raw lines of one label file.
        target_classes: Mapping of original class id to new class id; a
            value of -1 marks the whole file for removal.

    Returns:
        A tuple ``(remapped_lines, malformed_count, delete)`` where
        ``remapped_lines`` are newline-terminated detection lines,
        ``malformed_count`` is the number of non-blank lines that are not
        valid five-field detection lines (these are dropped), and ``delete``
        is True when any line maps to -1.

    Raises:
        KeyError: If a line uses a class id that target_classes does not map.
    """
    remapped_lines = []
    malformed_count = 0
    delete = False

    for line in lines:
        elements = line.split()
        # Blank lines are not annotations and are not format errors either
        if not elements:
            continue

        # Some label files include segmentation info (more than 5 fields)
        if len(elements) != 5:
            malformed_count += 1
            continue
        try:
            object_class = int(elements[0])
        except ValueError:
            malformed_count += 1
            continue

        if object_class not in target_classes:
            raise KeyError(f'class {object_class} has no entry in target_classes')
        target_object_class = target_classes[object_class]

        if target_object_class == -1:
            delete = True
            continue

        # Rebuild the line from its fields so stray whitespace cannot
        # corrupt the replacement of the class id
        remapped_lines.append(' '.join([str(target_object_class)] + elements[1:]) + '\n')

    return remapped_lines, malformed_count, delete


# NOTE: this cell rewrites label files in place and is not idempotent -
# running it twice applies the class mapping twice.

# Initialize a counter to keep track of moved files
moved_counter = 0
format_error_counter = 0

# Loop through each dataset in the DATASETS list
for dataset in DATASETS:
    print(f"Processing dataset: {dataset['name']}")

    # Iterate over data splits: 'train', 'valid', and 'test'
    for data in dataset['data']:
        print(f"Checking {dataset['name']}'s {data} directory...")

        # Define paths to label and image directories
        labels_dir = f"{DATASETS_TO_PROCESS_PATH}/{dataset['name']}/{data}/labels"
        images_dir = f"{DATASETS_TO_PROCESS_PATH}/{dataset['name']}/{data}/images"

        # Create a "deleted" directory if it doesn't exist
        new_labels_dir = os.path.join(f"{DATASETS_TO_PROCESS_PATH}/", dataset['name'], data, "deleted")
        os.makedirs(new_labels_dir, exist_ok=True)

        # Iterate through label files in the labels directory
        for filename in os.listdir(labels_dir):
            if not filename.endswith('.txt'):
                continue

            # Determine the corresponding image name
            corresponding_image_name = os.path.splitext(filename)[0] + '.jpg'

            # Define paths to the source files and their place in the "deleted" directory
            source_label_path = os.path.join(labels_dir, filename)
            source_image_path = os.path.join(images_dir, corresponding_image_name)
            destination_label_path = os.path.join(new_labels_dir, filename)
            destination_image_path = os.path.join(new_labels_dir, corresponding_image_name)

            # Empty label file: move it together with its image (if there is one)
            if is_empty_file(source_label_path):
                shutil.move(source_label_path, destination_label_path)
                _move_if_present(source_image_path, destination_image_path)
                print(f'Moved: {filename} and its corresponding image to "deleted" directory.')
                moved_counter += 1
                continue

            # Label without an image: only the label can be moved
            if not os.path.exists(source_image_path):
                shutil.move(source_label_path, destination_label_path)
                print(f'Moved: {filename} to "deleted" directory, because no corresponding image was found.')
                moved_counter += 1
                continue

            with open(source_label_path, 'r') as f:
                lines = f.readlines()

            # Compute the new content BEFORE touching the file, so an error
            # or a "delete" decision never leaves a half-written label behind
            updated_lines, malformed_count, delete = _remap_label_lines(lines, dataset['target_classes'])
            format_error_counter += malformed_count

            # A file marked for deletion, or with no valid annotation left,
            # is moved (with its original content) instead of rewritten
            if delete or not updated_lines:
                shutil.move(source_label_path, destination_label_path)
                shutil.move(source_image_path, destination_image_path)
                print(f'Moved: {filename} and its corresponding image to "deleted" directory.')
                moved_counter += 1
                continue

            with open(source_label_path, 'w') as f:
                f.writelines(updated_lines)
        print('===\n')

    # Print a message indicating the completion of dataset processing
    print(f"Finished processing dataset: {dataset['name']}...")
    print('==== ==== ==== ====\n\n')

# Print the total number of moved files across all datasets
print(f"Total moved files: {moved_counter}")
print(f"Total label lines with wrong YoloV8 (Detect) format: {format_error_counter}")

## Creating a backup of the datasets first

In [None]:
# Mirror every selected dataset folder into "<to-process>/Backup/<dataset name>".
for dataset in DATASETS:
    current_name = dataset['name']
    print(f"Copying {current_name} files...")

    data_dir = f"{DATASETS_TO_PROCESS_PATH}/{current_name}/"
    backup_dir = f"{DATASETS_TO_PROCESS_PATH}/Backup/{current_name}"
    # dirs_exist_ok lets the copy run again over an existing backup
    shutil.copytree(data_dir, backup_dir, dirs_exist_ok=True)

print('Finished copying')

### Training a model on the eyes dataset is required. 

In [None]:
# Path of the YOLO weights file loaded with YOLO(EYES_ONLY_MODEL_PATH) in the
# cells below; presumably the model trained on the eyes dataset - TODO confirm.
EYES_ONLY_MODEL_PATH = 'models/eyes.pt'

## Running the eyes only model on the second dataset, and adding the annotations detected by the model.

In [None]:
# Preview the eyes-only model on random training images of the second dataset.
dataset_name = DATASETS[1]['name']  # 9 - (Cigarette - Phone - Seatbelt)
dataset_dir = f"{DATASETS_TO_PROCESS_PATH}/{dataset_name}/train"

# List all image files in the directory
image_files = [f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.endswith('.jpg')]
# Choose up to 20 random images; random.sample raises ValueError when asked
# for more items than are available
image_files = random.sample(image_files, min(20, len(image_files)))

# Load the model once instead of once per image
model = YOLO(EYES_ONLY_MODEL_PATH)

# Loop through each image file and plot detected objects
for image_file in image_files:
    # Load the image
    image_path = os.path.join(dataset_dir, 'images', image_file)
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file cannot be decoded
    if image is None:
        print(f'Skipping unreadable image: {image_path}')
        continue

    # Plot the predictions made at conf=0.6. Previously the result of this
    # call was discarded and a default-confidence prediction was shown.
    results = model.predict(image, conf=0.6)  # results list
    # Visualize the results on the frame
    annotated_frame = results[0].plot()

    # Display the image with bounding boxes and labels
    plt.imshow(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
    plt.title(f'Detected Objects in {image_file}')
    plt.axis('off')
    plt.show()

In [None]:
# Run the eyes-only model over each split's images, saving annotated images
# and label text files under "<to-process>/final/<split>".
model = YOLO(EYES_ONLY_MODEL_PATH)
for split_name in ('train', 'valid', 'test'):
    split_images = f'{DATASETS_TO_PROCESS_PATH}/{dataset_name}/{split_name}/images'
    split_project = f'{DATASETS_TO_PROCESS_PATH}/final/{split_name}'
    model.predict(
        source=split_images,
        task='detect',
        mode='predict',
        conf=0.5,
        save_txt=True,
        save=True,
        project=split_project,
    )

## Merging the resulting label files with the original ones.

In [None]:
# For every original label file, write a merged file holding the original
# annotations followed by the annotations predicted by the eyes-only model.
orig_dataset_name = dataset_name  # 9 - (Cigarette - Phone - Seatbelt)
new_dataset_name = 'final'
# Iterate over data splits: 'train', 'valid', and 'test'
for data in ['train', 'valid', 'test']:
    # Labels of the original dataset
    orig_labels_dir = f"{DATASETS_TO_PROCESS_PATH}/{orig_dataset_name}/{data}/labels"

    # Labels written by the prediction step
    new_labels_dir = f"{DATASETS_TO_PROCESS_PATH}/{new_dataset_name}/{data}/predict/labels/"

    # Destination of the merged labels
    final_labels_dir = f"{DATASETS_TO_PROCESS_PATH}/{new_dataset_name}/{data}/labels/"
    os.makedirs(final_labels_dir, exist_ok=True)

    # Iterate through label files in the original labels directory
    for filename in os.listdir(orig_labels_dir):
        if not filename.endswith('.txt'):
            continue

        with open(os.path.join(orig_labels_dir, filename), 'r') as f:
            orig_lines = f.readlines()

        # The prediction step may have produced no label file for this image
        new_lines = []
        file_path = os.path.join(new_labels_dir, filename)
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                new_lines = f.readlines()

        # Lines read with readlines() already end in '\n'; normalize every
        # line to exactly one trailing newline and drop blank ones so the
        # merged file has no empty lines between annotations.
        all_lines = [line.strip() + '\n' for line in orig_lines + new_lines if line.strip()]

        with open(os.path.join(final_labels_dir, filename), 'w') as f:
            f.writelines(all_lines)

Copying the images.

In [None]:
# Copy the .jpg images of every split from the original dataset into the
# matching "images" folder of the final dataset.
orig_dataset_name = dataset_name  # 9 - (Cigarette - Phone - Seatbelt)
new_dataset_name = 'final'
for data in ['train', 'valid', 'test']:
    orig_images_dir = f"{DATASETS_TO_PROCESS_PATH}/{orig_dataset_name}/{data}/images"
    final_images_dir = f"{DATASETS_TO_PROCESS_PATH}/{new_dataset_name}/{data}/images/"
    os.makedirs(final_images_dir, exist_ok=True)

    jpg_names = [entry for entry in os.listdir(orig_images_dir) if entry.endswith('.jpg')]
    for jpg_name in jpg_names:
        # copy2 also carries over file metadata
        shutil.copy2(
            os.path.join(orig_images_dir, jpg_name),
            os.path.join(final_images_dir, jpg_name),
        )

## Copy the dataset

In [None]:
# Copy the whole "<to-process>/final/" tree into the processed-datasets folder.
print("Copying final dataset files...")
shutil.copytree(
    f"{DATASETS_TO_PROCESS_PATH}/final/",
    DATASETS_PROCESSED_PATH,
    dirs_exist_ok=True,  # merge into the destination if it already exists
)

print('Finished copying')

## After copying the final dataset, the data.yaml file should be edited.

In [None]:
import ruamel.yaml

# Content of data.yaml: split image folders, class count and class names.
data = ruamel.yaml.comments.CommentedMap()
for yaml_key, yaml_value in (
    ('train', '../train/images'),
    ('val', '../valid/images'),
    ('test', '../test/images'),
    ('nc', 5),
    ('names', ['Open Eye', 'Closed Eye', 'Cigarette', 'Phone', 'Seatbelt']),
):
    data[yaml_key] = yaml_value

# The file is written next to the processed dataset's split folders
output_dir = DATASETS_PROCESSED_PATH
os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
output_file = os.path.join(output_dir, 'data.yaml')

yaml = ruamel.yaml.YAML()
with open(output_file, 'w') as yaml_file:
    yaml.dump(data, yaml_file)

In [None]:
# Description of the merged dataset: its splits and the name of each class id.
_FINAL_CLASS_NAMES = ('Open Eye', 'Closed Eye', 'Cigarette', 'Phone', 'Seatbelt')

FINAL_DATASET = dict(
    name='FINAL_DATASET',
    data=['train', 'valid', 'test'],
    num_classes=5,
    # Class id -> class name, ids counted from 0 in the order listed above
    classes=dict(enumerate(_FINAL_CLASS_NAMES)),
)

## Counting number of instances of each class

In [None]:
import os

# One counter per class id of the final dataset
classes_counters = [0] * FINAL_DATASET['num_classes']

for data in FINAL_DATASET['data']:
    current_dir = os.path.join(f'{DATASETS_PROCESSED_PATH}', data, 'labels')
    # Walk every file of this split's labels directory
    for filename in os.listdir(current_dir):
        with open(os.path.join(current_dir, filename), 'r') as f:
            lines = f.readlines()

        for line in lines:
            elements = line.split()
            if not elements:
                # Blank line: nothing to count
                continue
            # The first field of a label line is the class id
            classes_counters[int(elements[0])] += 1

for class_index, instance_count in enumerate(classes_counters):
    print(f"Found {instance_count} instances of class {FINAL_DATASET['classes'][class_index]}")

## Showing some images of the final dataset.

In [None]:
# Preview up to 50 random training images of the final dataset with the
# boxes and class names stored in their label files.
print("Showing images from dataset:")
# Define the path to the directory containing label and image files
dataset_dir = f"{DATASETS_PROCESSED_PATH}/train"

# List all image files in the directory
image_files = [f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.endswith('.jpg')]
# Cap the sample size: random.sample raises ValueError when asked for more
# items than are available
image_files = random.sample(image_files, min(50, len(image_files)))

# Loop through each image file and plot its annotated objects
for image_file in image_files:
    # Load the image
    image_path = os.path.join(dataset_dir, 'images', image_file)
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file cannot be decoded
    if image is None:
        print(f'Skipping unreadable image: {image_path}')
        continue
    height, width = image.shape[:2]

    # Label file shares the image's base name; only the extension is swapped
    label_file_path = os.path.join(dataset_dir, 'labels', os.path.splitext(image_file)[0] + '.txt')
    # Check if the label file exists
    if os.path.exists(label_file_path):
        # Loop through the annotated objects and draw bounding boxes with labels
        for class_id, center_x, center_y, box_width, box_height in read_label_file(label_file_path):
            # Convert normalized YOLO (center, size) values to pixel corners
            left = int((center_x - box_width / 2) * width)
            top = int((center_y - box_height / 2) * height)
            right = int((center_x + box_width / 2) * width)
            bottom = int((center_y + box_height / 2) * height)

            # Draw bounding box on the image
            cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
            # Add class label; fall back to the raw id for an unexpected class
            class_label = FINAL_DATASET['classes'].get(class_id, f'class {class_id}')
            label_text = f'{class_label}'
            cv2.putText(image, label_text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the image with bounding boxes and labels (OpenCV loads BGR, matplotlib expects RGB)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(f'Detected Objects in {image_file}')
    plt.axis('off')
    plt.show()