### Converting the original data to YOLO format

In [16]:
import os
import shutil
import random
import cv2

In [17]:
# Source locations: positive training images and their annotation list
image_dir = "data/Dataset 1 (Simplex)/Train data/Positive data"
annotation_file = "data/Dataset 1 (Simplex)/simpleTrainFullPhotosSortedFullAnnotations.txt"

# Staging folders that receive the YOLO-format images and label files
yolo_images_dir = "yolo_data/images"
yolo_labels_dir = "yolo_data/labels"
for staging_dir in (yolo_images_dir, yolo_labels_dir):
    # exist_ok=True lets the cell be re-run without failing on existing folders
    os.makedirs(staging_dir, exist_ok=True)

In [18]:
def convert_to_yolo_format(x, y, w, h, image_width, image_height):
    """Turn a top-left-anchored pixel box into normalized centre-based values.

    Args:
        x: Horizontal pixel coordinate of the box's top-left corner.
        y: Vertical pixel coordinate of the box's top-left corner.
        w: Box width in pixels.
        h: Box height in pixels.
        image_width: Width of the image, used to scale horizontal values.
        image_height: Height of the image, used to scale vertical values.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where each value is
        divided by the matching image dimension.
    """
    # Box centre = top-left corner plus half of the extent along that axis.
    centre_x_px = x + w / 2
    centre_y_px = y + h / 2
    return (
        centre_x_px / image_width,
        centre_y_px / image_height,
        w / image_width,
        h / image_height,
    )

### Dealing with negative data from the training set

In [19]:
negative_data_dir = "data/Dataset 1 (Simplex)/Train data/Negative data"

# Stage every negative image together with an empty label file.
# os.listdir also returns sub-directories, which shutil.copy cannot copy,
# so only regular files are kept.
negative_images = [
    entry
    for entry in os.listdir(negative_data_dir)
    if os.path.isfile(os.path.join(negative_data_dir, entry))
]

for image_name in negative_images:
    # Copy the negative image into the YOLO staging folder
    negative_image_path = os.path.join(negative_data_dir, image_name)
    yolo_image_path = os.path.join(yolo_images_dir, image_name)
    shutil.copy(negative_image_path, yolo_image_path)

    # The label file shares the image's base name, whatever its extension is
    txt_name = os.path.splitext(image_name)[0] + ".txt"
    yolo_label_path = os.path.join(yolo_labels_dir, txt_name)

    # Opening in write mode creates (or truncates to) an empty label file;
    # the context manager closes the handle even if an error occurs.
    with open(yolo_label_path, 'w'):
        pass

### Dealing with positive data

### Converting annotations and shuffling into train and validation sets

In [20]:
# Convert the positive-image annotations into YOLO label files.
# Each annotation line is split on whitespace; field 2 is read as the image
# path, field 3 as the number of boxes, followed by four integers
# (x, y, width, height of a top-left-anchored box) per box.
with open(annotation_file, 'r') as file:
    lines = file.readlines()

for line in lines:
    parts = line.strip().split()
    if not parts:
        # Blank lines (e.g. a trailing newline) carry no annotation
        continue

    # The annotation names a .bmp file; the name is switched to .JPG before
    # the image is looked up in image_dir.
    image_name = parts[2].replace('.bmp', '.JPG')
    # Keep only what follows the "data\" path prefix
    image_name = image_name.split("data\\", 1)[-1]
    num_potholes = int(parts[3])

    # Extract bounding boxes (4 values for each pothole)
    bboxes = []
    for i in range(num_potholes):
        offset = 4 + i * 4
        x, y, width, height = (int(value) for value in parts[offset:offset + 4])
        bboxes.append((x, y, width, height))

    # Load the image to obtain its pixel dimensions
    image_path = os.path.join(image_dir, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        raise FileNotFoundError(
            f"cv2.imread could not read image (missing or unreadable): {image_path}"
        )
    image_height, image_width = image.shape[:2]

    # Copy the image into the YOLO staging folder
    yolo_image_path = os.path.join(yolo_images_dir, image_name)
    shutil.copy(image_path, yolo_image_path)

    # Label file shares the image's base name (same rule as the negative images)
    label_name = os.path.splitext(image_name)[0] + ".txt"
    yolo_label_path = os.path.join(yolo_labels_dir, label_name)

    with open(yolo_label_path, 'w') as label_file:
        for (x, y, w, h) in bboxes:
            # Convert to YOLO format
            x_center, y_center, width, height = convert_to_yolo_format(x, y, w, h, image_width, image_height)
            # Assuming class id 0 for potholes
            label_file.write(f"0 {x_center} {y_center} {width} {height}\n")

# Randomly partition the staged images: first 80% -> train, remainder -> val
image_files = os.listdir(yolo_images_dir)
random.shuffle(image_files)

split_index = int(0.8 * len(image_files))
train_images, val_images = image_files[:split_index], image_files[split_index:]

# Destination folders for each subset
train_images_dir = "yolo_data/train/images"
val_images_dir = "yolo_data/val/images"
train_labels_dir = "yolo_data/train/labels"
val_labels_dir = "yolo_data/val/labels"

for subset_dir in (train_images_dir, val_images_dir, train_labels_dir, val_labels_dir):
    # exist_ok=True keeps re-runs from failing on folders that already exist
    os.makedirs(subset_dir, exist_ok=True)

In [21]:
# Move every staged image, together with its label file, into its subset.
for subset_images, images_out_dir, labels_out_dir in (
    (train_images, train_images_dir, train_labels_dir),
    (val_images, val_images_dir, val_labels_dir),
):
    for image_name in subset_images:
        # Derive the label name from the base name rather than replacing a
        # literal '.JPG': the negative-image labels are named via splitext,
        # so an image with any other extension (e.g. '.jpg') would otherwise
        # point at a label file that does not exist.
        label_name = os.path.splitext(image_name)[0] + ".txt"
        shutil.move(os.path.join(yolo_images_dir, image_name), os.path.join(images_out_dir, image_name))
        shutil.move(os.path.join(yolo_labels_dir, label_name), os.path.join(labels_out_dir, label_name))

print("Dataset is now ready for YOLOv8 training.")

G0028680.JPG
G0028680.txt
G0067299.JPG
G0067299.txt
G0067192.JPG
G0067192.txt
G0067217.JPG
G0067217.txt
G0029858.JPG
G0029858.txt
G0028669.JPG
G0028669.txt
G0077618.JPG
G0077618.txt
G0077690.JPG
G0077690.txt
G0027990.JPG
G0027990.txt
G0041581.JPG
G0041581.txt
G0077394.JPG
G0077394.txt
G0026877.JPG
G0026877.txt
G0066887.JPG
G0066887.txt
G0063547.JPG
G0063547.txt
G0063913.JPG
G0063913.txt
G0051965.JPG
G0051965.txt
G0040852.JPG
G0040852.txt
G0052254.JPG
G0052254.txt
G0088152.JPG
G0088152.txt
G0077926.JPG
G0077926.txt
G0041407.JPG
G0041407.txt
G0029578.JPG
G0029578.txt
G0077452.JPG
G0077452.txt
G0027190.JPG
G0027190.txt
G0067211.JPG
G0067211.txt
G0066621.JPG
G0066621.txt
G0029948.JPG
G0029948.txt
G0027465.JPG
G0027465.txt
G0066149.JPG
G0066149.txt
G0041347.JPG
G0041347.txt
G0016593.JPG
G0016593.txt
G0064981.JPG
G0064981.txt
G0029185.JPG
G0029185.txt
G0010127.JPG
G0010127.txt
G0028597.JPG
G0028597.txt
G0029113.JPG
G0029113.txt
G0052355.JPG
G0052355.txt
G0027367.JPG
G0027367.txt
G0066642.JPG

### Dealing with test data

In [31]:
# Inputs: test images and the annotation list describing them
test_image_dir = "data/Dataset 1 (Simplex)/Test data"
test_annotation_file = "data/Dataset 1 (Simplex)/simpleTestFullSizeAllPotholesSortedFullAnnotation.txt"

# Outputs: folders for the YOLO-format test images and labels
yolo_test_images_dir = "yolo_data/test/images"
yolo_test_labels_dir = "yolo_data/test/labels"
for test_out_dir in (yolo_test_images_dir, yolo_test_labels_dir):
    # exist_ok=True lets the cell be re-run without failing on existing folders
    os.makedirs(test_out_dir, exist_ok=True)

# Convert the test annotations into YOLO label files.
# Each annotation line is split on whitespace; field 1 is read as the image
# path, field 2 as the number of boxes, followed by four integers
# (x, y, width, height of a top-left-anchored box) per box.
with open(test_annotation_file, 'r') as file:
    lines = file.readlines()

for line in lines:
    parts = line.strip().split()
    if not parts:
        # Blank lines (e.g. a trailing newline) carry no annotation
        continue

    # The annotation names a .bmp file; the name is switched to .JPG before
    # the image is looked up in test_image_dir.
    image_name = parts[1].replace('.bmp', '.JPG')
    # Keep only what follows the "data\" path prefix
    image_name = image_name.split("data\\", 1)[-1]
    num_potholes = int(parts[2])

    # Extract bounding boxes (4 values for each pothole)
    bboxes = []
    for i in range(num_potholes):
        offset = 3 + i * 4
        x, y, width, height = (int(value) for value in parts[offset:offset + 4])
        bboxes.append((x, y, width, height))

    # Load the image to obtain its pixel dimensions
    image_path = os.path.join(test_image_dir, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        raise FileNotFoundError(
            f"cv2.imread could not read image (missing or unreadable): {image_path}"
        )
    image_height, image_width = image.shape[:2]

    # Copy the image into the YOLO test folder
    yolo_image_path = os.path.join(yolo_test_images_dir, image_name)
    shutil.copy(image_path, yolo_image_path)

    # Label file shares the image's base name, whatever its extension is
    label_name = os.path.splitext(image_name)[0] + ".txt"
    yolo_label_path = os.path.join(yolo_test_labels_dir, label_name)

    with open(yolo_label_path, 'w') as label_file:
        for (x, y, w, h) in bboxes:
            # Convert to YOLO format
            x_center, y_center, width, height = convert_to_yolo_format(x, y, w, h, image_width, image_height)
            # Assuming class id 0 for potholes
            label_file.write(f"0 {x_center} {y_center} {width} {height}\n")

print("Test dataset is now ready for YOLOv8 evaluation.")

['Test', 'data\\G0011476.bmp', '2', '2176', '1580', '242', '44', '1726', '1458', '106', '26']
G0011476.JPG
['Test', 'data\\G0011523.bmp', '3', '2250', '1472', '68', '24', '2192', '1432', '56', '18', '1974', '1344', '70', '22']
G0011523.JPG
['Test', 'data\\G0011524.bmp', '3', '2474', '1592', '120', '40', '2336', '1508', '88', '24', '1992', '1370', '90', '18']
G0011524.JPG
['Test', 'data\\G0011562.bmp', '1', '2188', '1542', '220', '50']
G0011562.JPG
['Test', 'data\\G0011585.bmp', '5', '1048', '1724', '330', '48', '1472', '1514', '142', '24', '1644', '1416', '66', '18', '1922', '1494', '66', '22', '2008', '1452', '126', '26']
G0011585.JPG
['Test', 'data\\G0011587.bmp', '2', '1488', '1496', '102', '32', '2204', '1648', '292', '72']
G0011587.JPG
['Test', 'data\\G0011601.bmp', '2', '1876', '1428', '62', '14', '2050', '1476', '44', '22']
G0011601.JPG
['Test', 'data\\G0011602.bmp', '2', '2160', '1576', '70', '28', '1888', '1470', '86', '22']
G0011602.JPG
['Test', 'data\\G0011603.bmp', '1', '18