In [72]:
import os
import glob
from pathlib import Path
import shutil
from pylabel import importer
from tqdm.auto import tqdm

import numpy as np
import random

In [31]:
# Seed Python's `random` module, which the sampling cells in this notebook use
# (random.sample / random.randint).
RAND_SEED = 42
random.seed(RAND_SEED)

In [2]:
# Project root is two directory levels above the current working directory;
# the data folder lives directly inside it.
root_dir = Path.cwd().parent.parent
data_dir = root_dir.joinpath("data")

# RDDC2020 Data

In [3]:
# Country subset to process; files are laid out as <data_dir>/<country>/train/...
country = "Japan"
# Build both paths from individual components so pathlib picks the separator.
# The earlier backslash-joined string only resolved correctly on Windows.
annotation_source = data_dir / country / "train" / "annotations" / "xmls"
image_dir = data_dir / country / "train" / "images"

In [4]:
# Import the Pascal VOC annotations, then make sure a "labels" folder exists
# next to the annotation folder reported by the importer.
dataset = importer.ImportVOC(path=annotation_source)
labels_dir = Path(dataset.path_to_annotations).parent / "labels"
labels_dir.mkdir(exist_ok=True)
# Kept as a plain string for the cells that join it with os.path.
annotation_result = str(labels_dir)

Importing VOC files...: 100%|██████████| 10506/10506 [00:04<00:00, 2411.21it/s]


In [5]:
# Summary statistics of the imported VOC dataset.
voc_stats = dataset.analyze
print(f"Number of images: {voc_stats.num_images}")
print(f"Number of classes: {voc_stats.num_classes}")
print(f"Classes:{voc_stats.classes}")
print(f"Class counts:\n{voc_stats.class_counts}")

Number of images: 9712
Number of classes: 7
Classes:['D20', 'D40', 'D10', 'D00', 'D44', 'D43', 'D50']
Class counts:
cat_name
D20    6199
D00    4049
D44    3995
D10    3979
D50    3553
D40    2243
D43     736
Name: count, dtype: int64


In [6]:
# Display the class names reported for the imported dataset.
dataset.analyze.classes

['D20', 'D40', 'D10', 'D00', 'D44', 'D43', 'D50']

In [112]:
# Map every class name (e.g. "D20") to the integer formed by the characters
# after its first one (e.g. 20).
class_mapping = {cat_name: int(cat_name[1:]) for cat_name in dataset.analyze.classes}

# Human-readable damage names keyed by the integer codes produced above.
name_mapping = {
    20: 'alligator_crack',
    40: 'pothole',
    10: 'transverse_crack',  # corrected spelling (was 'traverse_crack')
    0: 'longitudinal_crack',
    44: 'whiteline_blur',
    43: 'crosswalk_blur',
    50: 'utility'
}

# Class codes to drop. Stored as strings because the label-filter cells compare
# them with the first whitespace-separated token of each label line.
# Note: `class_mapping` values are ints, so convert before comparing with these.
excluded_classes = ['43', '44', '50']

In [8]:
# Display the class-name -> integer-code mapping.
class_mapping

{'D20': 20, 'D40': 40, 'D10': 10, 'D00': 0, 'D44': 44, 'D43': 43, 'D50': 50}

In [36]:
# glob returns entries in arbitrary, filesystem-dependent order. Sort both
# listings so that (a) the seeded random split is reproducible across runs and
# machines and (b) position i of image_list and xml_list refers to the same
# sample whenever image and annotation files share their stems.
image_list = sorted(glob.glob(str(image_dir / "*.jpg")))
xml_list = sorted(glob.glob(str(annotation_source / "*.xml")))
# labels_list = glob.glob(str(image_dir / "*.txt"))

# xml_list = [x.split('\\')[-1].split('.')[0] for x in xml_list]
# labels_list = [x.split('\\')[-1].split('.')[0] for x in labels_list]

# temp3 = [x for x in xml_list if x not in set(labels_list)]
# len(temp3)

In [52]:
# Draw, without replacement, the list positions of the 80% of images that form
# the training split.
n_images = len(image_list)
candidate_positions = list(range(n_images))
random_index = random.sample(candidate_positions, int(n_images * .8))

In [54]:
from operator import itemgetter

In [65]:
# Split images and annotations with the same sampled positions.
# A set gives O(1) membership tests; the previous `item not in train_list`
# scanned the whole training list for every file. Indexing directly also avoids
# itemgetter's special cases (a single index returns a bare item, zero indices
# raise TypeError).
train_positions = set(random_index)

# Training lists keep the order in which the positions were sampled.
train_image_list = [image_list[i] for i in random_index]
valid_image_list = [path for i, path in enumerate(image_list) if i not in train_positions]

train_xml_list = [xml_list[i] for i in random_index]
valid_xml_list = [path for i, path in enumerate(xml_list) if i not in train_positions]

## Pascal_VOC

In [77]:
from pascal import annotation_from_xml

In [82]:
# Output roots for the train / valid splits of the converted dataset.
pascal_train_annotation_result = Path(r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\rddc2020_use_pascal\train")
pascal_valid_annotation_result = Path(r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\rddc2020_use_pascal\valid")

# Create <split>/images and <split>/labels for both splits.
# Each folder is created independently with exist_ok=True: previously, an
# already existing split folder raised on the first mkdir, the bare `except`
# swallowed it, and the "images"/"labels" subfolders were never created.
for split_dir in (pascal_train_annotation_result, pascal_valid_annotation_result):
    for sub_name in ("images", "labels"):
        (split_dir / sub_name).mkdir(parents=True, exist_ok=True)

In [83]:
# Per-split collections, each in [train, valid] order.
# NOTE(review): no visible cell reads these three lists — confirm whether they are still needed.
ann_split = [pascal_train_annotation_result, pascal_valid_annotation_result]
img_split = [train_image_list, valid_image_list]
xml_split = [train_xml_list, valid_xml_list]

In [84]:
# Convert each training annotation to a YOLO text file and copy its image into
# the training output folders.
train_labels_dir = pascal_train_annotation_result / "labels"
train_images_dir = pascal_train_annotation_result / "images"
train_pairs = zip(train_xml_list, train_image_list)

for xml_path, img_path in tqdm(train_pairs, total=len(train_image_list)):
    # Parse the XML and render it with the class-name -> code mapping
    yolo_text = annotation_from_xml(xml_path).to_yolo(class_mapping)

    # Label file takes the XML file name with a .txt suffix
    label_path = (train_labels_dir / Path(xml_path).name).with_suffix(".txt")
    with open(label_path, "w") as label_file:
        label_file.write(yolo_text)

    shutil.copy(img_path, train_images_dir / Path(img_path).name)

0it [00:00, ?it/s]

In [85]:
# Convert each validation annotation to a YOLO text file and copy its image
# into the validation output folders.
valid_labels_dir = pascal_valid_annotation_result / "labels"
valid_images_dir = pascal_valid_annotation_result / "images"
valid_pairs = zip(valid_xml_list, valid_image_list)

for xml_path, img_path in tqdm(valid_pairs, total=len(valid_image_list)):
    # Parse the XML and render it with the class-name -> code mapping
    yolo_text = annotation_from_xml(xml_path).to_yolo(class_mapping)

    # Label file takes the XML file name with a .txt suffix
    label_path = (valid_labels_dir / Path(xml_path).name).with_suffix(".txt")
    with open(label_path, "w") as label_file:
        label_file.write(yolo_text)

    shutil.copy(img_path, valid_images_dir / Path(img_path).name)

  0%|          | 0/2102 [00:00<?, ?it/s]

In [115]:
# Remove excluded classes from every training label file, rewriting each file
# in place.
train_label_files = glob.glob(str(pascal_train_annotation_result / "labels" / "*.txt"))
for label_path in tqdm(train_label_files):
    with open(label_path, 'r') as file:
        original_lines = file.readlines()

    # Keep non-blank lines whose first token (the class label) is not excluded
    kept_lines = []
    for line in original_lines:
        tokens = line.split()
        if not tokens:
            continue
        if tokens[0] in excluded_classes:
            continue
        kept_lines.append(line)

    with open(label_path, 'w') as file:
        file.writelines(kept_lines)

  0%|          | 0/8404 [00:00<?, ?it/s]

In [129]:
# Delete training label files that have zero bytes.
train_label_pattern = str(pascal_train_annotation_result / "labels" / "*.txt")
for label_path in glob.glob(train_label_pattern):
    if os.path.getsize(label_path) == 0:
        os.remove(label_path)

In [114]:
# Remove excluded classes from every validation label file, rewriting each
# file in place.
valid_label_files = glob.glob(str(pascal_valid_annotation_result / "labels" / "*.txt"))
for label_path in tqdm(valid_label_files):
    with open(label_path, 'r') as file:
        original_lines = file.readlines()

    # Keep non-blank lines whose first token (the class label) is not excluded
    kept_lines = []
    for line in original_lines:
        tokens = line.split()
        if not tokens:
            continue
        if tokens[0] in excluded_classes:
            continue
        kept_lines.append(line)

    with open(label_path, 'w') as file:
        file.writelines(kept_lines)

  0%|          | 0/2102 [00:00<?, ?it/s]

In [None]:
# Delete validation label files that have zero bytes.
valid_label_pattern = str(pascal_valid_annotation_result / "labels" / "*.txt")
for label_path in glob.glob(valid_label_pattern):
    if os.path.getsize(label_path) == 0:
        os.remove(label_path)

In [130]:
# Load the training split's label files together with their images through
# pylabel's YOLOv5 importer.
rddc2020_use_pascal_train_dataset = importer.ImportYoloV5(
    path=pascal_train_annotation_result / "labels",
    path_to_images=pascal_train_annotation_result / "images"
)

Importing YOLO files...: 100%|██████████| 6320/6320 [00:26<00:00, 238.20it/s]


In [131]:
# Summary statistics of the re-imported training split.
train_stats = rddc2020_use_pascal_train_dataset.analyze
print(f"Number of images: {train_stats.num_images}")
print(f"Number of classes: {train_stats.num_classes}")
print(f"Classes:{train_stats.classes}")
print(f"Class counts:\n{train_stats.class_counts}")

Number of images: 6320
Number of classes: 0
Classes:[]
Class counts:
cat_name
    13197
Name: count, dtype: int64


## PyLabel

In [69]:
# Export the imported VOC dataset as YOLOv5 label files plus a dataset.yaml.
# NOTE(review): cat_id_index=0 presumably makes exported category ids start at 0 — confirm against the pylabel docs.
dataset.export.ExportToYoloV5(
    output_path=r'D:\0_amri_local\14_pupr_roaddamagedetection\data\Japan\train\labels',
    yaml_file='dataset.yaml', cat_id_index=0
    )

Exporting YOLO files...: 100%|██████████| 9712/9712 [00:32<00:00, 301.41it/s]


['D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\dataset.yaml',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000000.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000001.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000002.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000003.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000004.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000005.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000006.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000008.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\data\\Japan\\train\\labels\\Japan_000009.txt',
 'D:\\0_amri_local\\14_pupr_roaddamagedetection\\dat

In [66]:
# Lookup from four-decimal float strings to integer class ids.
# NOTE(review): no visible cell reads this dict — confirm whether it is still needed.
class_mapping_new = {
    '0.0000': 0,
    '1.0000': 1,
    '2.0000': 2,
    '3.0000': 3,
    '4.0000': 4,
    '5.0000': 5,
    '6.0000': 6,
    # Add more mappings as needed
}

In [68]:
# Rewrite every label file in the folder so that a class label written as an
# integral float (e.g. "0.0000") becomes a plain integer ("0"). Blank lines are
# dropped and fields are re-joined with single spaces.
#
# The earlier guard, `float_class in class_mapping`, tested a float against
# that dict's string keys ("D20", ...), so it never matched and no label was
# ever converted. The conversion now applies to any integral class value.
label_folder = r'D:\0_amri_local\14_pupr_roaddamagedetection\data\Japan\train\labels'
for filename in tqdm(os.listdir(label_folder)):
    if not filename.endswith(".txt"):
        continue
    label_path = os.path.join(label_folder, filename)

    # Read everything first: the file is reopened for writing right after
    with open(label_path, "r") as file:
        lines = file.readlines()

    with open(label_path, "w") as file:
        for line in lines:
            parts = line.split()
            if not parts:
                continue
            class_value = float(parts[0])
            if class_value.is_integer():
                parts[0] = str(int(class_value))
            file.write(" ".join(parts) + "\n")

  0%|          | 0/9712 [00:00<?, ?it/s]

In [39]:
# Walk the label folder. The loop body only rebinds `xml`, so after the loop it
# holds the path of the last ".txt" entry returned by os.listdir (if any).
for filename in os.listdir(label_folder):
    if filename.endswith(".txt"):
        xml = os.path.join(label_folder, filename)

In [70]:
# Sample label line whose first field is written as an integer
value = "0 0.8383 0.8925 0.3233 0.2017"

# Target file, relative to the working directory
xml = "output.txt"

# Overwrite the target file with the sample line
Path(xml).write_text(value)

In [71]:
# Sample label line whose first field is written as a float
new_value = "0.000 0.8383 0.8925 0.3233 0.2017"

# Target file, relative to the working directory
xml = "output.txt"

# Replace the file's content with the new line
Path(xml).write_text(new_value)

print(f"Value in {xml} has been updated to '{new_value}'")

Value in output.txt has been updated to '0.000 0.8383 0.8925 0.3233 0.2017'


## Manual

In [114]:
import xml.etree.ElementTree as ET

In [115]:
def convert_pascal_voc_to_yolo(xmin, ymin, xmax, ymax, img_width, img_height):
    """Convert a corner-style bounding box to a centre/size box scaled by image size.

    Args:
        xmin, ymin: Coordinates of the box's minimum corner.
        xmax, ymax: Coordinates of the box's maximum corner.
        img_width: Image width used to scale the x values.
        img_height: Image height used to scale the y values.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.

    Raises:
        ZeroDivisionError: If ``img_width`` or ``img_height`` is zero.
    """
    box_width = xmax - xmin
    box_height = ymax - ymin
    return (
        (xmin + xmax) / (2.0 * img_width),
        (ymin + ymax) / (2.0 * img_height),
        box_width / img_width,
        box_height / img_height,
    )

In [121]:
# Convert every Pascal VOC XML file in `annotation_source` into a YOLO label
# file inside `annotation_result`, skipping objects whose class is listed in
# `excluded_classes`.
for xml_file in tqdm(os.listdir(annotation_source)):
    if not xml_file.endswith(".xml"):
        continue

    tree = ET.parse(os.path.join(annotation_source, xml_file))
    root = tree.getroot()

    image_width = float(root.find("size/width").text)
    image_height = float(root.find("size/height").text)

    yolo_lines = []

    for obj in root.iter("object"):
        class_id = class_mapping[obj.find("name").text]

        # `class_mapping` yields ints while `excluded_classes` holds strings.
        # Compare as text: the earlier int-vs-str membership test never
        # matched, so excluded classes were still written out.
        if str(class_id) in excluded_classes:
            continue

        bbox = obj.find("bndbox")
        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)

        x_center, y_center, width, height = convert_pascal_voc_to_yolo(
            xmin, ymin, xmax, ymax, image_width, image_height
        )

        yolo_lines.append(f"{class_id} {x_center} {y_center} {width} {height}")

    # Write one line per kept object; the file is empty when nothing was kept
    yolo_file_path = os.path.join(annotation_result, os.path.splitext(xml_file)[0] + ".txt")
    with open(yolo_file_path, "w") as yolo_file:
        yolo_file.write("\n".join(yolo_lines))

  0%|          | 0/10506 [00:00<?, ?it/s]

In [20]:
import cv2
from matplotlib import pyplot as plt

In [33]:
# Image and label folders of the roboflow training export.
rf_v8_img_dir = r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\roboflow\train\images"
rf_v8_label_dir = r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\roboflow\train\labels"
# Later cells pair image_files[i] with annotation_files[i]. glob order is
# arbitrary, so sort both listings to keep that pairing stable whenever image
# and label files share their stems.
image_files = sorted(glob.glob(str(Path(rf_v8_img_dir, "*.jpg"))))
annotation_files = sorted(glob.glob(str(Path(rf_v8_label_dir, "*.txt"))))

In [34]:
# Print whether the number of image files equals the number of label files.
print(len(image_files) == len(annotation_files))

True


In [35]:
# Load the roboflow labels together with their images through pylabel's
# YOLOv5 importer (the yaml_file argument is left commented out).
temp_dataset = importer.ImportYoloV5(
    # yaml_file=r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\roboflow\data.yaml"
    path=rf_v8_label_dir,
    path_to_images=rf_v8_img_dir
)

Importing YOLO files...: 100%|██████████| 135/135 [00:00<00:00, 245.89it/s]


In [36]:
# Summary statistics of the roboflow dataset.
roboflow_stats = temp_dataset.analyze
print(f"Number of images: {roboflow_stats.num_images}")
print(f"Number of classes: {roboflow_stats.num_classes}")
print(f"Classes:{roboflow_stats.classes}")
print(f"Class counts:\n{roboflow_stats.class_counts}")

Number of images: 135
Number of classes: 0
Classes:[]
Class counts:
cat_name
    328
Name: count, dtype: int64


In [21]:
# Pick one random list position and take the image and label file found there.
# NOTE(review): this rebinds `random_index` (a list in the train/valid split cell) to an int,
# and it assumes image_files and annotation_files are listed in matching order — confirm.
random_index = random.randint(0, len(image_files) - 1)
random_image = image_files[random_index]
random_annotation = annotation_files[random_index]

In [30]:
# Pick a random image/label pair, draw its YOLO boxes and show the result.
random_index = random.randint(0, len(image_files) - 1)
random_image = image_files[random_index]
random_annotation = annotation_files[random_index]
# Path(...).name works with either path separator; splitting on '\\' only
# isolated the file name on Windows.
image_name = Path(random_image).name

image = cv2.imread(random_image)
if image is None:
    # cv2.imread signals an unreadable file by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {random_image}")
img_height, img_width = image.shape[:2]

# Read the annotation file; each non-empty line is "class x_center y_center width height"
with open(random_annotation, 'r') as f:
    annotations = f.read().strip().split('\n')

damage = False      # becomes True once at least one box has been drawn
last_label = None   # (class id, text position) of the last drawn box

for annotation in annotations:
    fields = annotation.split()
    if len(fields) != 5:
        # Blank or malformed line (an empty label file yields one empty string)
        continue
    try:
        class_id, x_center, y_center, width, height = map(float, fields)
    except ValueError:
        # Non-numeric field: skip this line only, without hiding other errors
        continue

    # Convert the normalized centre/size box to pixel corner coordinates
    x1 = int((x_center - width / 2) * img_width)
    y1 = int((y_center - height / 2) * img_height)
    x2 = int((x_center + width / 2) * img_width)
    y2 = int((y_center + height / 2) * img_height)

    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    damage = True
    last_label = (int(class_id), (x1, y1 - 10))

# Label the last drawn box. Guarded so an image without boxes no longer fails
# on undefined x1 / y1 / class_id; ids missing from name_mapping are shown as
# the raw number instead of raising KeyError.
if last_label is not None:
    label_id, text_position = last_label
    cv2.putText(image, name_mapping.get(label_id, str(label_id)), text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Display the image (OpenCV loads BGR, matplotlib expects RGB)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')

if damage:
    plt.title(f'{image_name} with Bounding Box')
else:
    plt.title(f'No damage found in {image_name}')
plt.show()

NameError: name 'name_mapping' is not defined

In [156]:
import shutil

In [157]:
# Move a random 20% of the files in train/images (and their label files) into
# the "valid" folder. Note: running this cell again moves a further 20% of
# whatever is left in "train".
train_folder = r'D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\train'
valid_folder = r'D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\data\valid'

# Create valid/images and valid/labels individually. Previously they were only
# created when `valid_folder` itself was missing, so an existing but empty
# "valid" folder made the moves below fail.
for sub_name in ('images', 'labels'):
    os.makedirs(os.path.join(valid_folder, sub_name), exist_ok=True)

# Share of the data to move to the valid folder (20%)
split_ratio = 0.2

# os.listdir order is arbitrary; sort so a seeded random.sample picks the same
# files on every machine.
train_files = sorted(os.listdir(os.path.join(train_folder, 'images')))

num_files_to_move = int(len(train_files) * split_ratio)
files_to_move = random.sample(train_files, num_files_to_move)

missing_labels = 0
for file_name in tqdm(files_to_move):
    # Swap only the real extension; str.replace('.jpg', '.txt') would also
    # rewrite a '.jpg' in the middle of a name and ignore other extensions.
    label_name = os.path.splitext(file_name)[0] + '.txt'

    image_src_path = os.path.join(train_folder, 'images', file_name)
    label_src_path = os.path.join(train_folder, 'labels', label_name)

    image_dest_path = os.path.join(valid_folder, 'images', file_name)
    label_dest_path = os.path.join(valid_folder, 'labels', label_name)

    shutil.move(image_src_path, image_dest_path)
    # An image may have no label file; do not abort halfway through the split
    if os.path.exists(label_src_path):
        shutil.move(label_src_path, label_dest_path)
    else:
        missing_labels += 1

print(f'Moved {num_files_to_move} files to the "valid" folder.')
if missing_labels:
    print(f'{missing_labels} moved images had no label file.')

  0%|          | 0/2101 [00:00<?, ?it/s]

Moved 2101 files to the "valid" folder.


In [31]:
# new_dataset = importer.ImportYoloV5(
#     path=r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\train\labels",
#     path_to_images=r"D:\0_amri_local\14_pupr_roaddamagedetection\yolov8_custom\train\images"
#     )

In [32]:
# print(f"Number of images: {new_dataset.analyze.num_images}")
# print(f"Number of classes: {new_dataset.analyze.num_classes}")
# print(f"Classes:{new_dataset.analyze.classes}")
# print(f"Class counts:\n{new_dataset.analyze.class_counts}")

# Placeholder