In [6]:
import os
import shutil

from PIL import Image
from math import floor

#### AsphaltBad dataset

In [7]:
# Source folders of the AsphaltBad dataset: PNG images and their label files.
image_dir = "data/badAsphalt/asphaltBad"
label_dir = "data/badAsphalt/bad_labels"

# Destination layout: one images/ and one labels/ folder per split.
train_image_dst = "yolo_data/train/images"
train_label_dst = "yolo_data/train/labels"
val_image_dst = "yolo_data/val/images"
val_label_dst = "yolo_data/val/labels"

# Create every destination folder up front; exist_ok keeps the cell re-runnable.
for _dst_dir in (train_image_dst, train_label_dst, val_image_dst, val_label_dst):
    os.makedirs(_dst_dir, exist_ok=True)

In [8]:
# Fixed-size split of the alphabetically sorted PNG list: the first
# TRAIN_COUNT files go to training, the last VAL_COUNT files to validation.
TRAIN_COUNT = 156
VAL_COUNT = 30

image_files = sorted([f for f in os.listdir(image_dir) if f.endswith(".png")])
train_images = image_files[:TRAIN_COUNT]
# Take the validation files only from what remains after the training slice.
# With at least TRAIN_COUNT + VAL_COUNT files this is exactly the last
# VAL_COUNT files; with fewer, it prevents the same image from landing in
# both splits (the validation set just gets smaller, possibly empty).
val_images = image_files[TRAIN_COUNT:][-VAL_COUNT:]

In [9]:
def convert_and_copy(images, image_dst, label_dst):
    """Re-encode images as JPEG and copy their label files next to them.

    For every file name in ``images`` the image is read from the module-level
    ``image_dir``, converted to RGB and written to ``image_dst`` as
    ``<stem>.JPG``. The label ``<stem>.txt`` is then copied from the
    module-level ``label_dir`` into ``label_dst``.

    Args:
        images: Iterable of image file names (not paths) inside ``image_dir``.
        image_dst: Directory that receives the JPEG files.
        label_dst: Directory that receives the copied label files.

    Raises:
        FileNotFoundError: If an image or its label file does not exist.
    """
    for png_name in images:
        # Strip only the final extension. A plain str.replace(".png", ...)
        # would also rewrite ".png" occurring elsewhere in the file name.
        stem = os.path.splitext(png_name)[0]

        # Convert .png to .jpg; the context manager releases the file handle
        # as soon as the image has been re-encoded.
        with Image.open(os.path.join(image_dir, png_name)) as img:
            img.convert("RGB").save(os.path.join(image_dst, stem + ".JPG"), "JPEG")

        # Copy label with adjusted name
        label_name = stem + ".txt"
        shutil.copy(os.path.join(label_dir, label_name), os.path.join(label_dst, label_name))

# Run the conversion once per split: training first, then validation.
for _split_images, _split_image_dst, _split_label_dst in (
    (train_images, train_image_dst, train_label_dst),
    (val_images, val_image_dst, val_label_dst),
):
    convert_and_copy(_split_images, _split_image_dst, _split_label_dst)

print("✅ PNGs converted to JPGs and files copied.")

✅ PNGs converted to JPGs and files copied.


#### Potholes dataset

In [10]:
# Root of the Potholes dataset; each subfolder is processed separately below.
base_dir = "data/Potholes_dataset"

# Same destination folders as the AsphaltBad section, so both datasets end up
# in one train/val tree.
train_img_dst = "yolo_data/train/images"
train_lbl_dst = "yolo_data/train/labels"
val_img_dst = "yolo_data/val/images"
val_lbl_dst = "yolo_data/val/labels"

# Make sure every destination exists; exist_ok keeps the cell re-runnable.
for _dst_dir in (train_img_dst, train_lbl_dst, val_img_dst, val_lbl_dst):
    os.makedirs(_dst_dir, exist_ok=True)

In [11]:
from PIL import Image
import shutil
import os
from math import floor

def process_split(file_list, img_dst, lbl_dst, src_dir=None):
    """Re-encode images as JPEG and rewrite their labels with class id 0.

    Each image is read from the source folder, converted to RGB and saved to
    ``img_dst`` as ``<stem>.JPG``. If a ``<stem>.txt`` file sits next to the
    image, it is copied to ``lbl_dst`` with the first token of every
    non-empty line replaced by ``"0"``. Otherwise a warning is printed and
    only the image is written.

    Args:
        file_list: Image file names (not paths) inside the source folder.
        img_dst: Directory that receives the JPEG files.
        lbl_dst: Directory that receives the rewritten label files.
        src_dir: Folder containing the images and their labels. When omitted,
            the module-level ``sub_path`` is used, which is how the function
            behaved before this parameter existed.
    """
    for img_name in file_list:
        # Resolve the folder lazily so an empty file_list never needs the
        # module-level fallback to be defined.
        folder = sub_path if src_dir is None else src_dir

        base_name = os.path.splitext(img_name)[0]
        label_path = os.path.join(folder, base_name + ".txt")

        # Convert image to .JPG; the context manager releases the file handle
        # as soon as the image has been re-encoded.
        with Image.open(os.path.join(folder, img_name)) as img:
            img.convert("RGB").save(os.path.join(img_dst, base_name + ".JPG"), "JPEG")

        # Process and copy label file (if it exists)
        if not os.path.exists(label_path):
            print(f"⚠️ Missing label for {img_name}")
            continue

        new_label_path = os.path.join(lbl_dst, base_name + ".txt")
        with open(label_path, "r") as lf, open(new_label_path, "w") as out:
            for line in lf:
                parts = line.strip().split()
                if parts:
                    # Change class to 0, regardless of what it was
                    parts[0] = "0"
                    out.write(" ".join(parts) + "\n")

# Walk every entry of the Potholes dataset root; only directories are processed.
for subfolder in os.listdir(base_dir):
    sub_path = os.path.join(base_dir, subfolder)

    if os.path.isdir(sub_path):
        # Gather the .jpg files (any letter case) in a stable, sorted order
        image_files = sorted(
            [name for name in os.listdir(sub_path) if name.lower().endswith(".jpg")]
        )

        # First 85% (rounded down) of the sorted list is training, the rest validation
        total = len(image_files)
        split_index = floor(total * 0.85)
        train_files, val_files = image_files[:split_index], image_files[split_index:]

        # Training set first, then validation set
        for _files, _img_dst, _lbl_dst in (
            (train_files, train_img_dst, train_lbl_dst),
            (val_files, val_img_dst, val_lbl_dst),
        ):
            process_split(_files, _img_dst, _lbl_dst)

print("✅ All subfolders in Potholes_dataset processed successfully.")


✅ All subfolders in Potholes_dataset processed successfully.
