In [60]:
import os
import shutil

import cv2
import pandas as pd

# Root directory of the raw dataset; data.csv and an images/ folder are read from it
data_path = "DATASET"

# Load the annotation table (data.csv); only its "image_path" column is used in this step
train_df = pd.read_csv(os.path.join(data_path, "data.csv"))

# Directory that will hold the YOLO-format data
yolo_data_path = os.path.join(data_path, "yolo_data")

# Sub-directory that receives the image copies.
# makedirs(exist_ok=True) creates both levels at once and keeps the cell re-runnable.
yolo_images_path = os.path.join(yolo_data_path, "images")
os.makedirs(yolo_images_path, exist_ok=True)

# Copy every referenced image into the YOLO images directory.
# shutil.copy is used instead of shelling out to `copy`: it works on every OS,
# copes with spaces in paths, and does not pass file names through a shell.
for image_name in train_df["image_path"].unique():
    image_path = os.path.join(data_path, "images", image_name)
    new_image_path = os.path.join(yolo_images_path, image_name)
    if not os.path.isfile(image_path):
        # Keep going like the original shell copy did, but make the failure visible
        print(f"Warning: source image not found, skipping: {image_path}")
        continue
    shutil.copy(image_path, new_image_path)

# Write one label file (<image stem>.txt) next to each copied image.
# Every line is "class_id center_x center_y width height", with the box values
# divided by the image width/height.
# groupby hands over each image's rows once instead of re-filtering the whole
# frame for every image.
for image_name, image_data in train_df.groupby("image_path", sort=False):
    img = cv2.imread(os.path.join(yolo_images_path, image_name))
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None, not by raising
        print(f"Warning: could not read image {image_name!r}; skipping its labels")
        continue
    h, w = img.shape[:2]

    # splitext strips only the final extension, so "a.b.jpg" -> "a.b.txt"
    # (split('.')[0] would have produced "a.txt")
    label_stem = os.path.splitext(image_name)[0]
    txt_path = os.path.join(yolo_images_path, f"{label_stem}.txt")
    with open(txt_path, "w") as f:
        for _, row in image_data.iterrows():
            class_id = int(row["class"])

            xmin = row["xmin"]
            ymin = row["ymin"]
            xmax = row["xmax"]
            ymax = row["ymax"]

            # Corner box -> centre/size box, then scale by the image dimensions
            center_x = (xmin + xmax) / 2 / w
            center_y = (ymin + ymax) / 2 / h
            width = (xmax - xmin) / w
            height = (ymax - ymin) / h

            f.write(f"{class_id} {center_x} {center_y} {width} {height}\n")

print("Data conversion complete!")


Data conversion complete!


In [None]:
import os
import random
import yaml

# Path to the dataset
dataset_root = r"dataset"
dataset_path = r"dataset/dataset"

# Split configuration: a fixed seed makes the train/val assignment repeatable
SPLIT_SEED = 42
TRAIN_FRACTION = 0.95

# Collect the stem (file name without ".jpg") of every image in the dataset folder.
# Sorted first because os.listdir returns entries in arbitrary order, which would
# otherwise make the seeded shuffle differ between machines.
data_files = sorted(
    os.path.splitext(name)[0]
    for name in os.listdir(dataset_path)
    if name.endswith(".jpg")
)

# Shuffle with a private RNG so the global random state is left untouched
random.Random(SPLIT_SEED).shuffle(data_files)

# Split the list into two parts
split_idx = int(len(data_files) * TRAIN_FRACTION)
train, val = data_files[:split_idx], data_files[split_idx:]

# Create train and val folders
train_path = os.path.join(dataset_root, "train")
val_path = os.path.join(dataset_root, "val")
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

# Move each image together with its label file into the matching split folder.
# An image whose .txt is missing is moved on its own instead of aborting the
# split half-way with a FileNotFoundError.
for split_dir, stems in ((train_path, train), (val_path, val)):
    for stem in stems:
        os.rename(f"{dataset_path}/{stem}.jpg", f"{split_dir}/{stem}.jpg")
        label_src = f"{dataset_path}/{stem}.txt"
        if os.path.exists(label_src):
            os.rename(label_src, f"{split_dir}/{stem}.txt")
        else:
            print(f"Warning: no label file for {stem}.jpg")

# Map of class id -> class name written into the data config.
# NOTE(review): id 5 previously repeated 'CLUTTER_SIDEWALK' (also id 9), giving two
# ids the same name. It is set to 'BROKEN_SIGNAGE' here on the assumption that this
# is the dataset's missing class — confirm against the dataset's class list.
classes = {
    0: 'GRAFFITI',
    1: 'FADED_SIGNAGE',
    2: 'POTHOLES',
    3: 'GARBAGE',
    4: 'CONSTRUCTION_ROAD',
    5: 'BROKEN_SIGNAGE',
    6: 'BAD_STREETLIGHT',
    7: 'BAD_BILLBOARD',
    8: 'SAND_ON_ROAD',
    9: 'CLUTTER_SIDEWALK',
    10: 'UNKEPT_FACADE'
}

# Guard against the same name being assigned to two ids again
assert len(set(classes.values())) == len(classes), "duplicate class names"

# Create YAML Data Config File: dataset root, split folders (relative to the root),
# number of classes and the id -> name map
data_config = {
    'path': dataset_root,
    'train': "train",
    'val': "val",
    'nc': len(classes),
    'names': classes,
}
with open(os.path.join(dataset_root, "data.yml"), 'w') as yaml_file:
    yaml.dump(data_config, yaml_file, default_flow_style=False)