# Generate data from annotated data

Download the annotations in YOLO PyTorch format, unzip the archive, copy the extracted folder into the working directory, and rename it to `annotated_data/`.

Run each code block in order (later blocks reuse variables defined in earlier ones), ensuring that the file paths are correct.

In [1]:
import os
import csv
import cv2
import glob

## Training data

In [2]:
# Paths
csv_file_path_train = "annotated_data/train/_annotations.csv"
image_folder_path_train = "annotated_data/train/"
output_image_folder_train = "data_2/train_images/"
output_annotation_folder = "data_2/train_annotations/"

# Create output directories if they don't exist
os.makedirs(output_image_folder_train, exist_ok=True)
os.makedirs(output_annotation_folder, exist_ok=True)

# Read CSV file, dropping the header row
with open(csv_file_path_train, "r") as csv_file:
    lines = csv_file.readlines()[1:]

# Nested lookup: image_number -> version -> list of
# [class_id, x_center, y_center, width, height] rows, with the box values
# expressed as fractions of the image width/height.
annotations_dict = {}

for line in lines:
    line = line.strip()
    # Skip blank lines; they would otherwise break the 8-way unpacking below.
    if not line:
        continue

    image_name, width, height, class_id, xmin, ymin, xmax, ymax = line.split(",")
    # The file name is expected to have exactly four dot-separated parts;
    # the first and third are kept.
    image_number, _, version, _ = image_name.split(".")
    # Keep only the text before the first underscore of the first part.
    image_number = image_number.split("_")[0]

    # Map the known class names to integer ids; any other value is left as-is.
    if class_id == "L":
        class_id = 1
    elif class_id == "potholes":
        class_id = 0

    img_width, img_height = int(width), int(height)
    xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)

    # Convert the corner box to a normalised centre/size box.
    x_center = (xmin + xmax) / 2.0 / img_width
    y_center = (ymin + ymax) / 2.0 / img_height
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height

    # Append in place instead of rebuilding the list for every box.
    annotations_dict.setdefault(image_number, {}).setdefault(version, []).append(
        [class_id, x_center, y_center, width, height]
    )

# Save images and annotations
for image_path in glob.glob(os.path.join(image_folder_path_train, "*.jpg")):
    image_name = os.path.basename(image_path)
    image_number, _, version, _ = image_name.split(".")
    image_number = image_number.split("_")[0]

    # Save image (decoded and re-encoded under the shortened name)
    image = cv2.imread(image_path)
    output_image_path = os.path.join(output_image_folder_train, f"{image_number}_{version}.jpg")
    cv2.imwrite(output_image_path, image)

    # Save annotations: one space-separated row per box, only for images
    # that have at least one row in the CSV.
    boxes = annotations_dict.get(image_number, {}).get(version)
    if boxes is not None:
        output_annotation_path = os.path.join(output_annotation_folder, f"{image_number}_{version}.txt")
        with open(output_annotation_path, 'w', newline='') as outfile:
            writer = csv.writer(outfile, delimiter=' ')
            writer.writerows(boxes)

print("Processing complete!")


Processing complete!


## Validation data

In [3]:
csv_file_path_valid = "annotated_data/valid/_annotations.csv"
image_folder_path_valid = "annotated_data/valid/"

# Load every CSV row after the header.
with open(csv_file_path_valid, "r") as csv_file:
    lines = csv_file.readlines()[1:]

# image_number -> version -> list of [class_id, x_center, y_center, width, height]
annotations_dict = {}

for raw_line in lines:
    record = raw_line.strip()
    # Ignore lines that are empty (or a single character) once stripped.
    if len(record) <= 1:
        continue

    image_name, width, height, class_id, xmin, ymin, xmax, ymax = record.split(",")
    # The file name is expected to have exactly four dot-separated parts.
    stem, _, version, _ = image_name.split(".")
    image_number = stem.split("_")[0]

    # Known class names become integer ids; anything else passes through.
    class_id = {"L": 1, "potholes": 0}.get(class_id, class_id)

    img_width, img_height = int(width), int(height)
    left, top, right, bottom = int(xmin), int(ymin), int(xmax), int(ymax)

    # Corner box -> centre/size box, as fractions of the image dimensions.
    x_center = (left + right) / 2.0 / img_width
    y_center = (top + bottom) / 2.0 / img_height
    box_width = (right - left) / img_width
    box_height = (bottom - top) / img_height

    per_image = annotations_dict.setdefault(image_number, {})
    per_image.setdefault(version, []).append(
        [class_id, x_center, y_center, box_width, box_height]
    )

# Save images and annotations
# NOTE(review): outputs go to output_image_folder_train / output_annotation_folder,
# which are defined in the training cell, so validation data lands in the
# training folders. Presumably this merges the two splits; confirm it is intended.
valid_image_pattern = os.path.join(image_folder_path_valid, "*.jpg")
for image_path in glob.glob(valid_image_pattern):
    stem, _, version, _ = os.path.basename(image_path).split(".")
    image_number = stem.split("_")[0]
    short_name = f"{image_number}_{version}"

    # Decode the source image and re-encode it under the shortened name.
    pixels = cv2.imread(image_path)
    cv2.imwrite(os.path.join(output_image_folder_train, f"{short_name}.jpg"), pixels)

    # Write one space-separated row per box, only when the CSV had rows
    # for this image.
    boxes = annotations_dict.get(image_number, {}).get(version)
    if boxes is None:
        continue
    label_path = os.path.join(output_annotation_folder, f"{short_name}.txt")
    with open(label_path, 'w', newline='') as outfile:
        csv.writer(outfile, delimiter=' ').writerows(boxes)

print("Processing complete!")


Processing complete!


## Test data

In [4]:
image_folder_path_test = "annotated_data/test/"
output_image_folder_test = "data_2/test_images/"

os.makedirs(output_image_folder_test, exist_ok=True)

# Re-save every test image under a shortened "<image_number>_<version>.jpg"
# name. No annotation files are written for the test split.
test_image_pattern = os.path.join(image_folder_path_test, "*.jpg")
for image_path in glob.glob(test_image_pattern):
    # The file name is expected to have exactly four dot-separated parts;
    # the first and third are used.
    stem, _, version, _ = os.path.basename(image_path).split(".")
    image_number = stem.split("_")[0]

    # Decode the source image and re-encode it at the new location.
    pixels = cv2.imread(image_path)
    destination = os.path.join(output_image_folder_test, f"{image_number}_{version}.jpg")
    cv2.imwrite(destination, pixels)

In [5]:
import csv, glob, os
 
train_labels_path = "data/train_labels.csv"
output_train_labels_path = "data_2/train_labels.csv"
image_folder_path = "annotated_data/train/"

# Read train_labels.csv into a lookup of pothole number -> bags value
# (both kept as the strings read from the file).
labels_dict = {}
with open(train_labels_path, "r", newline='') as labels_file:
    reader = csv.reader(labels_file)
    next(reader, None)  # Skip header; the default avoids StopIteration on an empty file
    for row in reader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than fail the two-way unpacking below.
        if not row:
            continue
        pothole_number, bags = row
        labels_dict[pothole_number] = bags

# Make sure the output directory exists even if no earlier cell created it.
os.makedirs(os.path.dirname(output_train_labels_path), exist_ok=True)

# Create new train_labels.csv with one row per training image file, keyed by
# "<pothole number>_<version>" so every version of an image gets the label
# of its pothole.
with open(output_train_labels_path, "w", newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerow(["Pothole number", "Bags used"])

    for image_path in glob.glob(os.path.join(image_folder_path, "*.jpg")):
        image_name = os.path.basename(image_path)
        # The file name is expected to have exactly four dot-separated parts.
        image_number, _, version, _ = image_name.split(".")
        # Drop everything after the first underscore and remove every "p"
        # so the key matches the numbers used in train_labels.csv.
        image_number = image_number.split("_")[0].replace("p", "")

        # Images without a label in train_labels.csv are left out.
        if image_number in labels_dict:
            writer.writerow([f"{image_number}_{version}", labels_dict[image_number]])

In [6]:
# Input folder (the renamed test images) and the CSV to generate
test_image_folder = "data_2/test_images/"
output_test_labels_path = "data_2/test_labels.csv"

# Make sure the folder that will hold the CSV exists
os.makedirs(os.path.dirname(output_test_labels_path), exist_ok=True)

# Write a header plus one row per test image; the "Bags used" column is
# left empty.
with open(output_test_labels_path, "w", newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerow(["Pothole number", "Bags used"])

    test_image_pattern = os.path.join(test_image_folder, "*.jpg")
    for image_path in glob.glob(test_image_pattern):
        # File names are expected to contain exactly one underscore:
        # "<image_number>_<version>.jpg".
        number_part, version_part = os.path.basename(image_path).split("_")
        pothole_id = number_part.replace("p", "")
        version_id = version_part.replace(".jpg", "")
        writer.writerow([pothole_id + "_" + version_id, None])

print("Processing complete!")

Processing complete!
