# **Set Up the Environment & Install Libraries**

`python -m venv venv`

`.\venv\Scripts\activate`

`pip install ultralytics opencv-python matplotlib jupyter ipykernel tqdm`

# **Import Libraries**

In [None]:
import xml.etree.ElementTree as ET
import json
import os
from tqdm import tqdm


# **Define All the File Paths**

In [None]:
# --- Dataset location ---------------------------------------------------
# The dataset folder is looked up inside the current working directory.
current_dir = os.getcwd()
DATASET_ROOT = os.path.join(current_dir, "ZJU-Leaper")

# --- Conversion settings ------------------------------------------------
GROUP_JSON_FILE = "group1.json"        # group file listing the image stems to convert
OUTPUT_LABELS_DIR = "Label"            # folder (under the dataset root) for the .txt labels
IMAGE_WIDTH, IMAGE_HEIGHT = 512, 512   # pixel size used to normalize the boxes
DEFECT_CLASS_ID = 0                    # class id written in front of every box

# --- Derived paths ------------------------------------------------------
labels_output_dir = os.path.join(DATASET_ROOT, OUTPUT_LABELS_DIR)
annotations_dir = os.path.join(DATASET_ROOT, "Annotations", "xmls")
group_json_path = os.path.join(
    DATASET_ROOT, "ImageSets", "Groups", GROUP_JSON_FILE
)

# Make sure the label folder exists; this is a no-op when it is already there.
os.makedirs(labels_output_dir, exist_ok=True)

for summary_line in (
    f"DataSet path: {DATASET_ROOT}",
    f"Annotations Path: {annotations_dir}",
    f"Labels will be saved to: {labels_output_dir}",
):
    print(summary_line)

# **Conversion**

In [None]:
def convert_to_yolo_format(image_width, image_height, box):
    """
    Convert a pixel-space corner box to a normalized YOLO box.

    Args:
        image_width: Image width in pixels, used to normalize x values.
        image_height: Image height in pixels, used to normalize y values.
        box: Sequence ``(xmin, ymin, xmax, ymax)`` in pixel coordinates.

    Returns:
        Tuple ``(x_center_norm, y_center_norm, width_norm, height_norm)``
        with every value in the range [0, 1].

    Notes:
        Corners given in the wrong order are swapped, and the box is clipped
        to the image bounds before normalizing, so the result never contains
        negative sizes or coordinates outside [0, 1]. A box that lies entirely
        outside the image collapses to a zero-size box on the nearest border.
    """
    x_a, y_a, x_b, y_b = box

    # Tolerate inverted corners so width/height can never come out negative.
    xmin, xmax = sorted((x_a, x_b))
    ymin, ymax = sorted((y_a, y_b))

    # Clip to the image so annotations that overrun the border stay in [0, 1].
    xmin = min(max(xmin, 0), image_width)
    xmax = min(max(xmax, 0), image_width)
    ymin = min(max(ymin, 0), image_height)
    ymax = min(max(ymax, 0), image_height)

    # Normalized center coordinates
    x_center_norm = ((xmin + xmax) / 2.0) / image_width
    y_center_norm = ((ymin + ymax) / 2.0) / image_height

    # Normalized width and height
    width_norm = (xmax - xmin) / image_width
    height_norm = (ymax - ymin) / image_height

    return (x_center_norm, y_center_norm, width_norm, height_norm)

print("`convert_to_yolo_format` is defined.")

In [None]:
print(f"Starting to process {GROUP_JSON_FILE}...")

# Load the group file and flatten its splits into one master list of stems.
try:
    with open(group_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    file_stems = [
        *data['normal']['train'],
        *data['normal']['test'],
        *data['defect']['train'],
        # The 'test' split is optional inside the 'defect' group.
        *data['defect'].get('test', []),
    ]

except FileNotFoundError:
    print(f"ERROR: Could not find JSON file at: {group_json_path}")
    raise
except KeyError as e:
    print(f"ERROR: JSON structure is not as expected. Missing key: {e}")
    raise

print(f"Loaded {len(file_stems)} total file stems from {GROUP_JSON_FILE}.")

# Parse each XML annotation and write the matching YOLO .txt label file.
processed_count = 0
skipped_count = 0
normal_count = 0
defect_count = 0

for base_filename in tqdm(file_stems, desc="Processing Annotations"):

    # Source annotation
    xml_filename = f"{base_filename}.xml"
    xml_path = os.path.join(annotations_dir, xml_filename)

    # Destination label file
    txt_filename = f"{base_filename}.txt"
    txt_path = os.path.join(labels_output_dir, txt_filename)

    if not os.path.exists(xml_path):
        skipped_count += 1
        continue

    try:
        root = ET.parse(xml_path).getroot()

        # <defective> holds 0 for a normal image, non-zero for a defective one.
        is_defective = int(root.find('defective').text) != 0

        # Build every label line BEFORE touching the output file, so a
        # malformed annotation cannot leave a truncated label file behind.
        label_lines = []
        if is_defective:
            for bbox in root.findall('bbox'):
                box = [
                    float(bbox.find(tag).text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                ]
                yolo_box = convert_to_yolo_format(IMAGE_WIDTH, IMAGE_HEIGHT, box)
                label_lines.append(
                    f"{DEFECT_CLASS_ID} {yolo_box[0]} {yolo_box[1]} {yolo_box[2]} {yolo_box[3]}\n"
                )

        # A normal image gets an empty .txt file (no lines are written).
        with open(txt_path, 'w', encoding='utf-8') as out_file:
            out_file.writelines(label_lines)

    except Exception as e:
        # Best-effort batch conversion: report the bad file and keep going.
        print(f"ERROR processing {xml_filename}: {e}")
        skipped_count += 1

    else:
        # Count only after the label file was written successfully, so that
        # processed_count always equals normal_count + defect_count.
        if is_defective:
            defect_count += 1
        else:
            normal_count += 1
        processed_count += 1

print("\n--- Processing Complete ---")
print(f"Successfully processed: {processed_count} files.")
print(f"  - Normal (empty .txt): {normal_count}")
print(f"  - Defective (with boxes): {defect_count}")
print(f"Skipped (missing XML or errors): {skipped_count} files.")
print(f"All .txt files are saved in: {labels_output_dir}")

# **Visualize**

In [None]:
import cv2
import matplotlib.pyplot as plt
import random

# Number of random images to show
NUM_IMAGES_TO_SHOW = 9

# Number of columns in the preview grid
GRID_COLUMNS = 3

# Folder holding the source images
images_source_dir = os.path.join(DATASET_ROOT, 'images')

# Class id -> display name
CLASS_NAMES = {
    0: "Defect"
}

# This cell relies on the stem list built by the conversion cell.
if 'file_stems' not in locals():
    print("ERROR: 'file_stems' not found. Please re-run Cell 4 first.")
    raise NameError("'file_stems' is not defined")

# A stem counts as defective when its label file exists and is non-empty.
defective_stems = []
for stem in file_stems:
    label_path = os.path.join(labels_output_dir, f"{stem}.txt")
    if os.path.exists(label_path) and os.path.getsize(label_path) > 0:
        defective_stems.append(stem)

if not defective_stems:
    print("ERROR: Could not find any defective stems. Did Cell 4 run correctly?")
    raise ValueError("No defective files found")

if len(defective_stems) < NUM_IMAGES_TO_SHOW:
    print(f"Warning: Found fewer defective images than requested. Will show {len(defective_stems)}.")
    sample_stems = defective_stems
else:
    sample_stems = random.sample(defective_stems, NUM_IMAGES_TO_SHOW)

print(f"Displaying {len(sample_stems)} random defective images...")

# Ceiling division on the number of images actually shown, so the grid always
# has enough cells (and at least one row) for every subplot index used below.
grid_rows = -(-len(sample_stems) // GRID_COLUMNS)

# Plot size
plt.figure(figsize=(15, 10))

for i, stem in enumerate(sample_stems):
    image_path = os.path.join(images_source_dir, f"{stem}.jpg")
    label_path = os.path.join(labels_output_dir, f"{stem}.txt")

    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Could not load image {image_path}, skipping.")
        continue

    # BGR to RGB for matplotlib
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # De-normalize with the ACTUAL image dimensions rather than assumed ones.
    actual_height, actual_width, _ = image.shape

    # Read the label file
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Tolerate blank lines (e.g. a trailing newline).
                continue

            class_id = int(parts[0])
            x_center_norm = float(parts[1])
            y_center_norm = float(parts[2])
            width_norm = float(parts[3])
            height_norm = float(parts[4])

            # De-normalize coordinates using ACTUAL dimensions
            box_width = width_norm * actual_width
            box_height = height_norm * actual_height
            x_center = x_center_norm * actual_width
            y_center = y_center_norm * actual_height

            xmin = int(x_center - (box_width / 2))
            ymin = int(y_center - (box_height / 2))
            xmax = int(x_center + (box_width / 2))
            ymax = int(y_center + (box_height / 2))

            # Get class name
            label = CLASS_NAMES.get(class_id, "Unknown")

            # Draw the box (green, 2px thick)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # Draw the label just above the box, but never above the top edge
            # of the image, where the text would be invisible.
            label_y = max(ymin - 10, 15)
            cv2.putText(image, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Add to plot
    plt.subplot(grid_rows, GRID_COLUMNS, i + 1)
    plt.imshow(image)
    plt.title(f"Image: {stem}.jpg")
    plt.axis('off')

plt.tight_layout()
plt.show()