In [1]:
# --- TEMPLATE AUTO-POLYGONIZER (WITH ARTIFACT CLEANING) ---
import cv2
import os
import numpy as np
import shutil
from tqdm import tqdm

# 1. CONFIGURATION
# Source folder holding the raw template images (one sub-folder per class)
# and the destination folder that receives the images/ and labels/ output.
INPUT_DIR = "/content/drive/MyDrive/auto_label/hvac_templates"
OUTPUT_DIR = "/content/drive/MyDrive/auto_label/hvac_templates_polygonized"

# The Official 26-Class Mapping
# Class folders, relative to INPUT_DIR, listed in class-id order: the position
# of a folder in this tuple IS its class id. Do not reorder or insert in the
# middle, otherwise every following id shifts.
_CLASS_FOLDERS = (
    "instruments/computer_hexagon/auxiliary_location",  # 0
    "instruments/computer_hexagon/field_mounted",       # 1
    "instruments/computer_hexagon/primary_location",    # 2
    "instruments/discrete_circle/auxiliary_location",   # 3
    "instruments/discrete_circle/field_mounted",        # 4
    "instruments/discrete_circle/primary_location",     # 5
    "instruments/plc_diamond/auxiliary_location",       # 6
    "instruments/plc_diamond/field_mounted",            # 7
    "instruments/plc_diamond/primary_location",         # 8
    "instruments/shared_display/auxiliary_location",    # 9
    "instruments/shared_display/field_mounted",         # 10
    "instruments/shared_display/primary_location",      # 11
    "signals/capillary",                                # 12
    "signals/data_link",                                # 13
    "signals/electrical",                               # 14
    "signals/hydraulic",                                # 15
    "signals/pneumatic",                                # 16
    "signals/wireless",                                 # 17
    "valves/ball",                                      # 18
    "valves/butterfly",                                 # 19
    "valves/diaphragm",                                 # 20
    "valves/gate",                                      # 21
    "valves/globe",                                     # 22
    "valves/pinch",                                     # 23
    "valves/plug",                                      # 24
    "valves/rotary_disc",                               # 25
)

# Maps each class folder (relative to INPUT_DIR) to its integer class id.
CLASS_MAP = {folder: class_id for class_id, folder in enumerate(_CLASS_FOLDERS)}

def clean_rotation_artifacts(image, dark_threshold=50, fill_value=255):
    """
    Return a copy of ``image`` with dark corner regions painted over.

    Each of the 4 corner pixels is inspected. If a corner is darker than
    ``dark_threshold`` it is treated as an artifact and the region connected
    to it is flood-filled with ``fill_value``. The input array is never
    modified.

    Args:
        image: NumPy image array, either 2-D (grayscale) or 3-D
            (height, width, channels). For a multi-channel image a corner
            counts as dark only when its brightest channel is below the
            threshold. NOTE(review): cv2.floodFill is documented for 1- or
            3-channel images; other channel counts are passed through to
            OpenCV unchanged - confirm if such inputs are expected.
        dark_threshold: corner values strictly below this are artifacts.
        fill_value: intensity written into the filled region (applied to
            every channel of a multi-channel image).

    Returns:
        A new array of the same shape and dtype as ``image``.
    """
    # Always work on a copy so the caller's array is left untouched.
    cleaned = image.copy()

    h, w = image.shape[:2]
    if h == 0 or w == 0:
        # A zero-sized image has no corners to inspect.
        return cleaned

    # floodFill needs one fill component per channel for colour images.
    if image.ndim == 2:
        new_val = fill_value
    else:
        new_val = (fill_value,) * image.shape[2]

    # floodFill requires a mask 2 pixels larger than the image in each
    # dimension. It is shared between the corner fills below.
    mask = np.zeros((h + 2, w + 2), np.uint8)

    # Top-Left, Top-Right, Bot-Left, Bot-Right as (x, y) seed points.
    corners = [(0, 0), (w - 1, 0), (0, h - 1), (w - 1, h - 1)]

    for x, y in corners:
        # np.max collapses a colour pixel to its brightest channel and is a
        # no-op for a grayscale scalar, so the comparison is always a single
        # boolean (a plain `pixel < threshold` is ambiguous for colour).
        if np.max(cleaned[y, x]) < dark_threshold:
            # No loDiff/upDiff is passed, so OpenCV's default (zero)
            # tolerance applies to the fill.
            cv2.floodFill(cleaned, mask, (x, y), new_val)

    return cleaned

def _load_gray_on_white(img_path):
    """Read ``img_path`` and return it as a single-channel grayscale array.

    4-channel images are alpha-blended onto a white background before the
    grayscale conversion.

    Raises:
        ValueError: if OpenCV cannot decode the file. ``cv2.imread`` reports
            that by returning None instead of raising, so it is checked here.
    """
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError("could not read image (missing, corrupt or unsupported file)")

    # Handle Transparency -> White: composite the colour channels over white
    # using the 4th channel as the blend factor.
    if img.ndim == 3 and img.shape[2] == 4:
        alpha_factor = img[:, :, 3:4] / 255.0
        colour = img[:, :, :3]
        white_bg = np.full_like(colour, 255)
        img = (colour * alpha_factor + white_bg * (1 - alpha_factor)).astype(np.uint8)

    if img.ndim == 3:
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img


def _symbol_polygon(gray):
    """Return the outline of the largest dark shape as normalized points.

    The result is a list of ``"x y"`` strings with coordinates divided by the
    image width/height, or None when no usable polygon (at least 3 vertices)
    is found.
    """
    # Invert while thresholding: Symbol becomes White, Background Black.
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # The largest outer contour is taken to be the symbol.
    largest = max(contours, key=cv2.contourArea)

    # Simplify the outline; tolerance is 0.2% of its perimeter.
    epsilon = 0.002 * cv2.arcLength(largest, True)
    approx = cv2.approxPolyDP(largest, epsilon, True)
    if len(approx) < 3:
        # Fewer than 3 vertices is a point or a line, not a polygon.
        return None

    h, w = gray.shape[:2]
    points = []
    for x, y in approx.reshape(-1, 2):
        x = min(max(x, 0), w)
        y = min(max(y, 0), h)
        points.append(f"{x/w:.6f} {y/h:.6f}")
    return points


# Clean output dir: start every run from an empty images/ + labels/ tree.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(os.path.join(OUTPUT_DIR, "images"))
os.makedirs(os.path.join(OUTPUT_DIR, "labels"))

print("üöÄ Starting Auto-Polygonization with Artifact Cleaning...")

processed_count = 0
skipped_count = 0

for folder_subpath, class_id in CLASS_MAP.items():
    full_source_path = os.path.join(INPUT_DIR, folder_subpath)

    # isdir (not exists): a stray file with the class name would otherwise
    # make os.listdir raise and abort the whole run.
    if not os.path.isdir(full_source_path):
        print(f"[warn] Class folder not found, skipping: {full_source_path}")
        continue

    # Sorted so the processing order is the same on every run.
    files = sorted(
        f for f in os.listdir(full_source_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    for filename in files:
        img_path = os.path.join(full_source_path, filename)

        # Best-effort per file: one bad template must not stop the batch.
        try:
            # 1-3. Read image, flatten transparency onto white, grayscale.
            gray = _load_gray_on_white(img_path)

            # --- CLEAN ROTATION ARTIFACTS ---
            gray = clean_rotation_artifacts(gray)

            # 4-5. Threshold, find the symbol contour, simplify, normalize.
            normalized_points = _symbol_polygon(gray)
            if normalized_points is None:
                print(f"[warn] No usable polygon found, skipping: {filename}")
                skipped_count += 1
                continue

            # 6. Save CLEANED Image (Critical: Save the version with white corners!)
            # The class id prefix keeps names unique across class folders.
            final_name = f"{class_id}_{filename}"
            image_out = os.path.join(OUTPUT_DIR, "images", final_name)
            # imwrite signals failure through its return value; without this
            # check a label could be written for an image that was not saved.
            if not cv2.imwrite(image_out, gray):
                raise OSError(f"could not write image: {image_out}")

            # 7. Save Label: "<class_id> x1 y1 x2 y2 ..." on a single line.
            label_name = os.path.splitext(final_name)[0] + ".txt"
            with open(os.path.join(OUTPUT_DIR, "labels", label_name), "w") as f:
                f.write(f"{class_id} " + " ".join(normalized_points) + "\n")

            processed_count += 1

        except Exception as e:
            print(f"‚ùå Error processing {filename}: {e}")

print(f"\nüéâ DONE! Processed {processed_count} templates.")
if skipped_count:
    print(f"[warn] Skipped {skipped_count} templates without a usable polygon.")
print(f"üìÇ Output saved to: {OUTPUT_DIR}")
print("üëâ ACTION: Upload the 'hvac_templates_polygonized' folder to Roboflow.")

üöÄ Starting Auto-Polygonization with Artifact Cleaning...

üéâ DONE! Processed 215 templates.
üìÇ Output saved to: /content/drive/MyDrive/auto_label/hvac_templates_polygonized
üëâ ACTION: Upload the 'hvac_templates_polygonized' folder to Roboflow.
