# In [None]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone

# Maps every known source label spelling to its canonical name.
# "RatSkeleton" maps to itself so an already-canonical label still takes
# part in de-duplication without being reported as a rename.
_LABEL_RENAMES = {
    "bb": "Bounding Box",
    "nn": "Bounding Box",
    "yy": "Bounding Box",
    "xx": "Bounding Box",
    "skel": "RatSkeleton",
    "skelton": "RatSkeleton",
    "ratskel": "RatSkeleton",
    "RatSkeleton": "RatSkeleton",
}


def _clone_element(element):
    """Return an independent copy of *element* via a serialize/parse round trip."""
    return ET.fromstring(ET.tostring(element))


def _normalize_meta_labels(labels_element):
    """Rename and de-duplicate the <label> children of *labels_element* in place.

    A single set of final names is tracked, so a label that is renamed to a
    canonical name can never coexist with a label that already carried that
    name, whichever of the two appears first.
    """
    kept = []
    seen_names = set()

    for label_elem in list(labels_element):
        name_tag = label_elem.find("name")
        if name_tag is None:
            # Nothing to compare against; keep the element untouched.
            kept.append(label_elem)
            continue

        original_name = name_tag.text
        target_name = _LABEL_RENAMES.get(original_name)
        final_name = target_name if target_name else original_name

        if final_name in seen_names:
            if target_name:
                print(f"Metadata label '{original_name}' also maps to '{target_name}', but '{target_name}' is already defined. Removing redundant source.")
            else:
                print(f"Skipping duplicate metadata label definition for '{original_name}'.")
            continue

        if target_name and target_name != original_name:
            print(f"Transforming metadata label '{original_name}' to '{target_name}'.")
            name_tag.text = target_name

        seen_names.add(final_name)
        kept.append(label_elem)

    for child in list(labels_element):
        labels_element.remove(child)
    labels_element.extend(kept)


def _update_task_stats(task_element, total_images):
    """Refresh size, frame range and name of a <task> element after merging."""
    last_frame = str(max(total_images - 1, 0))

    size_tag = task_element.find("size")
    if size_tag is not None:
        size_tag.text = str(total_images)

    stop_frame_tag = task_element.find("stop_frame")
    if stop_frame_tag is not None:
        stop_frame_tag.text = last_frame

    name_tag = task_element.find("name")
    if name_tag is None:
        name_tag = ET.SubElement(task_element, "name")
    if name_tag.text:
        name_tag.text = name_tag.text + " (Merged)"
    else:
        name_tag.text = "Merged Annotations"

    # Only the first <segment> of the first <segments> is adjusted.
    segments = task_element.find("segments")
    if segments is not None:
        segment = segments.find("segment")
        if segment is not None:
            stop_tag = segment.find("stop")
            if stop_tag is not None:
                stop_tag.text = last_frame


def merge_cvat_annotations(xml_files, output_dir):
    """
    Merges multiple CVAT 1.1 XML annotation files into a single file and
    renames specified labels in both metadata and image annotations.
    - Labels "bb", "nn", "yy", "xx" are renamed to "Bounding Box".
    - Labels "skel", "skelton", "ratskel" are renamed to "RatSkeleton".

    Images are re-numbered sequentially (starting at 0) in the order the
    files are given. The result is written to
    ``<output_dir>/merged_annotations.xml``; the directory is created if it
    does not exist. Progress and problems are reported via ``print``.

    Only the <meta> of the first file is carried over. Label definitions
    that exist solely in the other files are NOT added to the merged
    metadata.

    Args:
        xml_files (list): A list of paths to the CVAT XML files to merge.
                          The first file in the list is used as the base for metadata.
        output_dir (str): The directory where the merged XML file will be saved.

    Returns:
        None. The function returns early (writing nothing) when the list is
        empty or the base file is missing or cannot be parsed; other files
        that are missing or unparsable are skipped.
    """
    if not xml_files:
        print("No XML files provided to merge.")
        return

    # Use the first file as the base for the meta structure
    base_file_path = xml_files[0]
    print(f"Using '{os.path.basename(base_file_path)}' as the base for metadata.")
    try:
        base_root = ET.parse(base_file_path).getroot()
    except ET.ParseError as e:
        print(f"Error parsing base XML file {base_file_path}: {e}")
        return
    except FileNotFoundError:
        print(f"Base XML file not found: {base_file_path}")
        return

    merged_root = ET.Element("annotations")

    # Copy version from the base file (as a clone, so the base tree and the
    # merged tree do not share element objects).
    version_element = base_root.find("version")
    if version_element is not None:
        merged_root.append(_clone_element(version_element))
    else:
        ET.SubElement(merged_root, "version").text = "1.1"

    # Copy meta from the base file, or synthesize a minimal one.
    meta_element_orig = base_root.find("meta")
    if meta_element_orig is None:
        print(f"Warning: No <meta> tag found in the base file {base_file_path}. Creating a basic one.")
        meta_element = ET.SubElement(merged_root, "meta")
        task_element_for_meta = ET.SubElement(meta_element, "task")
        ET.SubElement(task_element_for_meta, "name").text = "Merged Task"
        ET.SubElement(task_element_for_meta, "labels")
    else:
        meta_element = _clone_element(meta_element_orig)
        merged_root.append(meta_element)

    task_element = meta_element.find("task")
    if task_element is not None:
        labels_element = task_element.find("labels")
        if labels_element is not None:
            _normalize_meta_labels(labels_element)

    total_images_processed = 0
    annotations_renamed_count = {"Bounding Box": 0, "RatSkeleton": 0}

    # Process all files, including the base file for its images
    for index, file_path in enumerate(xml_files):
        if index == 0:
            # Already parsed above for its metadata; no need to read it twice.
            root = base_root
        else:
            try:
                root = ET.parse(file_path).getroot()
            except ET.ParseError as e:
                print(f"Error parsing XML file {file_path}: {e}. Skipping this file.")
                continue
            except FileNotFoundError:
                print(f"XML file not found: {file_path}. Skipping this file.")
                continue

        images = root.findall("image")
        if not images:
            print(f"No <image> tags found in {file_path}.")
            continue

        for image_elem_orig in images:
            image_elem = _clone_element(image_elem_orig)
            # The running total doubles as the next free sequential image id.
            image_elem.set("id", str(total_images_processed))

            # Only direct children of <image> are relabelled; nested
            # elements keep their own label attributes.
            for ann_tag in image_elem:
                current_label = ann_tag.get("label")
                target_label = _LABEL_RENAMES.get(current_label)
                if target_label is None or target_label == current_label:
                    continue
                ann_tag.set("label", target_label)
                annotations_renamed_count[target_label] += 1

            merged_root.append(image_elem)
            total_images_processed += 1

        print(f"Processed {len(images)} images from {os.path.basename(file_path)}")

    for target_label, count in annotations_renamed_count.items():
        if count > 0:
            print(f"Renamed {count} annotation(s) to '{target_label}' in image data.")

    if task_element is not None:
        _update_task_stats(task_element, total_images_processed)

    dumped_tag = meta_element.find("dumped")
    if dumped_tag is None:
        dumped_tag = ET.SubElement(meta_element, "dumped")
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the emitted text format is unchanged.
    dumped_tag.text = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f+00:00")

    output_tree = ET.ElementTree(merged_root)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created output directory: {output_dir}")

    output_file_path = os.path.join(output_dir, "merged_annotations.xml")

    # Best-effort write: report any failure instead of crashing the caller.
    try:
        try:
            ET.indent(output_tree, space="  ", level=0)
        except AttributeError:
            # ET.indent only exists on Python 3.9+.
            print("Warning: ET.indent not available. XML will not be pretty-printed.")

        output_tree.write(output_file_path, encoding="utf-8", xml_declaration=True)
        print(f"\n✅ Successfully merged {total_images_processed} images into {output_file_path}")
    except Exception as e:
        print(f"Error writing merged XML to {output_file_path}: {e}")


if __name__ == '__main__':
    # Directory that holds the CVAT exports (annotations.xml, annotations2.xml, ...).
    source_folder = r"C:\Users\sande\Downloads\1"

    # Collect every *.xml file directly inside that directory. Sorting gives a
    # deterministic processing order; the first entry supplies the base metadata.
    search_pattern = os.path.join(source_folder, '*.xml')
    xml_files_to_process = sorted(glob.glob(search_pattern))

    if xml_files_to_process:
        print(f"Found {len(xml_files_to_process)} XML files to merge.")

        # The merged file is written to a 'merged_output' sub-folder of the source folder.
        output_folder = os.path.join(source_folder, 'merged_output')
        merge_cvat_annotations(xml_files_to_process, output_folder)
    else:
        print(f"❌ No XML files found in the folder: {source_folder}")