In [1]:
import xml.etree.ElementTree as ET
import numpy as np
import cv2  # OpenCV for image handling
import os

def _decode_rle_patch(mask_tag):
    """
    Decodes the run-length encoding of one <mask> element into a 2-D patch.

    The 'rle' attribute is a comma-separated list of run lengths. Runs
    alternate between 0 and 1, starting with a run of 0s, and are laid out
    row by row over a (height, width) grid.

    Args:
        mask_tag (xml.etree.ElementTree.Element): The <mask> element.

    Returns:
        np.ndarray: A uint8 array of shape (height, width) holding 0s and 1s.

    Raises:
        ValueError: If 'rle' is missing, a field is not an integer, or the
            runs do not add up to exactly height * width pixels.
        TypeError: If 'height' or 'width' is missing.
    """
    rle_string = mask_tag.get('rle')
    if rle_string is None:
        raise ValueError("mask is missing its 'rle' attribute")

    mask_height = int(mask_tag.get('height'))
    mask_width = int(mask_tag.get('width'))

    # int() ignores surrounding whitespace, so splitting on a bare comma
    # accepts both "1, 2, 3" and "1,2,3".
    run_lengths = np.array([int(part) for part in rle_string.split(',')], dtype=np.int64)

    # Even-indexed runs are 0s, odd-indexed runs are 1s. np.repeat expands
    # them without building a huge Python list of ints.
    run_values = (np.arange(run_lengths.size) % 2).astype(np.uint8)
    flat = np.repeat(run_values, run_lengths)

    expected = mask_height * mask_width
    if flat.size != expected:
        raise ValueError(
            f"RLE decodes to {flat.size} pixels but the mask is "
            f"{mask_width}x{mask_height} ({expected} pixels)"
        )
    return flat.reshape(mask_height, mask_width)


def _paste_patch(canvas, patch, left, top):
    """
    Sets canvas pixels to 255 wherever the patch is non-zero.

    The patch's top-left corner goes to (top, left) on the canvas. Any part
    of the patch that falls outside the canvas is clipped away instead of
    causing a shape mismatch.

    Args:
        canvas (np.ndarray): 2-D uint8 array, modified in place.
        patch (np.ndarray): 2-D array of 0/1 values.
        left (int): Column of the patch's left edge on the canvas.
        top (int): Row of the patch's top edge on the canvas.
    """
    canvas_height, canvas_width = canvas.shape
    patch_height, patch_width = patch.shape

    row_start, col_start = max(top, 0), max(left, 0)
    row_stop = min(top + patch_height, canvas_height)
    col_stop = min(left + patch_width, canvas_width)
    if row_start >= row_stop or col_start >= col_stop:
        return  # The patch lies entirely outside the canvas.

    visible = patch[row_start - top:row_stop - top, col_start - left:col_stop - left]
    # Basic slicing yields a view, so this assignment writes into `canvas`.
    canvas[row_start:row_stop, col_start:col_stop][visible > 0] = 255


def convert_multiview_xml_to_masks(xml_path, output_dir, label='Track'):
    """
    Parses a CVAT-style XML annotation file containing multiple images and
    converts the annotations for each image into a separate binary mask file.

    For every <image> element, all <mask> children whose 'label' equals
    `label` are decoded and merged into one uint8 image (0 = background,
    255 = annotated) that is written as "<image name without extension>_mask.png"
    below `output_dir`. Sub-folders in the image name are recreated there.

    The function reports problems with print() and does not raise. An image
    with a malformed annotation is skipped (no mask is written for it) and
    processing continues with the next image.

    Args:
        xml_path (str): The path to the input annotations.xml file.
        output_dir (str): The directory where the mask PNG files will be saved.
        label (str): The 'label' attribute a <mask> must carry to be included.
            Defaults to 'Track'.

    Returns:
        None
    """
    try:
        # --- Create the output directory if it doesn't exist ---
        os.makedirs(output_dir, exist_ok=True)
        print(f"Masks will be saved to: {output_dir}")

        # --- 1. Parse the XML file ---
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # --- 2. Iterate through each <image> tag in the XML ---
        image_tags = root.findall('image')
        if not image_tags:
            print("Error: No <image> tags found in the XML file.")
            return

        failed = 0
        for image_tag in image_tags:
            image_name = image_tag.get('name')
            try:
                if not image_name:
                    raise ValueError("image is missing its 'name' attribute")
                img_width = int(image_tag.get('width'))
                img_height = int(image_tag.get('height'))

                print(f"\nProcessing image: {image_name} ({img_width}x{img_height})")

                # --- 3. Create an empty canvas for this image's mask ---
                final_mask = np.zeros((img_height, img_width), dtype=np.uint8)

                # --- 4. Keep only the masks carrying the requested label ---
                # Filtering first means the "empty mask" notice also fires
                # when masks exist but none of them has the wanted label.
                track_tags = [
                    tag for tag in image_tag.findall('mask')
                    if tag.get('label') == label
                ]
                if not track_tags:
                    print(f"  -> No track annotations found for this image. Saving an empty mask.")

                for mask_tag in track_tags:
                    mask_patch = _decode_rle_patch(mask_tag)
                    left = int(mask_tag.get('left'))
                    top = int(mask_tag.get('top'))
                    _paste_patch(final_mask, mask_patch, left, top)
            except (ValueError, TypeError) as e:
                # A broken annotation must not abort the remaining images,
                # and a partially filled mask must not be saved as if valid.
                print(f"  -> Error: skipping image {image_name!r}: {e}")
                failed += 1
                continue

            # --- 5. Save the final mask for this image ---
            base_name = os.path.splitext(image_name)[0]
            output_mask_name = f"{base_name}_mask.png"
            output_mask_path = os.path.join(output_dir, output_mask_name)

            # Image names may contain sub-folders; the target folder must
            # exist or the write below fails.
            os.makedirs(os.path.dirname(output_mask_path), exist_ok=True)

            # cv2.imwrite signals failure through its return value.
            if cv2.imwrite(output_mask_path, final_mask):
                print(f"  -> Successfully saved mask to: {output_mask_path}")
            else:
                print(f"  -> Error: could not write mask to: {output_mask_path}")
                failed += 1

        print(f"\nProcessed {len(image_tags)} images in total.")
        if failed:
            print(f"{failed} image(s) could not be converted; see the errors above.")

    except FileNotFoundError:
        print(f"Error: The file '{xml_path}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")



In [2]:
# --- Example Usage ---
# Input: the annotation XML to convert
xml_file = 'obsidian_new/annotations.xml'

# Output: the folder that receives one "<image>_mask.png" per image
output_folder = 'training_masks'

# Keyword arguments make the role of each path explicit at the call site
convert_multiview_xml_to_masks(xml_path=xml_file, output_dir=output_folder)

Masks will be saved to: training_masks

Processing image: Acquisition_2397_0_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2397_0_1_mask.png

Processing image: Acquisition_2397_1_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2397_1_1_mask.png

Processing image: Acquisition_2397_2_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2397_2_1_mask.png

Processing image: Acquisition_2397_3_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2397_3_1_mask.png

Processing image: Acquisition_2418_0_0.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2418_0_0_mask.png

Processing image: Acquisition_2418_0_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2418_0_1_mask.png

Processing image: Acquisition_2418_1_1.png (1024x1024)
  -> Successfully saved mask to: training_masks/Acquisition_2418_1_1_mask.png

Processing image: Acqu