In [None]:
import os
import cv2
import xml.etree.ElementTree as ET
from xml.dom import minidom
from ultralytics import YOLO
from itertools import pairwise

# Map YOLO class indices to region type names (adapt to your own training run).
CLASS_MAP = {
    0: "Number",
    1: "Name",
    2: "Paragraph",
    3: "Margin"
}

# Detection model, loaded once at import time from the trained weights file.
model = YOLO("models/detect/train/weights/best.pt")  
# Keyed by the same type names as the CLASS_MAP values; presumably the maximum
# number of detections kept per region type -- confirm against the code that
# reads this dict.
max_detections = {"Number": 8, "Name": 16, "Paragraph": 8, "Margin": 10}

def prettify_xml(elem):
    """Return *elem* serialized as pretty-printed XML text (4-space indent).

    The element is serialized with ElementTree, re-parsed with minidom and
    pretty-printed as UTF-8 (so the result starts with an XML declaration).
    Whitespace-only lines are dropped and the text always ends with exactly
    one trailing newline.
    """
    document = minidom.parseString(ET.tostring(elem, 'utf-8'))
    rendered = document.toprettyxml(indent='    ', encoding='utf-8').decode('utf-8')
    # minidom sometimes emits blank lines; keep only lines with visible content.
    meaningful = filter(str.strip, rendered.splitlines())
    return '\n'.join(meaningful) + '\n'

def code_to_year(code: str) -> int:
    """Convert a code such as ``"3E18"`` to its year.

    Everything after the first two characters is parsed as an integer.
    Numbers up to 24 count up from 18 -> 1807 (so 24 -> 1813), 25 is a
    special case that also maps to 1813, and numbers from 26 upward count
    up from 26 -> 1814.

    Raises:
        ValueError: if the part after the two-character prefix is not an
            integer.
    """
    index = int(code[2:])  # e.g. "3E18" -> 18
    if index == 25:
        # Exception: 25 shares its year with 24.
        return 1813
    base_year, origin = (1807, 18) if index < 25 else (1814, 26)
    return base_year + (index - origin)

In [None]:
def create_img_xml(img_name, width, year, images_info, output_dir):
    """Write an XML description of an image's text regions to *output_dir*.

    The root ``Image`` element carries ``Name`` (*img_name* without its
    extension), ``Width``, ``Year`` and ``Bbox`` (total number of regions
    over all entries of *images_info*). Each region becomes a ``TextRegion``
    child with a sequential ``Id`` (starting at 1 and running across all
    entries), ``Side``, ``Type`` and ``Conf`` attributes, plus a ``Coord``
    child whose ``Points`` attribute is ``"(xt,yt,xb,yb)"``.

    Args:
        img_name: Image file name; its stem names the output file.
        width: Value stored (stringified) in the ``Width`` attribute.
        year: Value stored (stringified) in the ``Year`` attribute.
        images_info: Iterable of dicts, each with a ``"regions"`` list of
            dicts holding ``side``, ``type``, ``conf``, ``xt``, ``yt``,
            ``xb`` and ``yb``.
        output_dir: Directory for ``<stem>.xml``; created if missing.
    """
    stem = os.path.splitext(img_name)[0]

    # First pass: count every region so the root can advertise the total.
    region_total = 0
    for entry in images_info:
        region_total += len(entry["regions"])

    root = ET.Element(
        "Image",
        Name=stem,
        Width=str(width),
        Year=str(year),
        Bbox=str(region_total),
    )

    # Second pass: number regions consecutively across all entries.
    flattened = (region for entry in images_info for region in entry["regions"])
    for number, region in enumerate(flattened, start=1):
        node = ET.SubElement(
            root,
            "TextRegion",
            Id=str(number),
            Side=region["side"],
            Type=region["type"],
            Conf=str(region["conf"]),
        )
        corners = f"({region['xt']},{region['yt']},{region['xb']},{region['yb']})"
        ET.SubElement(node, "Coord", Points=corners)

    os.makedirs(output_dir, exist_ok=True)
    destination = os.path.join(output_dir, f"{stem}.xml")
    # Serialize before opening so a serialization failure leaves no empty file.
    xml_text = prettify_xml(root)
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(xml_text)