In [1]:
from lxml import etree
from difflib import unified_diff

def _move_sorted_to_end(parent, children, key):
    """Detach *children* from *parent* and re-append them ordered by *key*."""
    ordered = sorted(children, key=key)
    for child in children:
        parent.remove(child)
    for child in ordered:
        parent.append(child)


def normalize_xml_ignore_enrolment_order(
    xml_path: str,
    *,
    namespace: str = "http://psdata.eae.alberta.ca/enrol/3",
) -> list:
    """Return a canonical, pretty-printed line list for the XML at *xml_path*.

    The document is parsed with blank text removed, then reordered so that
    two files differing only in element order serialize identically:

    * ``<Enrolment>`` children of every ``<Learner>`` are sorted by their
      full serialized text and moved to the end of that learner.
    * ``<Learner>`` children of the root are sorted by their ``StudentID``
      attribute and moved to the end of the root. A learner without the
      attribute sorts as if its ID were the empty string, and learners that
      share an ID are ordered by their serialized text, so the result does
      not depend on the order in the input file.

    Args:
        xml_path: Path of the XML file to read.
        namespace: Namespace URI of the ``Learner`` / ``Enrolment`` elements.

    Returns:
        The pretty-printed document split into lines (a list of ``str``).

    Errors raised by lxml while reading or parsing the file propagate
    unchanged.
    """
    ns = {"ns": namespace}
    parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(xml_path, parser).getroot()

    def serialized(element):
        return etree.tostring(element, encoding="unicode")

    learners = root.findall("ns:Learner", namespaces=ns)

    # Canonicalize enrolments first: the learner tie-break below compares
    # serialized learners, which must already have a stable inner order.
    for learner in learners:
        enrolments = learner.findall("ns:Enrolment", namespaces=ns)
        _move_sorted_to_end(learner, enrolments, serialized)

    # `or ""` keeps the key comparable when StudentID is missing (None cannot
    # be ordered against str); the serialized text breaks ties between
    # learners that share an ID.
    _move_sorted_to_end(
        root,
        learners,
        lambda learner: (learner.get("StudentID") or "", serialized(learner)),
    )

    # Return pretty-printed XML as lines
    return etree.tostring(root, encoding="unicode", pretty_print=True).splitlines()

def compare_xml_files(file1, file2):
    """Print whether two XML files match once element order is normalized.

    Both files are passed through ``normalize_xml_ignore_enrolment_order``
    and the resulting line lists are compared with a unified diff labelled
    with the two file names. When the diff is empty a success message is
    printed; otherwise a failure header is printed followed by every diff
    line. Nothing is returned.
    """
    normalized_first = normalize_xml_ignore_enrolment_order(file1)
    normalized_second = normalize_xml_ignore_enrolment_order(file2)

    delta = list(
        unified_diff(
            normalized_first,
            normalized_second,
            fromfile=file1,
            tofile=file2,
            lineterm="",
        )
    )

    if not delta:
        print("✅ XML files are identical (ignoring enrolment order).")
        return

    print("❌ XML files are different (excluding enrolment order):")
    print(*delta, sep="\n")

# Paths of the two XML files to compare -- replace with your own.
source_xml = "LERS_XML_DATA.xml"
converted_xml = "LERS_CONVERTED_DATA.xml"
compare_xml_files(source_xml, converted_xml)


✅ XML files are identical (ignoring enrolment order).
