In [1]:
from lxml import etree
from pathlib import Path
import json

In [2]:
# Locate the XML export inside ./xml_data (relative to the working directory)
xml_path = Path.cwd() / "xml_data"
xml_file = xml_path.joinpath("host_original.xml")

# Prefix -> URI map used to resolve prefixed tags/attributes such as xmi:*
# (the URIs are taken from the second line of the exported XML file)
namespaces = dict(
    uml="http://schema.omg.org/spec/UML/2.1",
    xmi="http://schema.omg.org/spec/XMI/2.1",
    thecustomprofile="http://www.sparxsystems.com/profiles/thecustomprofile/1.0",
    GML="http://www.sparxsystems.com/profiles/GML/1.0",
)

# Parse the document and keep a handle on its root element
tree = etree.parse(xml_file)
root = tree.getroot()

In [3]:
# Load the "classes" entry of the candidate core-components JSON file
components_file = Path.cwd() / "candidate_core_components_openai.json"
# Decode explicitly as UTF-8 (the JSON interchange encoding) rather than
# relying on the platform's locale-dependent default.
components = json.loads(components_file.read_text(encoding="utf-8"))["classes"]

In [4]:
# Every child of xmi:Extension/elements is a candidate for removal
elements = root.findall(".//xmi:Extension/elements/*", namespaces)

# Attribute keys in "{namespace-uri}local" form, i.e. what the XML file
# spells as xmi:idref and xmi:type
idref_key = "{" + namespaces["xmi"] + "}idref"
type_key = "{" + namespaces["xmi"] + "}type"

# Map xmi:idref -> name for each element that is neither listed in
# `components` nor typed as uml:Package
ids_to_delete = {}
for element in elements:
    element_id = element.get(idref_key)
    element_name = element.get("name")
    element_type = element.get(type_key)

    # packages and listed core components are kept
    if element_type == "uml:Package" or element_name in components:
        continue
    ids_to_delete[element_id] = element_name

In [5]:
def remove_elements(xpath):
    """Delete every element matched by `xpath` and return how many matched.

    The search starts at the notebook-level `root` and resolves prefixes with
    the notebook-level `namespaces` map; each match is detached from its
    parent element.
    """
    matches = root.findall(xpath, namespaces)
    for node in matches:
        parent = node.getparent()
        parent.remove(node)
    return len(matches)

def remove_elements_via_xpath(id_, xpaths, name, log, is_verbose):
    """Apply every XPath template to one id and record the removal counts.

    Args:
        id_: identifier substituted for the ``{id_}`` placeholder of each template.
        xpaths: dict mapping a location label to an XPath template string.
        name: key under which this element's counts are stored in ``log``.
        log: dict updated in place; ``log[name][label]`` receives the number
            of elements removed for that label.
        is_verbose: when truthy, print one line per template with its count.
    """
    # Create the per-element entry on demand so the function also works when
    # the caller has not pre-seeded log[name].
    counts = log.setdefault(name, {})
    for key, xpath_template in xpaths.items():
        counts[key] = remove_elements(xpath_template.format(id_=id_))
        if is_verbose:
            print(f"{name} [{id_}] {key}: removed {counts[key]}")


def run_remove_elements_via_xpath(ids_to_delete, xpaths, is_verbose):
    """Remove each listed element from all XPath locations and report counts.

    `ids_to_delete` maps an id to an element name. Returns a dict keyed by
    element name; each value is the per-location dict filled in by
    `remove_elements_via_xpath` (one entry per key of `xpaths`).
    """
    removal_log = {}
    for element_id in ids_to_delete:
        element_name = ids_to_delete[element_id]
        # start from an empty entry for this name before the removals fill it
        removal_log[element_name] = {}
        remove_elements_via_xpath(
            element_id, xpaths, element_name, removal_log, is_verbose
        )
    return removal_log

In [6]:
# XPath templates for the places that reference an element id; "{id_}" is
# substituted with each id before the search
xpaths = {
    "element_extension": ".//xmi:Extension/elements/*[@xmi:idref='{id_}']",
    "element_model": ".//uml:Model/packagedElement/*[@xmi:id='{id_}']",
    "ownedEnd": ".//uml:Model/packagedElement/*/ownedEnd/type[@xmi:idref='{id_}']",
    "connector_source": ".//xmi:Extension/connectors/*/source[@xmi:idref='{id_}']",
    # the predicate attaches directly to "*": a "/" between a step and its
    # predicate ("*/[...]") is not well-formed XPath
    "diagram": ".//xmi:Extension/diagrams/*/elements/*[@subject='{id_}']",
}

# print extra information
is_verbose = False

# remove every collected element from each of the locations above
log = run_remove_elements_via_xpath(ids_to_delete, xpaths, is_verbose)

In [7]:
# Remove <links> nodes under extension elements whose start or end attribute
# equals the id of a deleted element.
# NOTE(review): these templates test start/end on the <links> node itself;
# confirm against the XML that the attributes are not on children of <links>.
links_xpaths = dict(
    links_start=".//xmi:Extension/elements/*/links[@start='{id_}']",
    links_end=".//xmi:Extension/elements/*/links[@end='{id_}']",
)

logs_inners = run_remove_elements_via_xpath(
    ids_to_delete=ids_to_delete, xpaths=links_xpaths, is_verbose=is_verbose
)

In [8]:
# Write the pruned tree to a new XML file inside the xml_path folder
xml_test_file = xml_path.joinpath("host_core_components_openai.xml")
# make sure the output file exists before the tree is written into it
xml_test_file.touch(exist_ok=True)
tree.write(xml_test_file, encoding="utf-8")

In [9]:
# Direct children of uml:Model that are typed as uml:Package
pkg = root.findall(".//uml:Model/*[@xmi:type='uml:Package']", namespaces)

In [10]:
# Packages: direct children of uml:Model typed as uml:Package
packages = root.findall(".//uml:Model/*[@xmi:type='uml:Package']", namespaces)

# attribute key for xmi:id in "{namespace-uri}local" form
xmi_id_key = "{%s}id" % namespaces["xmi"]

# package name -> {class name -> genfile value}
core_components_package = {}

for package in packages:
    package_name = package.get("name")
    package_id = package.get(xmi_id_key)

    # <model> children of uml:Class extension elements that point at this package
    model_nodes = root.iterfind(
        ".//xmi:Extension/elements/element[@xmi:type='uml:Class']"
        f"/model[@package='{package_id}']",
        namespaces,
    )

    # class name -> genfile, only for classes whose <code> child has a genfile
    core_components = {}
    for model_node in model_nodes:
        class_element = model_node.getparent()  # the <element> owning this <model>
        code_data = class_element.find("code")  # holds the header file path
        if code_data is None:
            continue
        genfile = code_data.get("genfile")
        if genfile is None:
            continue
        core_components[class_element.get("name")] = genfile

    core_components_package[package_name] = core_components

In [11]:
# Shorten each header path to "<grandparent dir>\<parent dir>\<file name>"
# (Enterprise Architect imports files using absolute paths) and wrap the
# result in a one-element list.
for package_components in core_components_package.values():
    for component_name, header_path in package_components.items():
        if isinstance(header_path, list):
            # already converted by an earlier run of this cell; skipping keeps
            # the cell safe to re-execute
            continue
        # replace backslashes with forward slashes so Path splits the parts
        t = Path(header_path.replace("\\", "/"))
        # assign in place (existing key, so the dict size is unchanged while
        # iterating) so every reference to this dict sees the new value
        package_components[component_name] = [
            f"{t.parent.parent.name}\\{t.parent.name}\\{t.name}"
        ]

In [12]:
filename = xml_path.parent / "core_components_openai.json"

# Merge the per-package mappings into one flat {component name: value} dict.
# Dumping the leftover loop variable `core_components` would export only the
# last package processed (and fail when there are no packages at all).
# If two packages share a component name, the later package's entry wins.
all_core_components = {}
for package_components in core_components_package.values():
    all_core_components.update(package_components)

with filename.open("w", encoding="utf-8") as f:
    json.dump(all_core_components, f, indent=4)