In [1]:
import os
from pathlib import Path
import json
from lxml import etree
from openai import AzureOpenAI
from pydantic import BaseModel, Field
from dotenv import load_dotenv


# Load variables from a .env file into the process environment so later cells
# can read them with os.getenv(). override=True lets values from the .env file
# replace variables that are already set in the environment.
load_dotenv(override=True)

True

In [2]:
# Locate the exported model: <current working directory>/xml_data/host_original.xml
xml_path = Path.cwd() / "xml_data"
xml_file = xml_path.joinpath("host_original.xml")

# Prefix -> URI map used to resolve prefixed names (e.g. xmi:...) in path queries.
# The URIs are copied from the namespace declarations near the top of the
# exported XML file (its second line).
namespaces = dict(
    uml="http://schema.omg.org/spec/UML/2.1",
    xmi="http://schema.omg.org/spec/XMI/2.1",
    thecustomprofile="http://www.sparxsystems.com/profiles/thecustomprofile/1.0",
    GML="http://www.sparxsystems.com/profiles/GML/1.0",
)

# Parse the document and keep a handle on its root element.
tree = etree.parse(xml_file)
root = tree.getroot()

In [3]:
# Every child of <xmi:Extension>/<elements> whose xmi:type is 'uml:Class'.
class_xpath = ".//xmi:Extension/elements/*[@xmi:type='uml:Class']"
elements = root.findall(class_xpath, namespaces)

In [13]:
# Build an in-memory model of the diagram from the selected class elements:
#   classes       -- maps each element's xmi:idref to a dict with its "name",
#                    "attributes" (names) and "operations" (names)
#   relationships -- one {"child": start, "parent": end} record for every link
#                    that carries both a "start" and an "end" attribute
classes = {}
relationships = []

# lxml addresses namespaced attributes in Clark notation: "{namespace-uri}local-name".
idref_key = f"{{{namespaces['xmi']}}}idref"

for element in elements:
    # Create the entry on first sight; reuse it if the same idref appears again.
    entity = classes.setdefault(
        element.attrib.get(idref_key),
        {
            "name": element.attrib.get("name"),
            "attributes": [],
            "operations": [],
        },
    )

    # Only the DIRECT children of <attributes>, <operations> and <links> are read.
    # Element.iter() would also walk the container itself and every nested
    # descendant, so any deeper element that happens to carry a "name" (or
    # "start"/"end") attribute would be mis-recorded as an attribute, operation
    # or link. findall() returns an empty list when the container is missing,
    # so no separate "is not None" check is needed.
    for attr in element.findall("./attributes/*"):
        attr_name = attr.attrib.get("name")
        if attr_name is not None:
            entity["attributes"].append(attr_name)

    for oper in element.findall("./operations/*"):
        oper_name = oper.attrib.get("name")
        if oper_name is not None:
            entity["operations"].append(oper_name)

    for link in element.findall("./links/*"):
        start = link.attrib.get("start")
        end = link.attrib.get("end")
        if start is not None and end is not None:
            relationships.append({"child": start, "parent": end})

In [14]:
# Render the extracted model as PlantUML source.
#   plantuml      -- list of diagram statements: association lines first,
#                    then one "class ... { ... }" block per class
#   plantuml_text -- the statements joined and wrapped in @startuml / @enduml
plantuml = []

for rel in relationships:
    parent = classes.get(rel["parent"])
    child = classes.get(rel["child"])
    # Skip links whose endpoints are not among the extracted classes.
    if child and parent:
        plantuml.append(f"{child['name']} -- {parent['name']}")

for c in classes.values():
    # The member lists are joined outside the f-string: backslashes and reused
    # quote characters inside f-string expressions are a SyntaxError before
    # Python 3.12.
    attribute_lines = "\n".join(c["attributes"])
    operation_lines = "\n".join(f"{o}()" for o in c["operations"])
    plantuml.append(f"class {c['name']} {{\n{attribute_lines}\n{operation_lines}\n}}")

# De-duplicate while keeping first-seen order. A set() would also remove
# duplicates, but its iteration order for strings changes between interpreter
# runs (hash randomisation), which made the generated file -- and the prompt
# built from it -- differ from one kernel restart to the next.
unique_statements = dict.fromkeys(plantuml)
plantuml_text = "@startuml\n\n" + "\n".join(unique_statements).strip() + "\n\n@enduml"

In [15]:
# Write the diagram next to the source XML, same base name with a .puml extension.
puml_name = f"{xml_file.stem}.puml"
output_file = xml_file.parent / puml_name
# Last expression of the cell: write_text() returns the number of characters written.
output_file.write_text(plantuml_text)

4478

In [16]:
# Response schema handed to the chat-completion call as `response_format`:
# a single field, `classes`, holding a list of strings.
# NOTE(review): documented with `#` comments on purpose -- a pydantic model's
# docstring presumably ends up in its generated JSON schema, which would change
# what is sent to the API. Confirm before adding a docstring here.
class Classes(BaseModel):
    classes: list[str] = Field(description="List of classes")

In [17]:
# Single-turn prompt: role, goal and a description of the input, followed by the
# generated PlantUML diagram wrapped in <plantuml> tags.
user_prompt = f"""<role>
You are an expert software engineer.
</role>
<goal>
Extract an abstract view from the classes keeping only the most important classes.
</goal>

<description>
You will receive a class diagram in PlantUML format highlighting all classes and their attributes and operations.
Additionally, the relationships between classes are also included in one form of association which is not influential but helps you know which class connected to which class.
</description>

<plantuml>
{plantuml_text}
</plantuml>
"""

# Conversation sent to the model; starts with just the user prompt.
messages = [{"role": "user", "content": user_prompt}]

In [22]:
# Azure OpenAI client; key, API version and endpoint are read from the environment.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_API_KEY"),
    api_version=os.getenv("AZURE_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_API_BASE"),
)

# Request a completion whose output is constrained to the `Classes` schema.
# The deployment/model name is read from the MODEL environment variable.
response = client.beta.chat.completions.parse(
    model=os.getenv("MODEL"),
    messages=messages,
    temperature=0.2,
    seed=1234,
    response_format=Classes,
    max_tokens=100,
)

reply = response.choices[0].message

# The reply can come back without content (for example when the model refuses).
# Fail here with an explicit message instead of appending a None message and
# letting json.loads() raise an opaque TypeError.
if reply.content is None:
    raise RuntimeError(
        f"Model returned no content (refusal: {getattr(reply, 'refusal', None)!r})"
    )

# Add response to messages.
# NOTE: this mutates `messages` in place. Re-running this cell without first
# re-running the cell that defines `messages` sends the earlier assistant
# replies along with the prompt.
messages.append({"role": "assistant", "content": reply.content})

# Parse the JSON reply into a plain dict, e.g. {"classes": [...]}.
solution = json.loads(reply.content)

solution

{'classes': ['Dishwasher', 'Tank', 'Heater', 'Jet', 'AbstractFactory']}

In [19]:
# Persist the model's selection as JSON in the current working directory.
core_classes_file = Path.cwd() / "candidate_core_components_openai.json"

# Write through a context manager so the file handle is flushed and closed as
# soon as the dump finishes, rather than whenever the unnamed handle happens to
# be garbage-collected.
with core_classes_file.open("w") as core_classes_fh:
    json.dump(solution, core_classes_fh, indent=4)