In [None]:
from functools import reduce
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element
import json

def get_xhtml_text(item: Element) -> str:
    """Return the stripped text content of an XML element.

    If the element's own leading text is non-blank, only that text is
    returned. Otherwise every non-blank text fragment found in the element
    and its descendants is stripped and the fragments are joined with
    newlines.

    Args:
        item (Element): element to extract text from

    Returns:
        str: extracted text, or an empty string when the element holds no
            non-blank text at all
    """
    # ``item.text`` is None (not "") when the element is empty or starts
    # directly with a child tag, so guard before stripping.
    own_text = (item.text or "").strip()
    if own_text:
        # NOTE: text that follows nested child elements is intentionally not
        # appended here; this keeps the long-standing output unchanged.
        return own_text
    # str.join copes with zero fragments, unlike reduce() without an initial
    # value, which raises TypeError on an empty sequence.
    return "\n".join(
        fragment.strip() for fragment in item.itertext() if fragment.strip()
    )

_CWE_NS = "{http://cwe.mitre.org/cwe-7}"

# Direct children of a weakness whose text is copied as-is: tag -> output key.
_TEXT_SECTIONS = {
    f"{_CWE_NS}Description": "description",
    f"{_CWE_NS}Extended_Description": "extended_description",
    f"{_CWE_NS}Background_Details": "background_details",
    f"{_CWE_NS}Likelihood_Of_Exploit": "likelihood_of_exploit",
}

# Fields read from each nested record: child tag -> output key.
# Dict order is the key order of the emitted JSON objects.
_CONSEQUENCE_FIELDS = {
    f"{_CWE_NS}Scope": "scope",
    f"{_CWE_NS}Impact": "impact",
    f"{_CWE_NS}Note": "note",
}
_DETECTION_FIELDS = {
    f"{_CWE_NS}Method": "method",
    f"{_CWE_NS}Description": "description",
    f"{_CWE_NS}Effectiveness": "effectiveness",
}
_MITIGATION_FIELDS = {
    f"{_CWE_NS}Phase": "phase",
    f"{_CWE_NS}Description": "description",
    f"{_CWE_NS}Effectiveness": "effectiveness",
    f"{_CWE_NS}Effectiveness_Notes": "effectiveness_notes",
}


def _read_fields(entry: Element, fields: dict[str, str]) -> dict[str, str]:
    """Collect the text of the children of *entry* listed in *fields*."""
    # Every expected key is present even when its element is missing.
    record = dict.fromkeys(fields.values(), "")
    for detail in entry:
        key = fields.get(detail.tag)
        if key is not None:
            # A repeated tag overwrites the earlier value (last one wins).
            record[key] = get_xhtml_text(detail)
    return record


def get_cwe_info(
    cwe_id: str, xml_path: str = "./data/cwec_v4.16.xml"
) -> str | None:
    """get cwe json string formatted information from xml with the given cwe id

    The JSON object carries the keys ``id``, ``name``, ``abstraction``,
    ``description``, ``extended_description``, ``background_details``,
    ``likelihood_of_exploit``, ``common_consequences``, ``detection_methods``
    and ``potential_mitigations``. The last three are lists of objects; all
    other values are strings, empty when the element is absent.

    Args:
        cwe_id (str): cwe id, compared against the ``ID`` attribute as a string
        xml_path (str): path of the CWE catalog XML file to read

    Returns:
        str | None: cwe json string formatted information, or None when the
            catalog has no weakness with the given id

    Raises:
        ValueError: if the catalog root has no ``Weaknesses`` element
    """
    # NOTE: the catalog is re-parsed on every call.
    root = ET.parse(xml_path).getroot()
    weaknesses = root.find(f"{_CWE_NS}Weaknesses")
    if weaknesses is None:
        raise ValueError(f"no Weaknesses element found in {xml_path}")

    weakness = next(
        (node for node in weaknesses if node.get("ID") == cwe_id), None
    )
    if weakness is None:
        return None

    cwe = {
        "id": weakness.attrib["ID"],
        "name": weakness.attrib["Name"],
        "abstraction": weakness.attrib["Abstraction"],
        "description": "",
        "extended_description": "",
        "background_details": "",
        "likelihood_of_exploit": "",
        "common_consequences": [],
        "detection_methods": [],
        "potential_mitigations": [],
    }

    # Nested records are built in their own dicts, so a detection method's or
    # mitigation's Description can never overwrite the weakness description.
    for section in weakness:
        tag = section.tag
        if tag in _TEXT_SECTIONS:
            cwe[_TEXT_SECTIONS[tag]] = get_xhtml_text(section)
        elif tag == f"{_CWE_NS}Common_Consequences":
            cwe["common_consequences"].extend(
                _read_fields(consequence, _CONSEQUENCE_FIELDS)
                for consequence in section
            )
        elif tag == f"{_CWE_NS}Detection_Methods":
            cwe["detection_methods"].extend(
                {
                    "detection_method_id": method.get("Detection_Method_ID", ""),
                    **_read_fields(method, _DETECTION_FIELDS),
                }
                for method in section
            )
        elif tag == f"{_CWE_NS}Potential_Mitigations":
            cwe["potential_mitigations"].extend(
                _read_fields(mitigation, _MITIGATION_FIELDS)
                for mitigation in section
            )

    return json.dumps(cwe)

# Example run: look up CWE-78 in the catalog and show the resulting JSON.
cwe_78 = get_cwe_info(cwe_id="78")
print(cwe_78)