# Python XML Parser - A Roadmap to XML Parsers in Python
https://realpython.com/python-xml-parser/ 


In [10]:
from xml.dom.minidom import parse, parseString

In [11]:
# Parse XML from a filename
document = parse("../data/smiley.svg")
document

<xml.dom.minidom.Document at 0x1637182f7c0>

In [12]:
# Parse XML from a file object
# Open in binary mode: the XML parser then reads the raw bytes itself, instead
# of Python first decoding the text with the platform's locale-dependent
# default encoding (which can garble non-ASCII characters).
with open("../data/smiley.svg", "rb") as file:
    document = parse(file)

In [13]:
# Parse XML from a Python string
# document = parseString("""
# <svg>
# More content
# </svg>
# """)

In [14]:
# Inspect the document's version, encoding, and standalone attributes
document.version, document.encoding, document.standalone

('1.0', 'UTF-8', False)

In [15]:
# Document Type Definition (DTD)
dtd = document.doctype
dtd.entities["custom_entity"].childNodes

[<DOM Text node "'Hello'">]

In [16]:
# Document Root
document.documentElement

<DOM Element: svg at 0x163717f9240>

In [18]:
# Look up two elements by id before any attribute has been marked as an ID
# attribute; both lookups come back as None (see the output below).
print(document.getElementById("skin") is None)
print(document.getElementById("smiley") is None)

True
True


In [19]:
from xml.dom.minidom import Node

def set_id_attribute(parent, attribute_name="id"):
    """Mark ``attribute_name`` as an ID attribute throughout a DOM subtree.

    Visits ``parent`` and every node reachable through ``childNodes``. Each
    element node that carries ``attribute_name`` has ``setIdAttribute()``
    called for it, so that ``getElementById()`` can find the element.

    The walk uses an explicit stack instead of recursion, so very deeply
    nested documents do not hit Python's recursion limit.

    Args:
        parent: Node to start from (for example a Document or an Element).
        attribute_name: Name of the attribute to treat as the ID
            (defaults to "id").

    Returns:
        None. Matching elements are modified in place.
    """
    pending = [parent]
    while pending:
        node = pending.pop()
        if node.nodeType == Node.ELEMENT_NODE and node.hasAttribute(attribute_name):
            node.setIdAttribute(attribute_name)
        # Non-element nodes (the document itself, text, comments) are expanded
        # too, so every element below them is still reached.
        pending.extend(node.childNodes)

In [20]:
set_id_attribute(document)

In [22]:
# Repeat the lookups after set_id_attribute(document) has run: both ids now
# resolve to elements (see the output below).
print(document.getElementById("skin") is None)
print(document.getElementById("smiley") is None)
print(document.getElementById("skin"))
print(document.getElementById("smiley"))

False
False
<DOM Element: linearGradient at 0x16371940040>
<DOM Element: g at 0x16371940280>


In [23]:
document.getElementsByTagName("ellipse")

[<DOM Element: ellipse at 0x163719403a0>,
 <DOM Element: ellipse at 0x16371940430>]

In [24]:
# The same tag-name search can start from the root element instead of the
# document object.
root = document.documentElement
root.getElementsByTagName("ellipse")

[<DOM Element: ellipse at 0x163719403a0>,
 <DOM Element: ellipse at 0x16371940430>]

In [25]:
# Namespace-aware search: local name "custom" within the Inkscape namespace URI.
document.getElementsByTagNameNS("http://www.inkscape.org/namespaces/inkscape","custom")

[<DOM Element: inkscape:custom at 0x163717fab90>]

In [26]:
# Passing "*" as the namespace URI acts as a wildcard here: the same
# inkscape:custom element is found without spelling out its namespace.
document.getElementsByTagNameNS("*", "custom")

[<DOM Element: inkscape:custom at 0x163717fab90>]

In [28]:
# Navigate relative to the "smiley" element. At this point the whitespace
# between tags is still in the tree, so the first/last child and both siblings
# show up as Text nodes (see the output below).
element = document.getElementById("smiley")
print(element.parentNode)
print(element.firstChild)
print(element.lastChild)
print(element.nextSibling)
print(element.previousSibling)

<DOM Element: svg at 0x163717f9240>
<DOM Text node "'\n    '">
<DOM Text node "'\n  '">
<DOM Text node "'\n  '">
<DOM Text node "'\n  '">


In [33]:
def remove_whitespace(node):
    """Blank out whitespace-only text nodes in the subtree rooted at ``node``.

    A text node whose value is empty after ``strip()`` has its ``nodeValue``
    set to ``""``. The node itself is left in the tree; this function does not
    remove anything. Text with any non-whitespace content is left untouched.

    Args:
        node: Node to start from (for example a Document or an Element).

    Returns:
        None. Text nodes are modified in place.
    """
    is_blank_text = node.nodeType == Node.TEXT_NODE and not node.nodeValue.strip()
    if is_blank_text:
        node.nodeValue = ""
    # Descend into every child, whatever kind of node this one is.
    for descendant in node.childNodes:
        remove_whitespace(descendant)

In [34]:
# First blank the whitespace-only text nodes, then normalize the document.
# After these two steps the navigation in the following cell no longer lands
# on whitespace Text nodes.
remove_whitespace(document)
document.normalize()

In [35]:
# Same navigation as before, now on the cleaned-up tree: the neighbours are
# comment and element nodes rather than whitespace text (see the output below).
element = document.getElementById("smiley")
print(element.parentNode)
print(element.firstChild)
print(element.lastChild)
print(element.nextSibling)
print(element.previousSibling)

<DOM Element: svg at 0x163717f9240>
<DOM Comment node "' Head '">
<DOM Element: path at 0x163719404c0>
<DOM Element: text at 0x16371940550>
<DOM Element: defs at 0x163717fab00>


In [36]:
element.childNodes

[<DOM Comment node "' Head '">,
 <DOM Element: circle at 0x16371940310>,
 <DOM Comment node "' Eyes '">,
 <DOM Element: ellipse at 0x163719403a0>,
 <DOM Element: ellipse at 0x16371940430>,
 <DOM Comment node "' Mouth '">,
 <DOM Element: path at 0x163719404c0>]