## Building XML documents

In [1]:
import xml.etree.ElementTree as ET

In [2]:
# Assemble a tiny tree by hand: root <a> gets children <b> and <c> (in that
# order), and <c> in turn gets a single child <d>.
a = ET.Element('a')
b, c = (ET.SubElement(a, tag) for tag in ('b', 'c'))
d = ET.SubElement(c, 'd')

# dump() writes the serialized tree to stdout; intended for quick inspection.
ET.dump(a)

<a><b /><c><d /></c></a>


## Parsing XML with Namespaces

In [20]:
# Sample document that mixes two namespaces:
#   * the default namespace (http://people.example.com) applies to the
#     unprefixed <actors>, <actor> and <name> tags;
#   * the 'fictional' prefix maps the <fictional:character> tags to
#     http://characters.example.com.
xml_text = '''<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
        xmlns="http://people.example.com">
    <actor>
        <name>John Cleese</name>
        <fictional:character>Lancelot</fictional:character>
        <fictional:character>Archie Leach</fictional:character>
    </actor>
    <actor>
        <name>Eric Idle</name>
        <fictional:character>Sir Robin</fictional:character>
        <fictional:character>Gunther</fictional:character>
        <fictional:character>Commander Clement</fictional:character>
    </actor>
</actors>
'''

In [21]:
# Parse the XML text; `root` is the document's top-level <actors> element.
root = ET.fromstring(xml_text)

In [22]:
# Without a prefix mapping, every namespaced tag has to be spelled out in
# full as '{namespace-uri}localname'.
for actor in root.iterfind('{http://people.example.com}actor'):
    name = actor.find('{http://people.example.com}name')
    print(name.text)
    # Characters live in a different namespace than the actor/name tags.
    for char in actor.iterfind('{http://characters.example.com}character'):
        print(' |-->', char.text)

John Cleese
 |--> Lancelot
 |--> Archie Leach
Eric Idle
 |--> Sir Robin
 |--> Gunther
 |--> Commander Clement


In [23]:
# Rather than repeating the full namespace URI in every query, map short
# prefixes of our own choosing to the URIs and hand that mapping to the
# search methods. The prefixes only need to be consistent within this code;
# they do not have to match the prefixes used in the document itself.
ns = {
    'real_person': 'http://people.example.com',
    'role': 'http://characters.example.com',
}

for actor in root.iterfind('real_person:actor', namespaces=ns):
    name = actor.find('real_person:name', namespaces=ns)
    print(name.text)
    for char in actor.iterfind('role:character', namespaces=ns):
        print(' |-->', char.text)

John Cleese
 |--> Lancelot
 |--> Archie Leach
Eric Idle
 |--> Sir Robin
 |--> Gunther
 |--> Commander Clement


## XPath support

In [26]:
import xml.etree.ElementTree as ET

# Sample data for the XPath examples: a <data> root holding three <country>
# elements, each with <rank>, <year> and <gdppc> children followed by one or
# two <neighbor> elements.
countrydata = '''<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
'''

# NOTE: this rebinds `root`, which earlier held the <actors> tree from the
# namespaces section. Re-run that section's parsing cell before re-running
# its search cells, otherwise they will query this <data> tree instead.
root = ET.fromstring(countrydata)

In [28]:
# '.' selects the context node itself, so searching from `root` returns a
# one-element list containing the top-level <data> element.
root.findall(".")

[<Element 'data' at 0x729e8120>]

In [29]:
# All 'neighbor' grandchildren of the root: the 'neighbor' children of each
# 'country' child of the top-level element.
root.findall("./country/neighbor")

[<Element 'neighbor' at 0x729e8330>,
 <Element 'neighbor' at 0x729e83c0>,
 <Element 'neighbor' at 0x729e8300>,
 <Element 'neighbor' at 0x729e8450>,
 <Element 'neighbor' at 0x729e8570>]

In [30]:
# Nodes with name='Singapore' that have a 'year' child: find every 'year'
# at any depth, step up to its parent with '..', then keep only the parents
# whose 'name' attribute equals 'Singapore'.
root.findall(".//year/..[@name='Singapore']")

[<Element 'country' at 0x729e83f0>]

In [31]:
# 'year' nodes that are children of nodes with name='Singapore': '*' matches
# an element with any tag, and the predicate filters on its 'name' attribute.
root.findall(".//*[@name='Singapore']/year")

[<Element 'year' at 0x729e8390>]

In [32]:
# All 'neighbor' nodes that are the second 'neighbor' child of their parent.
# The [2] position is counted among siblings with the same tag, not among all
# children, so a country with only one neighbor contributes nothing.
root.findall(".//neighbor[2]")

[<Element 'neighbor' at 0x729e83c0>, <Element 'neighbor' at 0x729e8570>]

In [33]:
# All Dublin Core "title" tags in the document, addressed by their fully
# qualified '{namespace-uri}localname' tag. The sample country data contains
# no elements in that namespace, so the result is an empty list.
root.findall(".//{http://purl.org/dc/elements/1.1/}title")

[]