In [1]:
from xml.etree import ElementTree as ET

In [2]:
# Sample document used by the cells below: a <data> root holding three
# <country> elements, each with rank/year/gdppc children and one or two
# <neighbor> elements.
countries_xml = """<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

# Parse the text into an element tree; `root` is the <data> element.
root = ET.fromstring(countries_xml)

In [3]:
# Evaluating the element shows its repr: the tag name plus a memory address
root

<Element 'data' at 0x00000184DC8ECC20>

In [4]:
# .text is the character data before the first child -- here just the
# newline and indentation that follow the opening <data> tag
root.text

'\n    '

In [7]:
# Children are reachable by index; [0] is the first <country>
root[0]

<Element 'country' at 0x00000184DC9198B0>

In [10]:
# .attrib is the dict of XML attributes set on the element
root[0].attrib

{'name': 'Liechtenstein'}

In [11]:
# .get() looks up a single attribute by name
root[0].get('name')

'Liechtenstein'

In [13]:
# Iterating an element walks its direct children in document order
for country in root:
    print(country.get('name'))

Liechtenstein
Singapore
Panama


In [15]:
# findall('country') keeps only the direct children tagged <country>
for country in root.findall('country'):
    print(country.get('name'))

Liechtenstein
Singapore
Panama


In [18]:
# find() returns only the first child of root that matches the tag
root.find('country').get('name')

'Liechtenstein'

In [19]:
# Using XPath-style paths: '.' selects the current element (root itself)
root.findall('.')

[<Element 'data' at 0x00000184DC8ECC20>]

In [20]:
# './country' selects the <country> elements directly under root
root.findall('./country')

[<Element 'country' at 0x00000184DC9198B0>,
 <Element 'country' at 0x00000184DC919EA0>,
 <Element 'country' at 0x00000184DC91A090>]

In [21]:
# Two-step path: every <neighbor> that sits directly under a <country>
for neighbor in root.findall('./country/neighbor'):
    print(neighbor.get('name'))

Austria
Switzerland
Malaysia
Costa Rica
Colombia


In [22]:
# [last()] keeps only the final <neighbor> within each <country>
for neighbor in root.findall('./country/neighbor[last()]'):
    print(neighbor.get('name'))

Switzerland
Malaysia
Colombia


In [23]:

# [@name="..."] filters elements on an attribute value
for country in root.findall('./country[@name="Liechtenstein"]'):
    print(country.get('name'))

Liechtenstein


In [25]:
# last()-2 counts back two positions from the final <country>; with three
# countries in the document that is the first one
for country in root.findall('./country[last()-2]'):
    print(country.get('name'))

Liechtenstein


In [26]:
# '*' matches a child with any tag, so the same position test works
# without naming <country>
for element in root.findall('./*[last()-2]'):
    print(element.get('name'))

Liechtenstein


In [27]:
# Same selection as before, but show the matched element's tag
for country in root.findall('./country[last()-2]'):
    print(country.tag)

country


In [28]:
# './/' searches all levels below root, not just its direct children
for neighbor in root.findall('.//neighbor'):
    print(neighbor.get('name'))

Austria
Switzerland
Malaysia
Costa Rica
Colombia


In [29]:
# '//' can also follow a step: any <neighbor> at any depth below a <country>
for neighbor in root.findall('./country//neighbor'):
    print(neighbor.get('name'))

Austria
Switzerland
Malaysia
Costa Rica
Colombia


In [31]:
# Countries that have Austria as a neighbor:
#   .//neighbor        -> every <neighbor> anywhere below root
#   [@name="Austria"]  -> keep only those whose name attribute is "Austria"
#   /..                -> move up the hierarchy to the parent <country>

for country in root.findall('.//neighbor[@name="Austria"]/..'):
    print(country.get('name'))

Liechtenstein


In [32]:
# len(element) is the number of direct children the element has
for nation in root.findall('country'):
    print(nation.get('name'), len(nation))

Liechtenstein 5
Singapore 4
Panama 5


In [33]:
# iter() visits root itself and then every descendant in document order;
# print each tag with its number of direct children
for element in root.iter():
    print(element.tag, len(element))

data 3
country 5
rank 0
year 0
gdppc 0
neighbor 0
neighbor 0
country 4
rank 0
year 0
gdppc 0
neighbor 0
country 5
rank 0
year 0
gdppc 0
neighbor 0
neighbor 0


In [36]:
# Element text is a string, so <rank> is converted to int before printing
for nation in root.findall('country'):
    rank_text = nation.find('rank').text
    print(nation.get('name'), int(rank_text))

Liechtenstein 1
Singapore 4
Panama 68


In [37]:
# Namespaced sample: <actors> declares a default namespace (people) and a
# "fictional" prefix (characters) used by the <fictional:character> elements.
actors_xml = """<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
xmlns="http://people.example.com">
<actor>
<name>John Cleese</name>
<fictional:character>Lancelot</fictional:character>
<fictional:character>Archie Leach</fictional:character>
</actor>
<actor>
<name>Eric Idle</name>
<fictional:character>Sir Robin</fictional:character>
<fictional:character>Gunther</fictional:character>
<fictional:character>Commander Clement</fictional:character>
</actor>
</actors>"""

# Parse the text; `root2` is the <actors> element.
root2 = ET.fromstring(actors_xml)

In [38]:
# The tag is reported with its namespace URI in braces: '{uri}localname'
root2.tag

'{http://people.example.com}actors'

In [39]:
root2.findall('actor')
# Returns an empty list: the plain name 'actor' does not match, because the
# elements belong to the default namespace declared on <actors>

[]

In [40]:
# Qualify the tag as '{namespace-uri}localname'; the part inside the braces
# is the namespace
root2.findall('{http://people.example.com}actor')


[<Element '{http://people.example.com}actor' at 0x00000184DC9EA1D0>,
 <Element '{http://people.example.com}actor' at 0x00000184DC9EA590>]

In [43]:
# Empty: <character> elements use the "fictional" (characters) namespace,
# not the people namespace
root2.findall('.//{http://people.example.com}character')

[]

In [42]:
# With the characters namespace URI, all five <character> elements match
root2.findall('.//{http://characters.example.com}character')

[<Element '{http://characters.example.com}character' at 0x00000184DC9EA2C0>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA5E0>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA680>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA540>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA720>]

In [46]:
# Easier way: map short prefixes to namespace URIs and pass the mapping to
# findall(). 'foo' is just a label chosen here for the URI that the document
# declares as its default (unprefixed) namespace.
ns = dict(
    fictional='http://characters.example.com',
    foo='http://people.example.com',
)

In [47]:
# The 'foo:' prefix is resolved through ns, so this matches
# '{http://people.example.com}actor'
root2.findall('foo:actor', ns)

[<Element '{http://people.example.com}actor' at 0x00000184DC9EA1D0>,
 <Element '{http://people.example.com}actor' at 0x00000184DC9EA590>]

In [48]:
# Same search as the fully qualified
# '{http://characters.example.com}character', written with the prefix
root2.findall('.//fictional:character', ns)

[<Element '{http://characters.example.com}character' at 0x00000184DC9EA2C0>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA5E0>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA680>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA540>,
 <Element '{http://characters.example.com}character' at 0x00000184DC9EA720>]

'4'