# XML
 - The `lxml` library converts an XML string into Python objects and vice versa

In [None]:
import requests
import pandas as pd
from lxml import etree

In [None]:
# Sample XML document used by the cells below: a <Bookstore> root holding two
# <Book> elements. Only the first Book carries a Weight attribute; only the
# second has a <Remark> child and more than one <Author>.
data_string = """
<Bookstore>
   <Book ISBN="ISBN-13:978-1599620787" Price="15.23" Weight="1.5">
      <Title>New York Deco</Title>
      <Authors>
         <Author Residence="New York City">
            <First_Name>Richard</First_Name>
            <Last_Name>Berenholtz</Last_Name>
         </Author>
      </Authors>
   </Book>
   <Book ISBN="ISBN-13:978-1579128562" Price="15.80">
      <Remark>
      Five Hundred Buildings of New York and over one million other books are available for Amazon Kindle.
      </Remark>
      <Title>Five Hundred Buildings of New York</Title>
      <Authors>
         <Author Residence="Beijing">
            <First_Name>Bill</First_Name>
            <Last_Name>Harris</Last_Name>
         </Author>
         <Author Residence="New York City">
            <First_Name>Jorg</First_Name>
            <Last_Name>Brockmann</Last_Name>
         </Author>
      </Authors>
   </Book>
</Bookstore>
"""

In [None]:
# Parse the XML literal; `root` is the top-level element of the resulting tree.
root = etree.XML(data_string)
# Cell output: the root element's tag name together with its Python type.
root.tag, type(root.tag)

In [None]:
# Serialize the tree back to bytes with indentation, then decode the bytes
# so the document prints as readable text instead of a bytes repr.
pretty_bytes = etree.tostring(root, pretty_print=True)
print(pretty_bytes.decode("utf-8"))

#### Iterating over complete XML tree

In [None]:
# With no tag filter, root.iter() yields every element in the tree.
for node in root.iter():
    print(node)

#### Iterate over the direct children of the root, accessing their tags

In [None]:
# Iterating an element directly yields only its immediate sub-elements.
for direct_child in root:
    print(direct_child, direct_child.tag)

#### Iterate to get specific tags and data
1. The `Author` tags are accessed with `root.iter('Author')`
2. For each author tag, the .find function accesses the First_Name and Last_Name tags
3. With a bare tag name, the .find function only looks at the direct children, not deeper descendants, so be careful! (A path such as `Authors/Author` is needed to reach further down.)
4. The .text attribute holds the text of a leaf node (here it is passed to `print`)

In [None]:
# Visit every Author element anywhere in the tree and print its name parts.
for author in root.iter('Author'):
    # First_Name and Last_Name are direct children of Author, so a bare
    # tag name is enough for .find here.
    first_name = author.find('First_Name').text
    last_name = author.find('Last_Name').text
    print(first_name, last_name)

#### Filter values of attributes
e.g. find the first name of the author of a book that weighs 1.5 oz

In [None]:
# `Book[@Weight="1.5"]` keeps only Book children of the root whose Weight
# attribute equals the string "1.5"; the remaining path steps descend through
# Authors/Author to First_Name. `.find` returns the first match (or None when
# nothing matches, in which case `.text` raises AttributeError).
root.find('Book[@Weight="1.5"]/Authors/Author/First_Name').text