In [157]:
import xml.etree.ElementTree as ET

In [168]:
# Parse the books catalogue from disk into an ElementTree and keep its root
# element; the cells that follow query `root`.
tree = ET.parse('xml-files/books.xml')
root = tree.getroot()

In [169]:
# Display the root Element's repr (tag name and memory address).
root

<Element 'bookstore' at 0x7f2a6c20f050>

In [170]:
# Tag name of the root element.
root.tag

'bookstore'

In [171]:
# Attributes of the root element, returned as a dict.
root.attrib

{'type': 'generals', 'country': 'Pakistan'}

In [172]:
# List each direct child of the root together with its attributes.
for node in root:
    print(node.tag, node.attrib)

book {'category': 'cooking'}
book {'category': 'children'}
book {'category': 'web'}
book {'category': 'web', 'cover': 'paperback'}


In [173]:
# Tags of every element in the tree, root included, in document order.
[node.tag for node in root.iter()]

['bookstore',
 'book',
 'title',
 'author',
 'year',
 'price',
 'book',
 'title',
 'author',
 'year',
 'price',
 'book',
 'title',
 'author',
 'author',
 'author',
 'author',
 'author',
 'year',
 'price',
 'book',
 'title',
 'author',
 'year',
 'price']

In [176]:
# ET.tostring() returns bytes by default, so print() shows the b'...' repr
# with escaped newlines rather than formatted XML.
print(ET.tostring(root))

b'<bookstore country="Pakistan" type="generals">\n\n  <book category="cooking">\n    <title lang="en">Everyday Italian</title>\n    <author>Giada De Laurentiis</author>\n    <year>2005</year>\n    <price>30.00</price>\n  </book>\n\n  <book category="children">\n    <title lang="en">Harry Potter</title>\n    <author>J K. Rowling</author>\n    <year>2005</year>\n    <price>29.99</price>\n  </book>\n\n  <book category="web">\n    <title lang="en">XQuery Kick Start</title>\n    <author>James McGovern</author>\n    <author>Per Bothner</author>\n    <author>Kurt Cagle</author>\n    <author>James Linn</author>\n    <author>Vaidyanathan Nagarajan</author>\n    <year>2003</year>\n    <price>49.99</price>\n  </book>\n\n  <book category="web" cover="paperback">\n    <title lang="en">Learning XML</title>\n    <author>Erik T. Ray</author>\n    <year>2003</year>\n    <price>39.95</price>\n  </book>\n\n</bookstore>'


In [177]:
# Serialise to UTF-8 bytes, then decode so print() renders real line breaks.
serialized = ET.tostring(root, encoding='utf8')
print(serialized.decode('utf8'))

<?xml version='1.0' encoding='utf8'?>
<bookstore country="Pakistan" type="generals">

  <book category="cooking">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <price>30.00</price>
  </book>

  <book category="children">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <price>29.99</price>
  </book>

  <book category="web">
    <title lang="en">XQuery Kick Start</title>
    <author>James McGovern</author>
    <author>Per Bothner</author>
    <author>Kurt Cagle</author>
    <author>James Linn</author>
    <author>Vaidyanathan Nagarajan</author>
    <year>2003</year>
    <price>49.99</price>
  </book>

  <book category="web" cover="paperback">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <price>39.95</price>
  </book>

</bookstore>


In [179]:
# Attributes of every <book> element found anywhere under the root.
for book_elem in root.iter('book'):
    print(book_elem.attrib)

{'category': 'cooking'}
{'category': 'children'}
{'category': 'web'}
{'category': 'web', 'cover': 'paperback'}


In [181]:
# Text of every <title> element found anywhere under the root.
for title_elem in root.iter('title'):
    print(title_elem.text)

Everyday Italian
Harry Potter
XQuery Kick Start
Learning XML


In [183]:
# Text of every <author> element; a book may contribute several.
for author_elem in root.iter('author'):
    print(author_elem.text)

Giada De Laurentiis
J K. Rowling
James McGovern
Per Bothner
Kurt Cagle
James Linn
Vaidyanathan Nagarajan
Erik T. Ray


In [184]:
# Text of every <year> element found anywhere under the root.
for year_elem in root.iter('year'):
    print(year_elem.text)

2005
2005
2003
2003


In [186]:
# XPath expressions: a positional predicate selects the first <book> child.
for first_book in root.findall("./book[1]"):
    print(first_book.attrib)

{'category': 'cooking'}


In [187]:
# Positional predicates are 1-based, so [2] is the second <book> child.
for second_book in root.findall("./book[2]"):
    print(second_book.attrib)

{'category': 'children'}


In [189]:
# Step from the second <book> down to its <title> child and print the text.
for title_elem in root.findall("./book[2]/title"):
    print(title_elem.text)

Harry Potter


In [188]:
# last() selects the final <book> child of the root.
for last_book in root.findall("./book[last()]"):
    print(last_book.attrib)

{'category': 'web', 'cover': 'paperback'}


In [None]:
# last()-1 selects the <book> child immediately before the final one.
for penultimate_book in root.findall("./book[last()-1]"):
    print(penultimate_book.attrib)

In [193]:
# Books whose <year> child has the text '2005'.
# The predicate is attached directly to the `book` step: the original
# "./book/[...]" form has a stray "/" that is not valid XPath and only worked
# because ElementTree's path tokenizer happens to skip it.
for book in root.findall("./book[year='2005']"):
    print(book.attrib)

{'category': 'cooking'}
{'category': 'children'}


In [194]:
# Books whose <year> child has the text '2003'.
# The predicate is attached directly to the `book` step: the original
# "./book/[...]" form has a stray "/" that is not valid XPath and only worked
# because ElementTree's path tokenizer happens to skip it.
for book in root.findall("./book[year='2003']"):
    print(book.attrib)

{'category': 'web'}
{'category': 'web', 'cover': 'paperback'}


### MOVIES.XML

In [195]:
# Load the movies collection. NOTE: this rebinds `tree` and `root`, so the
# books document parsed earlier is no longer reachable through these names.
tree = ET.parse('xml-files/movies.xml')
root = tree.getroot()

In [196]:
# Tag name of the movies document's root element.
root.tag

'collection'

In [197]:
# Attributes of the root element; an empty dict means it carries none.
root.attrib

{}

In [198]:
# List each direct child of the root together with its attributes.
for node in root:
    print(node.tag, node.attrib)

genre {'category': 'Action'}
genre {'category': 'Thriller'}
genre {'category': 'Comedy'}


In [199]:
# Tags of every element in the tree, root included, in document order.
[node.tag for node in root.iter()]

['collection',
 'genre',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'genre',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'genre',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description',
 'decade',
 'movie',
 'format',
 'year',
 'rating',
 'description']

In [200]:
# Serialise to UTF-8 bytes, then decode so print() renders real line breaks.
serialized = ET.tostring(root, encoding='utf8')
print(serialized.decode('utf8'))

<?xml version='1.0' encoding='utf8'?>
<collection>
    <genre category="Action">
        <decade years="1980s">
            <movie favorite="True" title="Indiana Jones: The raiders of the lost Ark">
                <format multiple="No">DVD</format>
                <year>1981</year>
                <rating>PG</rating>
                <description>
                Archaeologist and adventurer Indiana Jones 
                is hired by the U.S. government to find the Ark of the 
                Covenant before the Nazis.
                </description>
            </movie>
               <movie favorite="True" title="THE KARATE KID">
               <format multiple="Yes">DVD,Online</format>
               <year>1984</year>
               <rating>PG</rating>
               <description>None provided.</description>
            </movie>
            <movie favorite="False" title="Back 2 the Future">
               <format multiple="False">Blu-ray</format>
               <year>1985</year>
    

In [201]:
# Attributes of every <movie> element, regardless of genre or decade.
for movie_elem in root.iter('movie'):
    print(movie_elem.attrib)

{'favorite': 'True', 'title': 'Indiana Jones: The raiders of the lost Ark'}
{'favorite': 'True', 'title': 'THE KARATE KID'}
{'favorite': 'False', 'title': 'Back 2 the Future'}
{'favorite': 'False', 'title': 'X-Men'}
{'favorite': 'True', 'title': 'Batman Returns'}
{'favorite': 'False', 'title': 'Reservoir Dogs'}
{'favorite': 'False', 'title': 'ALIEN'}
{'favorite': 'True', 'title': "Ferris Bueller's Day Off"}
{'favorite': 'FALSE', 'title': 'American Psycho'}
{'favorite': 'False', 'title': 'Batman: The Movie'}
{'favorite': 'True', 'title': 'Easy A'}
{'favorite': 'True', 'title': 'Dinner for SCHMUCKS'}
{'favorite': 'False', 'title': 'Ghostbusters'}
{'favorite': 'True', 'title': 'Robin Hood: Prince of Thieves'}


In [202]:
# Raw text of every <description>; whitespace from the file is kept as-is.
for desc_elem in root.iter('description'):
    print(desc_elem.text)


                Archaeologist and adventurer Indiana Jones 
                is hired by the U.S. government to find the Ark of the 
                Covenant before the Nazis.
                
None provided.
Marty McFly
Two mutants come to a private academy for their kind whose resident superhero team must 
               oppose a terrorist organization with similar powers.
NA.
WhAtEvER I Want!!!?!
"""""""""
Funny movie about a funny guy
psychopathic Bateman
What a joke!
Emma Stone = Hester Prynne
Tim (Rudd) is a rising executive
                 who “succeeds” in finding the perfect guest, 
                 IRS employee Barry (Carell), for his boss’ monthly event, 
                 a so-called “dinner for idiots,” which offers certain 
                 advantages to the exec who shows up with the biggest buffoon.
                 
Who ya gonna call?
Robin Hood slaying


In [203]:
# Movies whose <year> child has the text '1992'.
# The predicate is attached directly to the `movie` step: the original
# "movie/[...]" form has a stray "/" that is not valid XPath and only worked
# because ElementTree's path tokenizer happens to skip it.
for movie in root.findall("./genre/decade/movie[year='1992']"):
    print(movie.attrib)

{'favorite': 'True', 'title': 'Batman Returns'}
{'favorite': 'False', 'title': 'Reservoir Dogs'}


In [204]:
# <format> elements flagged multiple="Yes".
# The path ends on `format`, so the matches are <format> elements (not movies);
# the loop variable is named accordingly. The attribute predicate is attached
# directly to the step instead of following a stray "/", which is not valid
# XPath and was only tolerated by ElementTree's path tokenizer.
for fmt in root.findall("./genre/decade/movie/format[@multiple='Yes']"):
    print(fmt.attrib)

{'multiple': 'Yes'}
{'multiple': 'Yes'}
{'multiple': 'Yes'}
{'multiple': 'Yes'}
{'multiple': 'Yes'}


In [206]:
# Movies that have a <format multiple="Yes"> child.
# Match the <format> elements first, then step up to their parent with the
# standard "/.." step. The original suffix "..." is not a valid step; it only
# worked because it tokenizes as ".." (parent) followed by "." (self).
for movie in root.findall("./genre/decade/movie/format[@multiple='Yes']/.."):
    print(movie.attrib)

{'favorite': 'True', 'title': 'THE KARATE KID'}
{'favorite': 'False', 'title': 'X-Men'}
{'favorite': 'False', 'title': 'ALIEN'}
{'favorite': 'False', 'title': 'Batman: The Movie'}
{'favorite': 'True', 'title': 'Dinner for SCHMUCKS'}


## XML SCHEMA VALIDATION using XSD

In [212]:
import lxml.etree as ET

# Compile the XSD into a validator, then parse the document to be checked.
xml_validator = ET.XMLSchema(file="xml-files/example1.xsd")
xml_file = ET.parse("xml-files/example1.xml")

# validate() reports the outcome as a bool.
is_valid = xml_validator.validate(xml_file)
print(is_valid)

True


In [214]:
import lxml.etree as ET

# Compile the XSD into a validator, then parse the document to be checked.
xml_validator = ET.XMLSchema(file="xml-files/example2.xsd")
xml_file = ET.parse("xml-files/example2.xml")

# validate() reports the outcome as a bool.
is_valid = xml_validator.validate(xml_file)
print(is_valid)

True


In [218]:
import lxml.etree as ET

# Compile the XSD into a validator, then parse the document to be checked.
xml_validator = ET.XMLSchema(file="xml-files/example3.xsd")
xml_file = ET.parse("xml-files/example3.xml")

# validate() reports the outcome as a bool.
is_valid = xml_validator.validate(xml_file)
print(is_valid)

False


## XSLT
### Extensible Stylesheet Language Transformations

In [219]:
import lxml.etree as ET

# Build the transformer from the stylesheet, then apply it to the source document.
xslt = ET.parse("xml-files/example4.xsl")
transform = ET.XSLT(xslt)
dom = ET.parse("xml-files/example4.xml")
newdom = transform(dom)

# tostring() yields bytes, so the printed value is a b'...' literal.
html = ET.tostring(newdom)
print(html)

b'<html><body><h2>Authors</h2><table border="1"><tr bgcolor="#9acd32"><th style="text-align:left">First Name</th><th style="text-align:left">Last Name</th></tr><tr><td>Jason</td><td>Edelman</td></tr><tr><td>Scott</td><td>Lowe</td></tr><tr><td>Matt</td><td>Oswalt</td></tr></table></body></html>'


In [None]:
import lxml.etree as ET

# Apply example4.xsl to example4.xml and print the result as indented markup.
dom = ET.parse("xml-files/example4.xml")
xslt = ET.parse("xml-files/example4.xsl")
transform = ET.XSLT(xslt)
newdom = transform(dom)

# lxml spells the indentation switch `pretty_print`; tostring() has no
# `Indent` keyword, so the original call raised TypeError.
html = ET.tostring(newdom, pretty_print=True)

# tostring() returns bytes; decode them so the line breaks are rendered
# instead of appearing as escaped \n inside a b'...' literal.
print(html.decode())

In [None]:
import lxml.etree as ET

# Build the transformer from the stylesheet, then apply it to the source document.
xslt = ET.parse("xml-files/example5.xsl")
transform = ET.XSLT(xslt)
dom = ET.parse("xml-files/example5.xml")
newdom = transform(dom)

# Serialise the result tree (bytes) and print it.
serialized = ET.tostring(newdom)
print(serialized)

In [None]:
import lxml.etree as ET

# Build the transformer from the stylesheet, then apply it to the source document.
xslt = ET.parse("xml-files/cdcatlog1.xsl")
transform = ET.XSLT(xslt)
dom = ET.parse("xml-files/cdcatlog.xml")
newdom = transform(dom)

# Serialise the result tree (bytes) and print it.
serialized = ET.tostring(newdom)
print(serialized)

In [None]:
import lxml.etree as ET

# Build the transformer from the stylesheet, then apply it to the source document.
xslt = ET.parse("xml-files/cdcatlog2.xsl")
transform = ET.XSLT(xslt)
dom = ET.parse("xml-files/cdcatlog.xml")
newdom = transform(dom)

# Serialise the result tree (bytes) and print it.
serialized = ET.tostring(newdom)
print(serialized)

In [None]:
import lxml.etree as ET

# Build the transformer from the stylesheet, then apply it to the source document.
xslt = ET.parse("xml-files/cdcatlog3.xsl")
transform = ET.XSLT(xslt)
dom = ET.parse("xml-files/cdcatlog.xml")
newdom = transform(dom)

# Serialise the result tree (bytes) and print it.
serialized = ET.tostring(newdom)
print(serialized)

## JSON

In [None]:
import json

# Parse the JSON document straight from the file handle into `json_dict`.
# The encoding is given explicitly because open() otherwise falls back to a
# platform-dependent default, which can mis-decode non-ASCII JSON text.
with open("json-files/example1.json", encoding="utf-8") as f:
    json_dict = json.load(f)

# Uncomment to inspect the top-level structure of the document:
# print("The JSON document is loaded as type {0}\n".format(type(json_dict)))
# print("Now printing each item in this document and the type it contains")
# for k, v in json_dict.items():
#     print("-- The key {0} contains a {1} value.".format(str(k), str(type(v))))

In [None]:
# Display the parsed JSON document.
json_dict

In [None]:
# Value stored under the top-level 'authors' key.
json_dict['authors']

In [None]:
# First entry under 'authors' (presumably a list; confirm against example1.json).
json_dict['authors'][0]

In [None]:
# Walk every top-level value and print each item it contains; the key itself
# is not used.
for _key, entries in json_dict.items():
    for entry in entries:
        print(entry)