# Structured Language Tools
- tools to read and write structured languages


# JSON
- JSON format is very similar to Python lists and dicts
- JSON is used extensively in many internet protocols
- replacement for XML
- [doc](https://docs.python.org/3.6/library/json.html)

In [None]:
# serialize a Python object to a JSON-formatted string

import json

# sample value: a list holding a string and a dict whose value is a tuple
data = [
    'foo',
    {'bar': ('baz', None, 1.0, 2)},
]
js = json.dumps(data)
js

In [None]:
# human-readable output: indent nested levels and sort dict keys

print(
    json.dumps(
        data,
        indent=4,        # four spaces per nesting level
        sort_keys=True,  # emit dict keys in sorted order
    )
)

In [None]:
# convert JSON back to Python
# (the tuple that was serialized comes back as a list, because JSON arrays
# are decoded to Python lists)

json.loads(js)

# YAML
- superset of JSON
- [doc](https://pyyaml.org/wiki/PyYAMLDocumentation)

In [None]:
import yaml

# serialize a nested structure (ints, a nested list and a dict) to YAML text
s = yaml.dump([2, 3, [5, 6], {44: 55, 66: 77}])

# display the YAML document that was produced
print(s)

In [None]:
# parse the YAML text in `s` back into Python objects
# NOTE(review): yaml.Loader is PyYAML's unrestricted loader. That is fine for
# the string generated in this notebook, but for YAML coming from an untrusted
# source the PyYAML documentation recommends yaml.safe_load(s) instead.
yaml.load(s, Loader=yaml.Loader)

# XML parser
- [doc](https://docs.python.org/3.6/library/xml.etree.elementtree.html)

In [None]:
# sample XML document for the ElementTree cells: a <data> root holding three
# <country> elements, each with <rank>, <year> and <gdppc> children plus one
# or more <neighbor> elements.
# NOTE(review): the variable name `xml` matches the standard-library package
# name. The later `import xml.etree.ElementTree as ET` binds only `ET`, so this
# string is not overwritten, but a plain `import xml` would replace it.
xml= '''<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

'''

In [None]:
import xml.etree.ElementTree as ET

# parse the XML text held in `xml`; the result is the document's root element
root = ET.fromstring(xml)

In [None]:
# top level tag: the name of the root element ('data' for the sample document)

root.tag

In [None]:
# iterating the root yields its direct children: the three <country> elements

for c in root:
    # show the element, its attribute (name, value) pairs and its <rank> text
    print(
        c,
        c.items(),
        c.find('rank').text,
    )

In [None]:
# bind the three <country> children of the root to individual names
a, b, c = root

In [None]:
# attribute (name, value) pairs of each country element
[country.items() for country in (a, b, c)]

In [None]:
# can search xml parse tree
# find() looks up a child element by tag name; .text gives that element's
# text content. The middle entry does not take .text, so the Element object
# itself is displayed.

[a.find('year').text, b.find('neighbor'), c.find('rank').text]

# HTML parser
- interesting technique
    - define methods for tags you care about
    - methods are called when tags are encountered
- [doc](https://docs.python.org/3.6/library/html.parser.html)

In [None]:
from html.parser import HTMLParser

class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints each start tag, end tag and text run.

    The handle_* methods below are the hooks the base parser calls as it
    meets the corresponding pieces of markup in the data given to feed().
    """

    def _announce(self, label, value):
        """Print one event line: the fixed label followed by the value."""
        print(label, value)

    def handle_starttag(self, tag, attrs):
        """Report an opening tag; its attributes (attrs) are not printed."""
        self._announce("Encountered a start tag:", tag)

    def handle_endtag(self, tag):
        """Report a closing tag."""
        self._announce("Encountered an end tag :", tag)

    def handle_data(self, data):
        """Report text found between tags."""
        self._announce("Encountered some data  :", data)

# run the demo: feed() parses the markup and triggers the handler methods
parser = MyHTMLParser()
parser.feed('<html><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1></body></html>')
# close() makes the parser process anything feed() is still buffering (for
# example text after the last tag), so every piece of the input is reported
# even if the sample markup above is edited
parser.close()