## SAX XML parser basics

In [62]:
# Small sample document covering plain text nodes, an attribute and an empty
# element. The backslash right after the opening quotes drops the leading
# newline, so the string begins directly with the XML declaration.
EXAMPLE_XML_CODE_1 = """\
<?xml version="1.0" encoding="UTF-8"?>
<example>
    <a>lorem ipsum</a>
    <b>dolor sit amet</b>
    <c d="e"><f/></c>
</example>
"""

In [63]:
from xml import sax

In [64]:
class CustomHandlerClass(sax.handler.ContentHandler):
    """Content handler that prints every SAX event it receives."""

    def startElement(self, name, attrs):
        """Print the name of an opening tag and its attributes as (name, value) pairs."""
        attr_pairs = attrs.items()
        print(f"Start tag {name} with attrs {attr_pairs}")

    def endElement(self, name):
        """Print the name of a closing tag."""
        print(f"End tag {name}")

    def characters(self, content):
        """Print the repr of a chunk of character data, making whitespace visible."""
        print(f"Character data {content!r}")

In [65]:
# Build an incremental SAX parser and route its events to the printing handler.
parser = sax.make_parser()
handler = CustomHandlerClass()
parser.setContentHandler(handler)
parser.feed(EXAMPLE_XML_CODE_1)
# feed() only pushes data into the parser; close() signals the end of input so
# the final well-formedness checks run and endDocument() is delivered.
parser.close()

Start tag example with attrs []
Character data '\n'
Character data '    '
Start tag a with attrs []
Character data 'lorem ipsum'
End tag a
Character data '\n'
Character data '    '
Start tag b with attrs []
Character data 'dolor sit amet'
End tag b
Character data '\n'
Character data '    '
Start tag c with attrs [('d', 'e')]
Start tag f with attrs []
End tag f
End tag c
Character data '\n'
End tag example


In [67]:
# Sample document in which <x> appears both directly under the root and nested
# inside <y>. The backslash right after the opening quotes drops the leading
# newline, so the string begins directly with the XML declaration.
EXAMPLE_XML_CODE_2 = """\
<?xml version="1.0" encoding="UTF-8"?>
<example>
    <x>lorem</x>
    <y><x>ipsum</x></y>
    <x>dolor sit</x>
</example>
"""

In [70]:
class AnotherHandlerClass(sax.handler.ContentHandler):
    """Print the character data found inside ``<x>`` elements.

    Every opening ``<x>`` tag is announced with ``x tag``, and each chunk of
    text received while at least one ``<x>`` is open is printed via ``repr``.

    Open ``<x>`` elements are counted rather than tracked with a single
    boolean, so text that follows a nested ``</x>`` (as in
    ``<x>a<x>b</x>c</x>``) is still reported; a plain flag would be cleared
    by the inner end tag and ``'c'`` would be lost.
    """

    # True while the parser is inside at least one <x> element.
    is_x_tag = False
    # Number of currently open <x> elements (supports <x> nested in <x>).
    _x_depth = 0

    # To report only <x> elements nested inside <y>, count open <y> elements
    # the same way and additionally require that count to be > 0 in
    # startElement.

    def startDocument(self):
        """Reset tracking so the handler can be reused after an aborted parse."""
        self._x_depth = 0
        self.is_x_tag = False

    def startElement(self, name, attrs):
        """Announce an opening ``<x>`` tag and start reporting its text."""
        if name == "x":
            print("x tag")
            self._x_depth += 1
            self.is_x_tag = True

    def endElement(self, name):
        """Stop reporting text once the outermost ``<x>`` has been closed."""
        if name == "x":
            self._x_depth -= 1
            self.is_x_tag = self._x_depth > 0

    def characters(self, content):
        """Print the repr of ``content`` when it belongs to an open ``<x>``."""
        if self.is_x_tag:
            print(repr(content))

# Parse the second sample with a fresh parser wired to the <x>-only handler.
parser = sax.make_parser()
parser.setContentHandler(AnotherHandlerClass())
parser.feed(EXAMPLE_XML_CODE_2)
# feed() only pushes data into the parser; close() signals the end of input so
# the final well-formedness checks run and endDocument() is delivered.
parser.close()

x tag
'lorem'
x tag
'ipsum'
x tag
'dolor sit'
