### Basic web scraping with built-in *streaming parser* `HTMLParser`

In [9]:
# HTMLParser is used by subclassing it and overriding its handle_* callback methods

from html.parser import HTMLParser

class Parser(HTMLParser):
    """Streaming parser that prints the text found directly inside chosen tags.

    Args:
        tags: Name, or iterable of names, of the HTML tags whose text should
            be reported. Defaults to ``("title",)``, so ``Parser()`` reports
            only the page title.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, tags=("title",)):
        super().__init__()
        # Accept a single tag name as a convenience; iterating a bare string
        # would otherwise yield its individual characters.
        if isinstance(tags, str):
            tags = (tags,)
        # HTMLParser passes lower-cased tag names to the handlers, so the
        # targets are normalised the same way.
        self.tags = frozenset(tag.lower() for tag in tags)
        # True while the parser is positioned directly inside a target tag.
        self.recording = False

    def handle_starttag(self, tag, attrs):
        """Handler hook: start recording on a target tag, stop on any other."""
        # Customise the tags of interest through the ``tags`` argument.
        self.recording = tag in self.tags

    def handle_endtag(self, tag):
        """Handler hook: stop recording once a target tag is closed.

        Without this, text that follows the closing tag (up to the next
        start tag) would still be reported as if it belonged to the tag.
        """
        if tag in self.tags:
            self.recording = False

    def handle_data(self, data):
        """Handler hook: report text chunks seen while recording."""
        # Whitespace-only chunks (newlines, indentation) carry no content.
        if self.recording and data.strip():
            # Customise here what should happen with the data that was found.
            print(f"Found the following data for tag: {repr(data)}")

In [7]:
# Sample input: a raw HTML string holding the opening lines of a page
# (doctype, <html>, <head>, a <meta> tag and the <title> element).
# The markup is deliberately incomplete: <head> and <html> are never closed.
content = """
<!DOCTYPE html>
<html class="client-nojs" lang="en" dir="ltr">
<head>
<meta charset="UTF-8"/>
<title>1992 World Junior Championships in Athletics – Men's high jump - Wikipedia</title>
"""

In [10]:
# Example application: `content` is a string of raw HTML. feed() accepts any
# str of markup, so HTML read from a file or a decoded HTTP response body
# can be passed in the same way.

p = Parser()
p.feed(content)
# close() tells the streaming parser that the input has ended, forcing it to
# process anything it is still buffering.
p.close()

Found the following data for tag: "1992 World Junior Championships in Athletics – Men's high jump - Wikipedia"
