In [1]:

from html_parser import HTMLParser
from layout import Layout
from browser import Browser
from models import Tag, SelfClosingTag, Text, ClosingTag

In [None]:
class Node:
    def __init__(self, data):
        self.data = data
        self.next = None
        self.prev = None
            
    def __repr__(self):
        return f"{self.data}"


class DoublyLinkedList:
    def __init__(self):
        self.head = None
        self.tail = None
        self.length = 0

    def __repr__(self):
        return str(self.__dict__)

    def append(self, data):
        new_node = Node(data)
        if self.head is None:
            self.head = new_node
            self.tail = self.head
        else:
            self.tail.next = new_node
            new_node.prev = self.tail
            self.tail = new_node
        self.length += 1
        return self

    def prepend(self, data):
        new_node = Node(data)
        new_node.next = self.head
        self.head.prev = new_node
        self.head = new_node
        self.length += 1
        return self

    def pretty_print(self):
        array = []
        current_node = self.head
        while current_node:
            array.append(current_node)
            current_node = current_node.next
        return array

    def traverse_list(self, index):
        node = self.head
        for i in range(index):
            node = node.next
        return node

    def insert(self, index, data):
        if index >= self.length:
            return self.append(data)
        if index == 0:
            return self.prepend(data)
        new_node = Node(data)
        prev_node = self.traverse_list(index - 1)
        move_node = prev_node.next
        new_node.next = move_node  # configure the new node next pointer
        new_node.prev = prev_node  # configure the new node previous pointer
        prev_node.next = new_node
        move_node.prev = new_node
        self.length += 1
        return self

    # def remove(self, index):
    #     if index >= self.length:
    #         raise IndexError("list index out of range")
    #     if index == 0:
    #         self.head = self.head.next
    #         self.length -= 1
    #         return self

    #     prev_node = self.traverse_list(index - 1)
    #     del_node = prev_node.next
    #     prev_node.next = del_node.next
    #     prev_node.prev = del_node.prev
    #     self.length -= 1
    #     return self

linkedlist = DoublyLinkedList()
linkedlist.append(10)
linkedlist.append(5)
linkedlist.append(16)
linkedlist.prepend(1)
linkedlist.insert(1, 6)
# linkedlist.remove(3)
# linkedlist.print_list()

In [2]:
body = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample HTML</title>
    <meta charset="UTF-8">
    <link rel="stylesheet" href="style.css" />
</head>
<body>
    <h1>Welcome to Our Website</h1>
    <a href="https://www.google.com"> This is a link </a>
    <p>This is a paragraph with some <i>italic text</i>.</p>
    <img src="logo.png" alt="Our Logo" />
    <br />
    <div> This is a div </div>
    <div> This is another div </div>
    <input type="text" placeholder="Enter your name" />
</body>
</html>
"""

In [4]:
node.children[1].children[2].__dict__

{'tag': 'p',
 'attributes': {},
 'parent': <body>,
 'children': [This is a paragraph with some , <i>, ., </p>],
 'text': ''}

In [3]:
browser = Browser()
parser = HTMLParser(body)
node = parser.parse()
d_list = Layout(node).display_list

In [5]:
def print_tree(node, indent=0):
    print(" " * indent, node)
    for child in node.children:
        print_tree(child, indent+2)

In [7]:
recurse(node)

{'tag': 'html', 'attributes': {}, 'parent': None, 'children': [<head>, <body>, </html>], 'text': ''}
{'tag': 'head', 'attributes': {}, 'parent': <html>, 'children': [<title>, <meta>, <link>, </head>], 'text': ''}
{'tag': 'title', 'attributes': {}, 'parent': <head>, 'children': [Sample HTML, </title>], 'text': ''}
{'text': 'Sample HTML'}


AttributeError: 'Text' object has no attribute 'children'

: 

In [11]:
line

['html',
 '',
 'head',
 '',
 'title',
 'Sample HTML',
 '',
 '/title',
 'meta',
 '',
 'link',
 '',
 '',
 '/head',
 'body',
 '',
 'h1',
 'Welcome to Our Website',
 '',
 '/h1',
 'a',
 ' This is a link ',
 '',
 '/a',
 'p',
 'This is a paragraph with some .',
 'i',
 'italic text',
 '',
 '/i',
 '',
 '/p',
 'img',
 '',
 'br',
 '',
 'div',
 ' This is a div ',
 '',
 '/div',
 'div',
 ' This is another div ',
 '',
 '/div',
 'input',
 '',
 '',
 '/body',
 '',
 '/html']

In [6]:
print_tree(node)

 <html>
   <head>
     <title>
       </title>
     <meta>
     <link>
     </head>
   <body>
     <h1>
       </h1>
     <a>
       </a>
     <p>
       <i>
         </i>
       </p>
     <img>
     <br>
     <div>
       </div>
     <div>
       </div>
     <input>
     </body>
   </html>


In [6]:
line = []

In [5]:
def recurse(node):
    print(node.__dict__)
    "Recurse through the tree adding open and close tags as needed."
    if isinstance(node, (Tag, SelfClosingTag)):
        line.append(node.tag)
    if isinstance(node.text, str):
        text = node.text
        # for word in text.split():
        line.append(text)
    for child in node.children:
        recurse(child)
    if isinstance(node, ClosingTag):
        line.append(node.tag)

In [10]:
recurse(node)

{'tag': 'html', 'attributes': {}, 'parent': None, 'children': [<head>, <body>, </html>], 'text': ''}
{'tag': 'head', 'attributes': {}, 'parent': <html>, 'children': [<title>, <meta>, <link>, </head>], 'text': ''}
{'tag': 'title', 'attributes': {}, 'parent': <head>, 'children': [</title>], 'text': 'Sample HTML'}
{'tag': '/title', 'attributes': {}, 'parent': <title>, 'children': [], 'text': ''}
{'tag': 'meta', 'attributes': {'charset': 'UTF-8'}, 'parent': <head>, 'children': [], 'text': ''}
{'tag': 'link', 'attributes': {'rel': 'stylesheet', 'href': 'style.css', '/': ''}, 'parent': <head>, 'children': [], 'text': ''}
{'tag': '/head', 'attributes': {}, 'parent': <head>, 'children': [], 'text': ''}
{'tag': 'body', 'attributes': {}, 'parent': <html>, 'children': [<h1>, <a>, <p>, <img>, <br>, <div>, <div>, <input>, </body>], 'text': ''}
{'tag': 'h1', 'attributes': {}, 'parent': <body>, 'children': [</h1>], 'text': 'Welcome to Our Website'}
{'tag': '/h1', 'attributes': {}, 'parent': <h1>, 'ch

In [22]:
line

['html',
 'head',
 'title',
 'Sample HTML',
 '/title',
 'meta',
 'link',
 '/head',
 'body',
 'h1',
 'Welcome to Our Website',
 '/h1',
 'a',
 ' This is a link ',
 '/a',
 'p',
 '.',
 'i',
 'italic text',
 '/i',
 '/p',
 'img',
 'br',
 'div',
 ' This is a div ',
 '/div',
 'div',
 ' This is another div ',
 '/div',
 'input',
 '/body',
 '/html']