In [None]:
from bs4 import BeautifulSoup, ResultSet
from bs4.element import Tag


In [None]:
# Read the whole page into memory as text. The encoding is given explicitly
# so decoding does not depend on the platform's locale default.
# NOTE(review): assumes lom.html is UTF-8 encoded - confirm against the file.
with open('lom.html', encoding='utf-8') as f:
    html = f.read()
    

In [None]:
# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed (and warns), so the parsed tree can differ between machines.
# 'html.parser' ships with the standard library, so it is always available.
soup = BeautifulSoup(html, 'html.parser')

In [None]:
# Every <div> whose class includes 'liveapi_object_section'.
sections: ResultSet[Tag] = soup.find_all('div', class_='liveapi_object_section')
# First match, kept for ad-hoc inspection; raises IndexError if nothing matched.
first = sections[0]

In [None]:
from dataclasses import dataclass


def indent(text: str, level: int = 1) -> str:
    """Prefix every line of *text* with *level* tab characters.

    Lines are delimited by ``'\\n'`` only; empty lines receive the prefix too.
    """
    prefix = '\t' * level
    # Prefix the first line, then re-insert the prefix after each newline.
    return prefix + text.replace('\n', '\n' + prefix)

# Delimiters used by format_list() when rendering a non-empty list.
open_bracket = '[\n'
# The closing bracket goes on its own line, preceded by four spaces.
close_bracket = '\n    ]'
# Placed between consecutive rendered items.
separator = ',\n'

def format_list(l: list) -> str:
    """Render *l* as a bracketed, one-item-per-line string.

    An empty (falsy) *l* renders as ``'[]'``. Otherwise each item is
    converted with ``str()``, indented one level, and the items are joined
    with ``separator`` between ``open_bracket`` and ``close_bracket``.
    """
    if l:
        rendered_items = [indent(str(item)) for item in l]
        return f'{open_bracket}{separator.join(rendered_items)}{close_bracket}'
    return '[]'

@dataclass
class Child:
    """Record for one child entry: name, obj_type, access and description.

    ``str()`` and ``repr()`` both produce the same multi-line
    ``Child(...)`` rendering with one field per line.
    """

    name: str
    obj_type: str
    access: list[str]
    description: str

    def __str__(self):
        """Return the fields, one per line, wrapped in ``Child(...)``."""
        body = ',\n'.join((
            f'    name: {self.name}',
            f'    obj_type: {self.obj_type}',
            f'    access: {self.access}',
            f'    description: {self.description}',
        ))
        return 'Child(\n' + body + '\n)'

    def __repr__(self):
        """Reuse the ``str()`` rendering."""
        return f'{self}'
        
@dataclass
class Property:
    """Record for one property entry: name, obj_type, access and description.

    ``str()`` and ``repr()`` both produce the same multi-line
    ``Property(...)`` rendering with one field per line.
    """

    name: str
    obj_type: str
    access: list[str]
    description: str

    def __str__(self):
        """Return the fields, one per line, wrapped in ``Property(...)``."""
        body = ',\n'.join((
            f'    name: {self.name}',
            f'    obj_type: {self.obj_type}',
            f'    access: {self.access}',
            f'    description: {self.description}',
        ))
        return 'Property(\n' + body + '\n)'

    def __repr__(self):
        """Reuse the ``str()`` rendering."""
        return f'{self}'
    

@dataclass
class Function:
    """Record for one function entry: name, parameters, description, returns.

    ``str()`` and ``repr()`` both produce the same multi-line
    ``Function(...)`` rendering with one field per line.
    """

    name: str
    parameters: list[str]
    description: str
    returns: list[str]

    def __str__(self):
        """Return the fields, one per line, wrapped in ``Function(...)``."""
        body = ',\n'.join((
            f'    name: {self.name}',
            f'    parameters: {self.parameters}',
            f'    description: {self.description}',
            f'    returns: {self.returns}',
        ))
        return 'Function(\n' + body + '\n)'

    def __repr__(self):
        """Reuse the ``str()`` rendering."""
        return f'{self}'


@dataclass
class Object:
    """Record for one object section and everything parsed out of it.

    ``str()`` and ``repr()`` both produce the same multi-line
    ``Object(...)`` rendering; the three list fields are rendered with
    ``format_list``.
    """

    name: str
    # build_object() fills this from get_path(), which returns a list of
    # whitespace-separated tokens, so the field is a list rather than a str.
    path: list[str]
    children: list[Child]
    properties: list[Property]
    functions: list[Function]

    def __str__(self):
        """Return the fields, one per line, wrapped in ``Object(...)``."""
        # Every field except the last is followed by a comma, matching the
        # layout used by Child, Property and Function.
        return f'''Object(
    name: {self.name},
    path: {self.path},
    children: {format_list(self.children)},
    properties: {format_list(self.properties)},
    functions: {format_list(self.functions)}
)'''

    def __repr__(self):
        """Reuse the ``str()`` rendering."""
        return str(self)

In [None]:
import re
# Matches any run of whitespace characters (newlines included).
expr = r"[\n\s]+"


def sanitize_whitespace(f):
    """Decorate *f* so whitespace runs in its result collapse to one space.

    The wrapped function takes a single argument. Its result may be:

    * ``None`` - passed through unchanged, so "nothing found" results do
      not blow up inside ``re.sub``;
    * a ``list`` of strings - each element is cleaned and a new list is
      returned;
    * a string - the cleaned string is returned.
    """
    from functools import wraps

    @wraps(f)  # keep the wrapped function's name/docstring for debugging
    def sanitizer(a):
        res = f(a)
        if res is None:
            return None
        if isinstance(res, list):
            return [re.sub(expr, ' ', x) for x in res]
        return re.sub(expr, ' ', res)

    return sanitizer

def unwrap_text(f):
    """Decorate *f* so its element result is turned into cleaned text.

    The wrapped function takes a single argument and returns either one
    object exposing ``.text``, a list of such objects, or ``None``:

    * ``None`` is passed through unchanged, so getters that signal a
      missing element (as ``get_type`` does) no longer raise
      ``AttributeError``;
    * a list yields a list of stripped, whitespace-sanitized strings;
    * a single object yields one stripped, whitespace-sanitized string.
    """
    from functools import wraps

    @sanitize_whitespace
    def extract_text(res):
        if isinstance(res, list):
            return [x.text.strip() for x in res]
        return res.text.strip()

    @wraps(f)  # keep the wrapped function's name/docstring for debugging
    def unwrapper(a):
        res = f(a)
        if res is None:
            # Nothing was found; there is no text to extract or sanitize.
            return None
        return extract_text(res)

    return unwrapper

def get_type_name(section: Tag, name_css_class: str):
    """Return the first element under *section* with class ``liveapi_<name_css_class>_name``.

    The result is whatever ``section.select_one`` returns for that selector.
    """
    selector = '.liveapi_' + name_css_class + '_name'
    return section.select_one(selector)

@unwrap_text
def get_object_name(section: Tag):
    """Look up the ``liveapi_object_name`` element of *section*.

    ``unwrap_text`` converts the element into its cleaned text.
    """
    return get_type_name(section, name_css_class='object')

@unwrap_text
def get_description(section):
    """Look up the first ``.description`` element under *section*.

    ``unwrap_text`` converts the element into its cleaned text.
    """
    description_el = section.select_one('.description')
    return description_el

def get_path(section):
    """Return the whitespace-separated tokens of *section*'s ``.path`` text.

    Returns an empty list when no ``.path`` element is found.
    """
    path_el = section.select_one('.path')
    if not path_el:
        return []
    stripped = path_el.text.strip()
    return stripped.split()

def get_children(section):
    """Return every ``.liveapi_child_group`` element found under *section*."""
    selector = '.liveapi_child_group'
    return section.select(selector)

@unwrap_text
def get_child_name(child: Tag):
    """Look up the ``liveapi_child_name`` element of *child*.

    ``unwrap_text`` converts the element into its cleaned text.
    """
    return get_type_name(child, name_css_class='child')

@unwrap_text
def get_access(child):
    """Return the ``.value`` elements inside *child*'s ``.access`` element.

    ``unwrap_text`` converts the elements into a list of cleaned strings.
    When *child* has no ``.access`` element an empty list is returned
    instead of raising ``AttributeError``, matching how ``get_type``,
    ``get_path`` and ``get_parameters`` treat a missing element.
    """
    access_el = child.select_one('.access')
    if access_el is None:
        return []
    return access_el.select('.value')

@unwrap_text
def get_type(child: Tag):
    """Return the ``.value`` element nested in *child*'s ``.type`` element.

    Yields ``None`` when *child* has no ``.type`` element. The result is
    then handed to ``unwrap_text``.
    """
    type_el = child.select_one('.type')
    return type_el.select_one('.value') if type_el else None

def get_properties(section):
    """Return every ``.liveapi_property_group`` element found under *section*."""
    selector = '.liveapi_property_group'
    return section.select(selector)

@unwrap_text
def get_property_name(property):
    """Look up the ``liveapi_property_name`` element of *property*.

    ``unwrap_text`` converts the element into its cleaned text.
    """
    return get_type_name(property, name_css_class='property')

def get_functions(section):
    """Return every ``.liveapi_function_group`` element found under *section*."""
    selector = '.liveapi_function_group'
    return section.select(selector)

@unwrap_text
def get_function_name(function):
    """Look up the ``liveapi_function_name`` element of *function*.

    ``unwrap_text`` converts the element into its cleaned text.
    """
    return get_type_name(function, name_css_class='function')

def get_parameters(function):
    """Return the whitespace-separated tokens of *function*'s ``.messagename`` text.

    Returns an empty list when no ``.messagename`` element is found or
    when its text is blank.
    """
    selected = function.select_one('.messagename')
    if selected is None:
        return []
    # split() with no argument splits on any whitespace run (spaces, tabs,
    # newlines) and never yields empty tokens, unlike split(' '). This also
    # matches how get_path tokenizes its text.
    return selected.text.split()

def get_returns(function):
    """Extract the text that follows ``Returns:`` inside *function*.

    Selects the first element whose text contains ``"Returns:"`` and
    returns, as a stripped string, the piece of its text that follows the
    first ``Returns:`` marker (up to the next marker, if there is one).
    When no element matches, an empty list is returned instead.
    """
    match = function.select_one(':-soup-contains("Returns:")')
    if not match:
        return []
    pieces = match.text.split('Returns:')
    return pieces[1].strip()

def build_child(child: Tag):
    """Assemble a ``Child`` record from one child element.

    Each field comes from the matching getter: ``get_child_name``,
    ``get_type``, ``get_access`` and ``get_description``.
    """
    name = get_child_name(child)
    obj_type = get_type(child)
    access = get_access(child)
    description = get_description(child)
    return Child(name=name, obj_type=obj_type, access=access, description=description)

def build_property(prop):
    """Assemble a ``Property`` record from one property element.

    Each field comes from the matching getter: ``get_property_name``,
    ``get_type``, ``get_access`` and ``get_description``.
    """
    name = get_property_name(prop)
    obj_type = get_type(prop)
    access = get_access(prop)
    description = get_description(prop)
    return Property(name=name, obj_type=obj_type, access=access, description=description)

def build_function(func):
    """Assemble a ``Function`` record from one function element.

    Each field comes from the matching getter: ``get_function_name``,
    ``get_parameters``, ``get_description`` and ``get_returns``.
    """
    name = get_function_name(func)
    parameters = get_parameters(func)
    description = get_description(func)
    returns = get_returns(func)
    return Function(name=name, parameters=parameters, description=description, returns=returns)

def build_object(section):
    """Assemble an ``Object`` record from one section element.

    The name and path come from ``get_object_name`` and ``get_path``. The
    child, property and function groups found in *section* are each
    converted with their respective ``build_*`` helper.
    """
    name = get_object_name(section)
    path = get_path(section)
    children = [build_child(group) for group in get_children(section)]
    properties = [build_property(group) for group in get_properties(section)]
    functions = [build_function(group) for group in get_functions(section)]
    return Object(
        name=name,
        path=path,
        children=children,
        properties=properties,
        functions=functions,
    )

def build_objects(sections):
    """Return a list with one ``build_object`` result per item of *sections*."""
    objects = []
    for section in sections:
        objects.append(build_object(section))
    return objects

In [None]:
# Build one Object per entry of `sections`.
obj = build_objects(sections)
# Bare expression as the last line of the cell.
# NOTE(review): presumably there so the notebook displays the result - confirm.
obj

In [None]:
# Scratch cell for inspecting the first section and its first child,
# property and function group. Each [0] raises IndexError if the
# corresponding group list is empty.
# Bare expression with no effect as a statement.
# NOTE(review): a notebook presumably only displays the cell's last expression.
first
first_child = get_children(first)[0]
first_property = get_properties(first)[0]
first_function = get_functions(first)[0]
# Last expression of the cell.
first_child