# In [None]:
from bs4 import BeautifulSoup
import re

def _strip_bracket_spans(html):
    """Remove every ``<![_ ... ]>`` span from *html* (``str`` or bytes).

    The match is non-greedy and does not cross newlines. A bytes pattern is
    used for bytes input because ``re`` refuses to mix a ``str`` pattern with
    a bytes subject.
    """
    if isinstance(html, (bytes, bytearray)):
        return re.sub(rb'<!\[_.*?\]>', b'', html)
    return re.sub(r'<!\[_.*?\]>', '', html)


def _to_ascii(text):
    """Return *text* with every non-ASCII character removed.

    Non-breaking spaces are turned into plain spaces first; simply dropping
    them would glue the neighbouring words together.
    """
    return text.replace("\xa0", " ").encode("ascii", "ignore").decode()


def parse(html):
    """Extract the ASCII-only visible text of an (inline-XBRL) HTML document.

    The markup is parsed with BeautifulSoup's ``html.parser``. Elements whose
    ``xmlns`` attribute is one of the listed inline-XBRL namespace URLs, the
    listed ``ix:*`` elements, and ``<script>``/``<style>`` elements are
    removed before the text of ``<body>`` is collected.

    If anything in that path raises, the error is printed and the document is
    re-parsed with the ``lxml`` backend (no XBRL stripping); if that yields no
    ``<body>``, tags are stripped with a regular expression instead.

    Args:
        html: Markup as ``str`` or bytes. ``None`` and empty input are
            treated as an empty document.

    Returns:
        The extracted text with non-ASCII characters removed, ``""`` when
        there is no input or no ``<body>``, or the literal string
        ``"Parsing failed - content could not be processed"`` when both
        parsing attempts fail. ``Exception`` subclasses are never propagated.
    """
    if not html:
        # Nothing to parse; handing None to BeautifulSoup would only fail twice.
        return ""

    try:
        # Kept inside the try so that unexpected input types (NaN, int, ...)
        # take the fallback path instead of raising out of the function.
        html = _strip_bracket_spans(html)
        soup = BeautifulSoup(html, features="html.parser")

        # Drop every element that declares one of these XBRL namespaces.
        for namespace_url in (
            "http://www.xbrl.org/inlineXBRL/transformation/2010-04-25",
            "http://www.xbrl.org/inlineXBRL/transformation/2011-07-31",
            "http://www.xbrl.org/inlineXBRL/2.0",
        ):
            for ixbrl_element in soup.find_all(attrs={"xmlns": namespace_url}):
                ixbrl_element.decompose()

        # NOTE(review): html.parser lower-cases tag names, so the mixed-case
        # "ix:nonNumeric" entry likely never matches. Confirm whether that
        # content is really meant to be dropped before "fixing" the case.
        for tag_name in ("ix:nonNumeric", "ix:fraction", "ix:denominator",
                         "ix:numerator", "ix:hidden", "ix:header",
                         "ix:relationship"):
            for ixbrl_element in soup.find_all(tag_name):
                ixbrl_element.decompose()

        # Script and style bodies are not visible text.
        for script in soup(["script", "style"]):
            script.extract()

        text = soup.body.get_text(separator=' ', strip=True) if soup.body else ""
        return _to_ascii(text)
    except Exception as e:
        print(f"Standard parsing failed: {e}")
        try:
            soup = BeautifulSoup(html, features="lxml")
            if soup.body:
                return _to_ascii(soup.body.get_text(separator=' ', strip=True))
            # No <body> at all: crude tag stripping as a last resort.
            if isinstance(html, (bytes, bytearray)):
                html = html.decode("utf-8", "ignore")
            return _to_ascii(re.sub(r'<[^>]+>', ' ', html))
        except Exception as fallback_e:
            print(f"Fallback parsing failed: {fallback_e}")
            return "Parsing failed - content could not be processed"