In [None]:
import html
import re

import mistune
from loguru import logger
from mistune.renderers.html import HTMLRenderer

# Telegram supported HTML tags and their attributes
TELEGRAM_HTML_TAGS = {
    'b': [],  # bold
    'i': [],  # italic
    'u': [],  # underline
    's': [],  # strikethrough
    'a': ['href'],  # links
    'code': [],  # inline code
    'pre': ['language'],  # code blocks with language
    'tg-spoiler': [],  # spoiler
    'tg-emoji': ['emoji-id'],  # custom emoji
}

class TelegramHTMLRenderer(HTMLRenderer):
    """Custom HTML renderer that only allows Telegram-supported HTML tags."""

    def __init__(self):
        super().__init__()
        self._allowed_tags = set(TELEGRAM_HTML_TAGS.keys())
        self._allowed_attrs = TELEGRAM_HTML_TAGS

    def text(self, text):
        """Render text."""
        return text

    def block_code(self, code, info=None):
        """Render code blocks with language support."""
        if info:
            return f'<pre language="{info}">{code}</pre>'
        # return f'<code>{code}</code>'
        return f'<pre>{code}</pre>'

    # def block_quote(self, text):
    #     """Render blockquotes as code blocks."""
    #     # Remove any trailing newlines to avoid extra spacing
    #     # return f'<code>1 - {text}</code>'
    #     return f'<pre>1 - {text}</pre>'

    # def inline_code(self, code):
    #     """Render inline code."""
    #     # return f'<code>{code}</code>'
    #     return f'<pre>3 - {code}</pre>'

    def emphasis(self, text):
        """Render italic text."""
        return f'<i>{text}</i>'

    def strong(self, text):
        """Render bold text."""
        return f'<b>{text}</b>'

    def strikethrough(self, text):
        """Render strikethrough text."""
        return f'<s>{text}</s>'

    def link(self, text, url, title=None):
        """Render links."""
        return f'<a href="{url}">{text}</a>'


    def heading(self, text, level):
        """Render headings as bold text."""
        return f'<b>{text}</b>\n\n'

    def paragraph(self, text):
        """Render paragraphs as plain text with newlines."""
        return f'{text}\n\n'

    def linebreak(self):
        """Render line breaks as newlines."""
        return '\n'

    def thematic_break(self):
        """Render horizontal rules as newlines."""
        return '\n\n'

    def list(self, text, ordered, **attrs):
        """Render lists as plain text with newlines."""
        return f'{text}\n\n'

    def list_item(self, text, **attrs):
        """Render list items as plain text with bullet points."""
        bullet = '• '
        return f'{bullet}{text}\n'

    def image(self, alt, url, title=None):
        """Convert images to Telegram's format: [alt](url)"""
        logger.debug(f"Converting image: {alt} -> {url}")
        return f"[{alt}]({url})"

def is_html(text: str) -> bool:
    """Check if text contains HTML tags, ignoring tags inside code blocks."""
    # First split by code blocks
    parts = []
    in_code_block = False
    current_part = []

    for line in text.split("\n"):
        if line.startswith("```"):
            if in_code_block:
                # End of code block
                parts.append("".join(current_part))
                current_part = []
            in_code_block = not in_code_block
            continue

        if not in_code_block:
            current_part.append(line + "\n")

    # Add any remaining text
    if current_part:
        parts.append("".join(current_part))

    # Check for HTML tags only in non-code parts
    html_pattern = re.compile(r"<[^>]+>")
    return any(bool(html_pattern.search(part)) for part in parts)

def markdown_to_html(text: str) -> str:
    """
    Convert Markdown text to HTML using only Telegram-supported tags.

    Args:
        text: The markdown text to convert to HTML

    Returns:
        The HTML converted text with only Telegram-supported tags
    """
    # Skip if already HTML
    if is_html(text):
        return text

    # Initialize mistune markdown parser with our custom renderer and strikethrough plugin
    markdown = mistune.create_markdown(renderer=TelegramHTMLRenderer(), plugins=['strikethrough'])
    result = markdown(text)
    logger.debug(f"Converting markdown to HTML:\nInput: {text}\nOutput: {result}")
    return str(result)  # Ensure we return a string

def old_markdown_to_html(text: str) -> str:
    import markdown
    return markdown.markdown(text)


In [None]:
text = """# The Dance of Markdown

In the realm of **digital expression**,
*Italics* sway with gentle impression.
~~Mistakes fade~~ as thoughts evolve,
While `code blocks` help problems solve.

```c++
code
```
---

> Wisdom quoted stands apart,
> Indented like whispers of the heart.

# The Markdown Voyage

**Bold** ventures into *italic* seas,
Where `code` meets poetry with ease.
> Blockquotes whisper ancient lore,
> While lists count stars on distant shore:

1. First light breaks through the dawn
2. Second wind carries us on
3. Third wish upon a falling star

```
Code blocks wrapped in silent grace
Indented text finds its place
```

Hyperlinks bridge worlds apart,
~~Strikethrough~~ memories in my heart.

---

Horizons split with simple lines,
[Links](https://example.com) like constellations shine.
~~Mistakes~~ are crossed but not forgotten,
As tables stand with columns rotten:

| Dreams | Reality |
|--------|---------|
| Vast   | Waiting |
| Bright | Fading  |


```
Within these fences code remains,
Like thoughts inside poetic brains.
```

## The journey ends
But markdown stays,
^In^ ~tiny~ ways.
"""

text = """
```
test
```

Where `code` meets poetry with ease.

```c++
code
```
"""

text =  "~~Mistakes fade~~ as thoughts evolve,"

res = markdown_to_html(text)

In [None]:
print(res)

In [None]:
res