Skip to content

Commit

Permalink
Use BeautifulSoup with html5lib for better results
Browse files Browse the repository at this point in the history
  • Loading branch information
i-ky committed Nov 14, 2023
1 parent b9c8488 commit fbd342d
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 2 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ warn_unreachable = true
[[tool.mypy.overrides]]
module = [
"apiai.*",
+"bs4.*",
"feedparser.*",
"gitlint.*",
"googleapiclient.*",
Expand Down
1 change: 1 addition & 0 deletions zulip/integrations/rss/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
+bs4>=4.12.2
feedparser>=6.0.10
markdownify>=0.11.6
6 changes: 4 additions & 2 deletions zulip/integrations/rss/rss-bot
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ from html.parser import HTMLParser
from typing import Any, Dict, List, Optional, Tuple

import feedparser
-from markdownify import markdownify
+from bs4 import BeautifulSoup
+from markdownify import MarkdownConverter
from typing_extensions import override

import zulip
Expand Down Expand Up @@ -193,7 +194,8 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
body = unwrap_text(body)

def md(html: str) -> str:
-    return markdownify(html, escape_underscores=False)
+    soup: BeautifulSoup = BeautifulSoup(html, "html5lib")
+    return MarkdownConverter(escape_underscores=False).convert_soup(soup)

convert: Callable[[str], str] = strip_tags if opts.strip else md
content = f"**[{entry.title}]({entry.link})**\n{convert(body)}\n{entry.link}"
Expand Down

0 comments on commit fbd342d

Please sign in to comment.