From 0db8b6a47aa740821c5fe1ff430431da6f76fd3f Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Fri, 10 May 2024 17:17:26 +0200 Subject: [PATCH] Update posts.py --- api/posts.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/api/posts.py b/api/posts.py index 55e2d7f..ea3427a 100644 --- a/api/posts.py +++ b/api/posts.py @@ -1313,8 +1313,7 @@ def get_summary(content_html: str = None, maxlen: int = 450): return None content_html = re.sub(r"(
|
|

|)", " ", content_html) content_html = re.sub(r"(h1>|h2>|h3>|h4>)", "strong> ", content_html) - # print(content_html) - # TODO: remove more content not appropriate for summary + sanitized = nh3.clean( content_html, tags={"b", "i", "em", "strong", "sub", "sup"}, @@ -1323,7 +1322,7 @@ def get_summary(content_html: str = None, maxlen: int = 450): ) sanitized = re.sub(r"\n+", " ", sanitized).strip() truncated = py_.truncate(sanitized, maxlen, omission="", separator=" ") - + # remove incomplete last sentence if len(truncated) > 0 and truncated[-1] not in [".", "!", "?", ";"]: sentences = re.split(r"(?<=\w{3}[.!?;])\s+", truncated) @@ -1335,6 +1334,10 @@ def get_summary(content_html: str = None, maxlen: int = 450): # make sure html tags are closed and trailing whitespace is removed soup = get_soup(truncated) string = soup.prettify() + + # workaround to remove script tag + script_tag = """document.addEventListener("DOMContentLoaded", () => { // Add skip link to the page let element = document.getElementById("quarto-header"); let skiplink = '<a id="skiplink" class="visually-hidden-focusable" href="#quarto-document-content">Skip to main content</a>'; element.insertAdjacentHTML("beforebegin", skiplink); });""" + string = string.replace(script_tag, "") return string.strip()