Skip to content

Commit

Permalink
Merge bf75278 into 736470d
Browse files Browse the repository at this point in the history
  • Loading branch information
jemrobinson committed Jul 18, 2019
2 parents 736470d + bf75278 commit 573b2f1
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
6 changes: 5 additions & 1 deletion readabilipy/simplifiers/html.py
Expand Up @@ -196,7 +196,11 @@ def insert_paragraph_breaks(soup):
new_p_element = soup.new_tag("p")
new_p_element.string = text_fragment
parent_element.insert_after(new_p_element)
parent_element.string.replace_with(text_fragments[0])
# Replace the parent string if it exists or add one if not
if parent_element.string:
parent_element.string.replace_with(text_fragments[0])
else:
parent_element.string = text_fragments[0]
# Otherwise we want to simply include all the text fragments as independent NavigableStrings (that will be wrapped later)
else:
# Iterate in reverse order as we are repeatedly adding new elements directly after the original one
Expand Down
14 changes: 14 additions & 0 deletions tests/test_weird_html.py
Expand Up @@ -51,3 +51,17 @@ def test_tags_inside_words():
"""a<a href="http://example.com">i</a>sle""",
"<div><p>aisle</p></div>"
)


# Test splitting for unclosed tags inside paragraphs
def test_paragraph_splitting_with_unclosed_tags():
    """Check that a paragraph holding an unclosed tag is still split in two.

    The input is a single <p> whose text is preceded by a <meta> tag with no
    closing tag and is divided by a double <br>; the expected output is two
    separate <p> elements wrapped in a <div>.
    """
    html_with_unclosed_tag = """
<p>
<meta charset="utf-8">First paragraph.
<br><br>
Second paragraph.
</p>"""
    expected_output = "<div><p>First paragraph.</p><p>Second paragraph.</p></div>"
    check_exact_html_output(html_with_unclosed_tag, expected_output)

0 comments on commit 573b2f1

Please sign in to comment.