Skip to content

Commit

Permalink
Merge pull request #3580 from getnikola/fix-3573-div-closed-too-soon
Browse files Browse the repository at this point in the history
Fix #3573, fix #3564 — fix <div> closed too soon on index page
  • Loading branch information
Kwpolska committed Oct 13, 2021
2 parents 9f10746 + 7a5e440 commit 334000e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Features
Bugfixes
--------

* Fix bug with posts after the first one appearing shifted due to a
``<div>`` closed too early (Issue #3573, #3564)
* Fix support for files outside of site root directory on Windows
* Support passing ``--backend`` and ``--db-file`` to ``nikola auto``
* Support blank values in certain WordPress-imported structures
Expand Down
16 changes: 5 additions & 11 deletions nikola/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,10 +910,7 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
if self.hyphenate:
hyphenate(document, real_lang)

try:
data = lxml.html.tostring(document.body, encoding='unicode')
except Exception:
data = lxml.html.tostring(document, encoding='unicode')
data = utils.html_tostring_fragment(document)

if teaser_only:
teaser_regexp = self.config.get('TEASER_REGEXP', TEASER_REGEXP)
Expand All @@ -936,10 +933,7 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
post_title=self.title(lang))
# This closes all open tags and sanitizes the broken HTML
document = lxml.html.fromstring(teaser)
try:
data = lxml.html.tostring(document.body, encoding='unicode')
except IndexError:
data = lxml.html.tostring(document, encoding='unicode')
data = utils.html_tostring_fragment(document)

if data and strip_html:
try:
Expand All @@ -952,11 +946,11 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
if self.demote_headers:
# see above
try:
document = lxml.html.fromstring(data)
document = lxml.html.fragment_fromstring(data, "body")
demote_headers(document, self.demote_headers)
data = lxml.html.tostring(document.body, encoding='unicode')
data = utils.html_tostring_fragment(document)
except (lxml.etree.ParserError, IndexError):
data = lxml.html.tostring(document, encoding='unicode')
pass

return data

Expand Down
20 changes: 20 additions & 0 deletions nikola/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import datetime
import hashlib
import io
import lxml.html
import operator
import os
import re
Expand Down Expand Up @@ -656,6 +657,25 @@ def get_theme_chain(theme, themes_dirs):
return themes


def html_tostring_fragment(document):
    """Convert an HTML snippet to a fragment, ready for insertion elsewhere.

    Serializes ``document.body`` with ``lxml.html.tostring`` (falling back to
    serializing ``document`` itself if that fails), then removes a leading
    ``<html>``/``<body>`` and a trailing ``</body>``/``</html>`` wrapper so
    that only the inner markup remains.

    :param document: object accepted by ``lxml.html.tostring``; its ``body``
        attribute is tried first.
    :return: the serialized markup as ``str``, stripped of surrounding
        whitespace and of the bare wrapper tags listed above.
    """
    try:
        doc = lxml.html.tostring(document.body, encoding='unicode').strip()
    except Exception:
        # Deliberate best-effort fallback: whatever goes wrong while
        # reaching or serializing the body, serialize the whole node.
        # NOTE(review): lxml presumably raises IndexError when there is no
        # <body>; narrowing the clause to that is worth confirming.
        doc = lxml.html.tostring(document, encoding='unicode').strip()

    # Peel the wrappers outermost-first on both ends. Only bare tags (no
    # attributes) are recognised, exactly as before.
    for start in ("<html>", "<body>"):
        if doc.startswith(start):
            doc = doc[len(start):].strip()
    # "</html>" must be tried before "</body>": with the opposite order a
    # string ending in "</body></html>" would keep a stray "</body>".
    for end in ("</html>", "</body>"):
        if doc.endswith(end):
            doc = doc[:-len(end)].strip()
    return doc


INCOMPLETE_LANGUAGES_WARNED = set()


Expand Down

0 comments on commit 334000e

Please sign in to comment.