Skip to content

Commit

Permalink
Merge da6d885 into e8ddeee
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvbaldawa committed Apr 27, 2014
2 parents e8ddeee + da6d885 commit 6d00845
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Expand Up @@ -76,6 +76,7 @@ Features
Bugfixes
--------

* Fix extra root tags being added by lxml.html.tostring
* not having typogrify installed now produces a valid error (Issue #1262)
* Pages were not rebuilt when DEMOTE_HEADERS was changed (Issue #1261)
* code.css was not rebuilt, even though there were changes in v6.4.0 to its
Expand Down
26 changes: 19 additions & 7 deletions nikola/post.py
Expand Up @@ -415,6 +415,18 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
All links in the returned HTML will be relative.
The HTML returned is a bare fragment, not a full document.
"""
def strip_root_element(el):
    """Serialize the contents of ``el`` without its own enclosing tag.

    Required because lxml has a tendency to add <div> or <body> root
    tags to strings which are generated by using
    lxml.html.tostring().

    The result is the element's leading text (empty string when there
    is none) followed by the unicode serialization of each direct child,
    in document order.

    :param Element el: the root element to strip
    """
    leading_text = el.text or ''
    serialized_children = [
        lxml.html.tostring(child, encoding='unicode')
        for child in el.iterchildren()
    ]
    return leading_text + ''.join(serialized_children)

if lang is None:
lang = nikola.utils.LocaleBorg().current_lang
Expand All @@ -439,10 +451,7 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
# data here is a full HTML doc, including HTML and BODY tags
# which is not ideal (Issue #464)
try:
body = document.body
data = (body.text or '') + ''.join(
[lxml.html.tostring(child, encoding='unicode')
for child in body.iterchildren()])
data = strip_root_element(document.body)
except IndexError: # No body there, it happens sometimes
pass

Expand All @@ -466,7 +475,10 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
remaining_paragraph_count=self.remaining_paragraph_count)
# This closes all open tags and sanitizes the broken HTML
document = lxml.html.fromstring(teaser)
data = lxml.html.tostring(document, encoding='unicode')
try:
data = strip_root_element(document)
except IndexError:
pass

if data and strip_html:
try:
Expand All @@ -481,9 +493,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
try:
document = lxml.html.fromstring(data)
demote_headers(document, self.demote_headers)
data = strip_root_element(document)
except (lxml.etree.ParserError, IndexError):
data = lxml.html.tostring(document, encoding='unicode')
except lxml.etree.ParserError:
pass

return data

Expand Down

0 comments on commit 6d00845

Please sign in to comment.