Skip to content

Commit

Permalink
Upgrade to Genshi 0.7.1 (#1721)
Browse files Browse the repository at this point in the history
* Upgrade to Genshi 0.7.1
* Bump web.py to 0.40-dev1 for Python 3
  • Loading branch information
tfmorris authored and mekarpeles committed Dec 21, 2018
1 parent 9b2890d commit 082b75d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 24 deletions.
9 changes: 6 additions & 3 deletions openlibrary/core/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
]
__docformat__ = "restructuredtext en"

def sanitize(html):
def sanitize(html, encoding = None):
"""Removes unsafe tags and attributes from html and adds
``rel="nofollow"`` attribute to all external links.
"""
Expand All @@ -65,8 +65,11 @@ def get_nofollow(name, event):
return 'nofollow'

try:
html = genshi.HTML(html)
except (genshi.ParseError, UnicodeDecodeError, UnicodeError):
html = genshi.HTML(html, encoding = encoding)

# except (genshi.ParseError, UnicodeDecodeError, UnicodeError) as e:
# don't catch Unicode errors so we can tell if we're getting bytes
except genshi.ParseError:
if BeautifulSoup:
# Bad html. Tidy it up using BeautifulSoup
html = str(BeautifulSoup(html, "lxml"))
Expand Down
18 changes: 9 additions & 9 deletions openlibrary/tests/core/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,28 @@

def test_sanitize():
# plain html should pass through
assert h.sanitize("hello") == "hello"
assert h.sanitize("<p>hello</p>") == "<p>hello</p>"
assert h.sanitize(u"hello") == u"hello"
assert h.sanitize(u"<p>hello</p>") == u"<p>hello</p>"

# broken html must be corrected
assert h.sanitize("<p>hello") == "<p>hello</p>"
assert h.sanitize(u"<p>hello") == u"<p>hello</p>"

# css class is fine
assert h.sanitize('<p class="foo">hello</p>') == '<p class="foo">hello</p>'
assert h.sanitize(u'<p class="foo">hello</p>') == u'<p class="foo">hello</p>'

# style attribute must be stripped
assert h.sanitize('<p style="color: red">hello</p>') == '<p>hello</p>'
assert h.sanitize(u'<p style="color: red">hello</p>') == u'<p>hello</p>'

# style tags must be stripped
assert h.sanitize('<style type="text/css">p{color: red;}</style><p>hello</p>') == '<p>hello</p>'
assert h.sanitize(u'<style type="text/css">p{color: red;}</style><p>hello</p>') == u'<p>hello</p>'

# script tags must be stripped
assert h.sanitize('<script>alert("dhoom")</script>hello') == 'hello'
assert h.sanitize(u'<script>alert("dhoom")</script>hello') == u'hello'

# rel="nofollow" must be added to absolute links
assert h.sanitize('<a href="https://example.com">hello</a>') == '<a href="https://example.com" rel="nofollow">hello</a>'
assert h.sanitize(u'<a href="https://example.com">hello</a>') == u'<a href="https://example.com" rel="nofollow">hello</a>'
# relative links should pass through
assert h.sanitize('<a href="relpath">hello</a>') == '<a href="relpath">hello</a>'
assert h.sanitize(u'<a href="relpath">hello</a>') == u'<a href="relpath">hello</a>'

def test_safesort():
from datetime import datetime
Expand Down
22 changes: 11 additions & 11 deletions openlibrary/tests/core/test_olmarkdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ def p(html):
# markdown always wraps the result in <p>.
return "<p>%s\n</p>" % html

assert md("**foo**") == p("<strong>foo</strong>")
assert md("<b>foo</b>") == p('<b>foo</b>')
assert md("https://openlibrary.org") == p(
'<a href="https://openlibrary.org" rel="nofollow">' +
'https://openlibrary.org' +
'</a>'
assert md(u"**foo**") == p(u"<strong>foo</strong>")
assert md(u"<b>foo</b>") == p(u'<b>foo</b>')
assert md(u"https://openlibrary.org") == p(
u'<a href="https://openlibrary.org" rel="nofollow">' +
u'https://openlibrary.org' +
u'</a>'
)
assert md("http://example.org") == p(
'<a href="http://example.org" rel="nofollow">' +
'http://example.org' +
'</a>'
assert md(u"http://example.org") == p(
u'<a href="http://example.org" rel="nofollow">' +
u'http://example.org' +
u'</a>'
)

# why extra spaces?
assert md("a\nb") == p("a<br/>\n b")
assert md(u"a\nb") == p(u"a<br/>\n b")
2 changes: 1 addition & 1 deletion requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Babel==2.5.3
beautifulsoup4==4.6.3
DBUtils==1.3
flake8==3.6.0
Genshi==0.6; python_version < '3.0'
Genshi==0.7.1
gunicorn==19.9.0
iptools==0.6.1; python_version < '3.0'
internetarchive==1.8.1
Expand Down

0 comments on commit 082b75d

Please sign in to comment.