Skip to content

Commit

Permalink
Replace markdown library with python-markdown
Browse files Browse the repository at this point in the history
Replaces the previous markdown renderer that was problematic (and
unmaintained) with the more widely used python-markdown.

As python-markdown has deprecated the safe_mode for removing HTML, this
PR also contains bleach to strip out any HTML provided - as recommended
by the python-markdown docs.

The only noticeable change in rendering is that the previous library
inserted a newline before a closing p tag, which python-markdown doesn't
do.  This is unlikely to change how the content is rendered.
  • Loading branch information
rossjones committed Dec 1, 2015
1 parent 48521ed commit 2bd1379
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 24 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.rst
Expand Up @@ -18,6 +18,9 @@ Changes and deprecations

https://github.com/ckan/ckanext-dcat#rdf-dcat-endpoints

* The library used to render markdown has been changed to python-markdown. This
introduces both ``python-markdown`` and ``bleach`` as dependencies, as ``bleach``
is used to clean any HTML provided to the markdown processor.

v2.4.1 2015-09-02
=================
Expand Down Expand Up @@ -113,8 +116,8 @@ Changes and deprecations
Custom templates or users of this API call will need to pass
``include_datasets=True`` to include datasets in the response.

* The ``vocabulary_show`` and ``tag_show`` API calls no longer returns the
``packages`` key - i.e. datasets that use the vocabulary or tag.
* The ``vocabulary_show`` and ``tag_show`` API calls no longer returns the
``packages`` key - i.e. datasets that use the vocabulary or tag.
However ``tag_show`` now has an ``include_datasets`` option. (#1886)

* Config option ``site_url`` is now required - CKAN will not abort during
Expand Down
7 changes: 4 additions & 3 deletions ckan/lib/helpers.py
Expand Up @@ -23,10 +23,11 @@
from webhelpers.html import escape, HTML, literal, url_escape
from webhelpers.html.tools import mail_to
from webhelpers.html.tags import *
from webhelpers.markdown import markdown
from webhelpers import paginate
from webhelpers.text import truncate
import webhelpers.date as date
from markdown import markdown
from bleach import clean as clean_html
from pylons import url as _pylons_default_url
from pylons.decorators.cache import beaker_cache
from pylons import config
Expand Down Expand Up @@ -1723,10 +1724,10 @@ def render_markdown(data, auto_link=True, allow_html=False):
if not data:
return ''
if allow_html:
data = markdown(data.strip(), safe_mode=False)
data = markdown(data.strip())
else:
data = RE_MD_HTML_TAGS.sub('', data.strip())
data = markdown(data, safe_mode=True)
data = markdown(clean_html(data, strip=True))
# tags can be added by tag:... or tag:"...." and a link will be made
# from it
if auto_link:
Expand Down
25 changes: 11 additions & 14 deletions ckan/tests/legacy/misc/test_format_text.py
Expand Up @@ -10,12 +10,10 @@ def test_markdown(self):
*Some italicized text.*
'''
exp = '''<h1>Hello World</h1>
<p><strong>Some bolded text.</strong>
</p>
<p><em>Some italicized text.</em>
</p>'''
<p><strong>Some bolded text.</strong></p>
<p><em>Some italicized text.</em></p>'''
out = h.render_markdown(instr)
assert out == exp
assert out == exp, out

def test_markdown_blank(self):
instr = None
Expand All @@ -24,13 +22,13 @@ def test_markdown_blank(self):

def test_evil_markdown(self):
instr = 'Evil <script src="http://evilserver.net/evil.js";>'
exp = '''<p>Evil \n</p>'''
exp = '''<p>Evil </p>'''
out = h.render_markdown(instr)
assert out == exp, out

def test_internal_link(self):
instr = 'dataset:test-_pkg'
exp = '<p><a href="/dataset/test-_pkg">dataset:test-_pkg</a>\n</p>'
exp = '<p><a href="/dataset/test-_pkg">dataset:test-_pkg</a></p>'
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)

Expand All @@ -44,14 +42,14 @@ def test_internal_tag_link(self):
def test_internal_tag_linked_with_quotes(self):
"""Asserts links like 'tag:"test-tag"' work"""
instr = 'tag:"test-tag" foobar'
exp = '<p><a href="/tag/test-tag">tag:&#34;test-tag&#34;</a> foobar\n</p>'
exp = '<p><a href="/tag/test-tag">tag:&#34;test-tag&#34;</a> foobar</p>'
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)

def test_internal_tag_linked_with_quotes_and_space(self):
"""Asserts links like 'tag:"test tag"' work"""
instr = 'tag:"test tag" foobar'
exp = '<p><a href="/tag/test%20tag">tag:&#34;test tag&#34;</a> foobar\n</p>'
exp = '<p><a href="/tag/test%20tag">tag:&#34;test tag&#34;</a> foobar</p>'
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)

Expand All @@ -78,7 +76,7 @@ def test_internal_tag_with_no_closing_quote_does_not_match(self):
def test_tag_names_match_simple_punctuation(self):
"""Asserts punctuation and capital letters are matched in the tag name"""
instr = 'tag:"Test- _." foobar'
exp = '<p><a href="/tag/Test-%20_.">tag:&#34;Test- _.&#34;</a> foobar\n</p>'
exp = '<p><a href="/tag/Test-%20_.">tag:&#34;Test- _.&#34;</a> foobar</p>'
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)

Expand All @@ -101,7 +99,7 @@ def test_tag_names_dont_match_non_space_whitespace(self):
def test_tag_names_with_unicode_alphanumeric(self):
"""Asserts that unicode alphanumeric characters are captured"""
instr = u'tag:"Japanese katakana \u30a1" blah'
exp = u'<p><a href="/tag/Japanese%20katakana%20%E3%82%A1">tag:&#34;Japanese katakana \u30a1&#34;</a> blah\n</p>'
exp = u'<p><a href="/tag/Japanese%20katakana%20%E3%82%A1">tag:&#34;Japanese katakana \u30a1&#34;</a> blah</p>'
out = h.render_markdown(instr)
assert exp in out, u'\nGot: %s\nWanted: %s' % (out, exp)

Expand Down Expand Up @@ -135,7 +133,7 @@ def test_auto_link_after_whitespace(self):

def test_malformed_link_1(self):
instr = u'<a href=\u201dsomelink\u201d>somelink</a>'
exp = '<p>somelink\n</p>'
exp = '<p>somelink</p>'
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)

Expand All @@ -147,8 +145,7 @@ def test_multiline_links(self):
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"'''
exp = '''<p>I get 10 times more traffic from <a href="http://google.com/" title="Google">Google</a> than from
<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.
</p>'''
<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>'''
# NB when this is put into Genshi, it will close the tag for you.
out = h.render_markdown(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
2 changes: 1 addition & 1 deletion ckan/tests/legacy/models/test_package.py
Expand Up @@ -106,7 +106,7 @@ def test_as_dict(self):
assert out['metadata_modified'] == pkg.metadata_modified.isoformat()
assert out['metadata_created'] == pkg.metadata_created.isoformat()
assert_equal(out['notes'], pkg.notes)
assert_equal(out['notes_rendered'], '<p>A great package like <a href="/dataset/pollution_stats">package:pollution_stats</a>\n</p>')
assert_equal(out['notes_rendered'], '<p>A great package like <a href="/dataset/pollution_stats">package:pollution_stats</a></p>')


class TestPackageWithTags:
Expand Down
13 changes: 9 additions & 4 deletions ckan/tests/lib/test_helpers.py
Expand Up @@ -132,22 +132,27 @@ def test_render_markdown_allow_html(self):

def test_render_markdown_not_allow_html(self):
data = '<h1>moo</h1>'
output = '<p>moo\n</p>'
output = '<p>moo</p>'
eq_(h.render_markdown(data), output)

def test_render_markdown_auto_link_without_path(self):
data = 'http://example.com'
output = '<p><a href="http://example.com" target="_blank" rel="nofollow">http://example.com</a>\n</p>'
output = '<p><a href="http://example.com" target="_blank" rel="nofollow">http://example.com</a></p>'
eq_(h.render_markdown(data), output)

def test_render_markdown_auto_link(self):
data = 'https://example.com/page.html'
output = '<p><a href="https://example.com/page.html" target="_blank" rel="nofollow">https://example.com/page.html</a>\n</p>'
output = '<p><a href="https://example.com/page.html" target="_blank" rel="nofollow">https://example.com/page.html</a></p>'
eq_(h.render_markdown(data), output)

def test_render_markdown_auto_link_ignoring_trailing_punctuation(self):
data = 'My link: http://example.com/page.html.'
output = '<p>My link: <a href="http://example.com/page.html" target="_blank" rel="nofollow">http://example.com/page.html</a>.\n</p>'
output = '<p>My link: <a href="http://example.com/page.html" target="_blank" rel="nofollow">http://example.com/page.html</a>.</p>'
eq_(h.render_markdown(data), output)

def test_render_naughty_markdown(self):
data = u'* [Foo (http://foo.bar) * Bar] (http://foo.bar)'
output = u'<ul>\n<li>[Foo (<a href="http://foo.bar" target="_blank" rel="nofollow">http://foo.bar</a>) * Bar] (<a href="http://foo.bar" target="_blank" rel="nofollow">http://foo.bar</a>)</li>\n</ul>'
eq_(h.render_markdown(data), output)


Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Expand Up @@ -7,12 +7,14 @@
argparse==1.4.0 # via ofs
babel==0.9.6
beaker==1.7.0
bleach==1.4.2
decorator==4.0.4 # via pylons, sqlalchemy-migrate
fanstatic==0.12
formencode==1.3.0 # via pylons
Genshi==0.6
Jinja2==2.6
mako==1.0.2 # via pylons
Markdown==2.1
markupsafe==0.23 # via mako, webhelpers
nose==1.3.7 # via pylons
ofs==0.4.1
Expand Down

0 comments on commit 2bd1379

Please sign in to comment.