From 4aaadabbf1eedbb275c9f9623d6e82a981554c3d Mon Sep 17 00:00:00 2001
From: tobes
Date: Tue, 2 Apr 2013 13:17:21 +0100
Subject: [PATCH] [#708] Fix render_markdown() helper function

---
 ckan/lib/helpers.py | 70 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 50d2f27326c..a1ded633c43 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -954,7 +954,7 @@ def related_item_link(related_item_dict):
 
 def tag_link(tag):
     url = url_for(controller='tag', action='read', id=tag['name'])
-    return link_to(tag['name'], url)
+    return link_to(tag.get('title', tag['name']), url)
 
 
 def group_link(group):
@@ -1365,12 +1365,76 @@ def get_request_param(parameter_name, default=None):
     return request.params.get(parameter_name, default)
 
 
-def render_markdown(data):
+# find all inner text of html eg `<b>moo</b>` gets `moo` but not of <a> tags
+# as this would lead to linkifying links if they are urls.
+RE_MD_GET_INNER_HTML = re.compile(
+    r'(^|(?:<(?!a\b)[^>]*>))([^<]+)(?=<|$)',
+    flags=re.UNICODE
+)
+
+# find all `internal links` eg. tag:moo, dataset:1234, tag:"my tag"
+RE_MD_INTERNAL_LINK = re.compile(
+    r'\b(tag|package|dataset|group):((")?(?(3)[ \w\-.]+|[\w\-.]+)(?(3)"))',
+    flags=re.UNICODE
+)
+
+# find external links eg http://foo.com, https:/bar.org/foobar.html
+RE_MD_EXTERNAL_LINK = re.compile(r'(\bhttps?:\/\/[\w\-\.,@?^=%&;:\/~\\+#]*)',
+    flags=re.UNICODE
+)
+
+# find all tags but ignore < in the strings so that we can use it correctly
+# in markdown
+RE_MD_HTML_TAGS = re.compile('<[^><]*>')
+
+def html_auto_link(data):
+    '''Linkifies HTML
+
+    tag:... converted to a tag link
+    dataset:... converted to a dataset link
+    group:... converted to a group link
+    http://... converted to a link
+    '''
+    def makelink(matchobj):
+        obj = matchobj.group(1)
+        name = matchobj.group(2)
+        title = '%s:%s' % (obj, name)
+        if obj == 'tag':
+            return tag_link({'name': name.strip('"'),
+                             'title': title})
+        elif obj == 'group':
+            return group_link({'name': name,
+                               'title': title})
+        elif obj in ['dataset', 'package']:
+            return dataset_link({'name': name,
+                                 'title': title})
+
+    def link(matchobj):
+        return '<a href="%s" target="_blank" rel="nofollow">%s</a>' \
+            % (matchobj.group(1), matchobj.group(1))
+
+    def process(matchobj):
+        data = matchobj.group(2)
+        data = RE_MD_INTERNAL_LINK.sub(makelink, data)
+        data = RE_MD_EXTERNAL_LINK.sub(link, data)
+        return matchobj.group(1) + data
+
+    data = RE_MD_GET_INNER_HTML.sub(process, data)
+    return data
+
+
+def render_markdown(data, auto_link=True):
     ''' returns the data as rendered markdown '''
     # cope with data == None
     if not data:
         return ''
-    return literal(ckan.misc.MarkdownFormat().to_html(data))
+    data = RE_MD_HTML_TAGS.sub('', data.strip())
+    data = markdown(data, safe_mode=True)
+    # tags can be added by tag:... or tag:"...." and a link will be made
+    # from it
+    if auto_link:
+        data = html_auto_link(data)
+    return literal(data)
 
 
 def format_resource_items(items):