From e6e755e0a0c1be24fa5f02e3850751558679ce15 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Mon, 12 Dec 2016 15:02:23 +0200 Subject: [PATCH] `render_markdown` breaks links with ampersands Added a few additional allowed tags to the `bleach.clean` function and changed the sanitization sequence so that markdown is applied first and only after that is the result cleaned --- ckan/lib/helpers.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index bba18104ccc..47b75f07a99 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -27,7 +27,7 @@ from webhelpers.text import truncate import webhelpers.date as date from markdown import markdown -from bleach import clean as clean_html +from bleach import clean as clean_html, ALLOWED_TAGS from pylons import url as _pylons_default_url from pylons.decorators.cache import beaker_cache from pylons import config @@ -45,11 +45,18 @@ import ckan.logic as logic import ckan.lib.uploader as uploader import ckan.authz as authz - from ckan.common import ( _, ungettext, g, c, request, session, json, OrderedDict ) + +MARKDOWN_TAGS = set([ + 'del', 'dd', 'dl', 'dt', 'h1', 'h2', + 'h3', 'img', 'kbd', 'p', 'pre', 's', + 'sup', 'sub', 'strike', 'br', 'hr' +]).union(ALLOWED_TAGS) + + get_available_locales = i18n.get_available_locales get_locales_dict = i18n.get_locales_dict @@ -1727,7 +1734,7 @@ def render_markdown(data, auto_link=True, allow_html=False): data = markdown(data.strip()) else: data = RE_MD_HTML_TAGS.sub('', data.strip()) - data = markdown(clean_html(data, strip=True)) + data = clean_html(markdown(data), strip=True, tags=MARKDOWN_TAGS) # tags can be added by tag:... or tag:"...." and a link will be made # from it if auto_link: