Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

[1.5.x] Fixed #19237 -- Improved strip_tags utility

The previous pattern didn't properly address cases where '>'
was present inside quoted tag content.
Backport of bf1871d from master.
  • Loading branch information...
commit 9efe1a7210ee161d5688f66a759bcd8d89d33142 1 parent a708b8f
@khoomeister khoomeister authored claudep committed
Showing with 5 additions and 1 deletion.
  1. +2 −1  django/utils/html.py
  2. +3 −0  tests/regressiontests/utils/html.py
View
3  django/utils/html.py
@@ -33,6 +33,7 @@
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
+strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
def escape(text):
@@ -117,7 +118,7 @@ def linebreaks(value, autoescape=False):
def strip_tags(value):
"""Returns the given HTML with all tags stripped."""
- return re.sub(r'<[^>]*?>', '', force_text(value))
+ return strip_tags_re.sub('', force_text(value))
strip_tags = allow_lazy(strip_tags)
def remove_tags(html, tags):
View
3  tests/regressiontests/utils/html.py
@@ -65,6 +65,9 @@ def test_strip_tags(self):
('<f', '<f'),
('</fe', '</fe'),
('<x>b<y>', 'b'),
+ ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
+ ('a<p a >b</p>c', 'abc'),
+ ('d<a:b c:d>e</p>f', 'def'),
)
for value, output in items:
self.check_output(f, value, output)
Please sign in to comment.
Something went wrong with that request. Please try again.