Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Browse files

[1.6.x] Improved strip_tags and clarified documentation

The fact that strip_tags cannot guarantee to really strip all
non-safe HTML content was not clear enough. Also see:
Backport of 6ca6c36 from master.
  • Loading branch information...
commit d1503afd66ca8f2f8d3819ba8a60727e0ee66cec 1 parent c8c2d60
@claudep claudep authored
32 django/utils/
@@ -115,7 +115,10 @@ def linebreaks(value, autoescape=False):
class MLStripper(HTMLParser):
def __init__(self):
- HTMLParser.__init__(self)
+ if six.PY2:
+ HTMLParser.__init__(self)
+ else:
+ HTMLParser.__init__(self, strict=False)
self.fed = []
def handle_data(self, d):
@@ -127,16 +130,37 @@ def handle_charref(self, name):
def get_data(self):
return ''.join(self.fed)
-def strip_tags(value):
- """Returns the given HTML with all tags stripped."""
+def _strip_once(value):
+ """
+ Internal tag stripping utility used by strip_tags.
+ """
s = MLStripper()
- s.close()
except HTMLParseError:
return value
+ try:
+ s.close()
+ except (HTMLParseError, UnboundLocalError) as err:
+ # UnboundLocalError because of http://bugs.python.org/issue17802
+ # on Python 3.2, triggered by strict=False mode of HTMLParser
+ return s.get_data() + s.rawdata
return s.get_data()
+def strip_tags(value):
+ """Returns the given HTML with all tags stripped."""
+ while True:
+ if not ('<' in value or '>' in value):
+ return value
+ new_value = _strip_once(value)
+ if new_value == value:
+ # _strip_once was not able to detect more tags
+ return value
+ else:
+ value = new_value
strip_tags = allow_lazy(strip_tags)
def remove_tags(html, tags):
12 docs/ref/templates/builtins.txt
@@ -2012,7 +2012,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``.
-Strips all [X]HTML tags.
+Makes all possible efforts to strip all [X]HTML tags.
For example::
@@ -2021,6 +2021,16 @@ For example::
If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the
output will be ``"Joel is a slug"``.
+.. admonition:: No safety guarantee
+ Note that ``striptags`` doesn't give any guarantee about its output being
+ entirely HTML safe, particularly with invalid HTML input. So **NEVER**
+ apply the ``safe`` filter to a ``striptags`` output.
+ If you are looking for something more robust, you can use the ``bleach``
+ Python library, notably its `clean`_ method.
+.. _clean:
.. templatefilter:: time
18 docs/ref/utils.txt
@@ -616,17 +616,23 @@ escaping HTML.
.. function:: strip_tags(value)
- Removes anything that looks like an html tag from the string, that is
- anything contained within ``<>``.
+ Tries to remove anything that looks like an HTML tag from the string, that
+ is anything contained within ``<>``.
+ Absolutely NO guarantee is provided about the resulting string being entirely
+ HTML safe. So NEVER mark the result of a ``strip_tags`` call as safe without
+ escaping it first, for example with :func:`~django.utils.html.escape`.
For example::
- If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the
- return value will be ``"Joel is a slug"``. Note that ``strip_tags`` result
- may still contain unsafe HTML content, so you might use
- :func:`~django.utils.html.escape` to make it a safe string.
+ If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``
+ the return value will be ``"Joel is a slug"``.
+ If you are looking for a more robust solution, take a look at the `bleach`_
+ Python library.
+ .. _bleach:
.. versionchanged:: 1.6
2  tests/utils_tests/
@@ -80,6 +80,8 @@ def test_strip_tags(self):
('a<p a >b</p>c', 'abc'),
('d<a:b c:d>e</p>f', 'def'),
('<strong>foo</strong><a href="">bar</a>', 'foobar'),
+ ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'),
+ ('<script>alert()</script>&h', 'alert()&h'),
for value, output in items:
self.check_output(f, value, output)
Please sign in to comment.
Something went wrong with that request. Please try again.