Permalink
Browse files

Fixed #20568 -- truncatewords_html no longer splits words containing HTML entities.

Thanks yann0 at hotmail.com for the report.
  • Loading branch information...
1 parent 58d555c commit 40b95a24ae159b6600457a23d6c2779a18037b7b Jaap Roes committed with timgraham Jul 18, 2013
Showing with 13 additions and 1 deletion.
  1. +1 −1 django/utils/text.py
  2. +3 −0 tests/defaultfilters/tests.py
  3. +9 −0 tests/utils_tests/test_text.py
View
2 django/utils/text.py
@@ -22,7 +22,7 @@
capfirst = allow_lazy(capfirst, six.text_type)
# Set up regular expressions
-re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S)
+re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U|re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
View
3 tests/defaultfilters/tests.py
@@ -184,6 +184,9 @@ def test_truncatewords_html(self):
'<p>one <a href="#">two - three <br>four</a> five</p>')
self.assertEqual(truncatewords_html(
'\xc5ngstr\xf6m was here', 1), '\xc5ngstr\xf6m ...')
+ self.assertEqual(truncatewords_html('<i>Buenos d&iacute;as! '
+ '&#x00bf;C&oacute;mo est&aacute;?</i>', 3),
+ '<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>')
def test_upper(self):
self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT')
View
9 tests/utils_tests/test_text.py
@@ -82,6 +82,15 @@ def test_truncate_html_words(self):
self.assertEqual('<br>The <hr/>quick <em>brown...</em>',
truncator.words(3, '...', html=True ))
+ # Test html entities
+ truncator = text.Truncator('<i>Buenos d&iacute;as!'
+ ' &#x00bf;C&oacute;mo est&aacute;?</i>')
+ self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo...</i>',
+ truncator.words(3, '...', html=True))
+ truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
+ self.assertEqual('<p>I &lt;3 python...</p>',
+ truncator.words(3, '...', html=True))
+
def test_wrap(self):
digits = '1234 67 9'
self.assertEqual(text.wrap(digits, 100), '1234 67 9')

0 comments on commit 40b95a2

Please sign in to comment.