Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixed #19496 -- Added truncatechars_html filter.

Thanks esevece for the suggestion and Nick Sandford and Martin Warne
for the initial work on the patch.
  • Loading branch information...
commit f94f466cd3461527fb76a3e8951039a3c2388829 1 parent 390001c
@saturn597 saturn597 authored timgraham committed
View
17 django/template/defaultfilters.py
@@ -281,6 +281,23 @@ def truncatechars(value, arg):
@register.filter(is_safe=True)
@stringfilter
+def truncatechars_html(value, arg):
+ """
+ Truncates HTML after a certain number of chars.
+
+ Argument: Number of chars to truncate after.
+
+ Newlines in the HTML are preserved.
+ """
+ try:
+ length = int(arg)
+ except ValueError: # invalid literal for int()
+ return value # Fail silently.
+ return Truncator(value).chars(length, html=True)
+
+
+@register.filter(is_safe=True)
+@stringfilter
def truncatewords(value, arg):
"""
Truncates a string after a certain number of words.
View
53 django/utils/text.py
@@ -24,6 +24,7 @@
# Set up regular expressions
re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
+re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines
re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
@@ -82,7 +83,7 @@ def add_truncation_text(self, text, truncate=None):
return text
return '%s%s' % (text, truncate)
- def chars(self, num, truncate=None):
+ def chars(self, num, truncate=None, html=False):
"""
Returns the text truncated to be no longer than the specified number
of characters.
@@ -101,7 +102,15 @@ def chars(self, num, truncate=None):
truncate_len -= 1
if truncate_len == 0:
break
+ if html:
+ return self._truncate_html(length, truncate, text, truncate_len, False)
+ return self._text_chars(length, truncate, text, truncate_len)
+ chars = allow_lazy(chars)
+ def _text_chars(self, length, truncate, text, truncate_len):
+ """
+ Truncates a string after a certain number of chars.
+ """
s_len = 0
end_index = None
for i, char in enumerate(text):
@@ -119,7 +128,6 @@ def chars(self, num, truncate=None):
# Return the original string since no truncation was necessary
return text
- chars = allow_lazy(chars)
def words(self, num, truncate=None, html=False):
"""
@@ -129,7 +137,7 @@ def words(self, num, truncate=None, html=False):
"""
length = int(num)
if html:
- return self._html_words(length, truncate)
+ return self._truncate_html(length, truncate, self._wrapped, length, True)
return self._text_words(length, truncate)
words = allow_lazy(words)
@@ -145,40 +153,45 @@ def _text_words(self, length, truncate):
return self.add_truncation_text(' '.join(words), truncate)
return ' '.join(words)
- def _html_words(self, length, truncate):
+ def _truncate_html(self, length, truncate, text, truncate_len, words):
"""
- Truncates HTML to a certain number of words (not counting tags and
- comments). Closes opened tags if they were correctly closed in the
- given HTML.
+ Truncates HTML to a certain number of chars (not counting tags and
+ comments), or, if words is True, then to a certain number of words.
+ Closes opened tags if they were correctly closed in the given HTML.
Newlines in the HTML are preserved.
"""
- if length <= 0:
+ if words and length <= 0:
return ''
+
html4_singlets = (
'br', 'col', 'link', 'base', 'img',
'param', 'area', 'hr', 'input'
)
- # Count non-HTML words and keep note of open tags
+
+ # Count non-HTML chars/words and keep note of open tags
pos = 0
end_text_pos = 0
- words = 0
+ current_len = 0
open_tags = []
- while words <= length:
- m = re_words.search(self._wrapped, pos)
+
+ regex = re_words if words else re_chars
+
+ while current_len <= length:
+ m = regex.search(text, pos)
if not m:
# Checked through whole string
break
pos = m.end(0)
if m.group(1):
- # It's an actual non-HTML word
- words += 1
- if words == length:
+ # It's an actual non-HTML word or char
+ current_len += 1
+ if current_len == truncate_len:
end_text_pos = pos
continue
# Check for tag
tag = re_tag.match(m.group(0))
- if not tag or end_text_pos:
+ if not tag or current_len >= truncate_len:
# Don't worry about non tags or tags after our truncate point
continue
closing_tag, tagname, self_closing = tag.groups()
@@ -199,10 +212,10 @@ def _html_words(self, length, truncate):
else:
# Add it to the start of the open tags list
open_tags.insert(0, tagname)
- if words <= length:
- # Don't try to close tags if we don't need to truncate
- return self._wrapped
- out = self._wrapped[:end_text_pos]
+
+ if current_len <= length:
+ return text
+ out = text[:end_text_pos]
truncate_text = self.add_truncation_text('', truncate)
if truncate_text:
out += truncate_text
View
20 docs/ref/templates/builtins.txt
@@ -2172,6 +2172,26 @@ For example::
If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
+.. templatefilter:: truncatechars_html
+
+truncatechars_html
+^^^^^^^^^^^^^^^^^^
+
+.. versionadded:: 1.7
+
+Similar to :tfilter:`truncatechars`, except that it is aware of HTML tags. Any
+tags that are opened in the string and not closed before the truncation point
+are closed immediately after the truncation.
+
+For example::
+
+ {{ value|truncatechars_html:9 }}
+
+If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
+``"<p>Joel i...</p>"``.
+
+Newlines in the HTML content will be preserved.
+
.. templatefilter:: truncatewords
truncatewords
View
5 docs/releases/1.7.txt
@@ -657,7 +657,7 @@ Templates
* :func:`django.shortcuts.render()`
* :func:`django.shortcuts.render_to_response()`
-* The :tfilter:`time` filter now accepts timzone-related :ref:`format
+* The :tfilter:`time` filter now accepts timezone-related :ref:`format
specifiers <date-and-time-formatting-specifiers>` ``'e'``, ``'O'``, ``'T'``
and ``'Z'`` and is able to digest :ref:`time-zone-aware
<naive_vs_aware_datetimes>` ``datetime`` instances performing the expected
@@ -668,6 +668,9 @@ Templates
otherwise. It also now accepts an optional ``using`` keyword argument to
control which cache it uses.
+* The new :tfilter:`truncatechars_html` filter truncates a string to be no
+ longer than the specified number of characters, taking HTML into account.
+
Requests
^^^^^^^^
View
21 tests/defaultfilters/tests.py
@@ -13,8 +13,8 @@
linebreaks_filter, linenumbers, ljust, lower, make_list,
phone2numeric_filter, pluralize, removetags, rjust, slice_filter, slugify,
stringformat, striptags, time, timesince_filter, timeuntil_filter, title,
- truncatewords, truncatewords_html, unordered_list, upper, urlencode,
- urlize, urlizetrunc, wordcount, wordwrap, yesno,
+ truncatechars_html, truncatewords, truncatewords_html, unordered_list,
+ upper, urlencode, urlize, urlizetrunc, wordcount, wordwrap, yesno,
)
from django.test import TestCase
from django.utils import six
@@ -195,6 +195,23 @@ def test_truncatewords_html(self):
'&#x00bf;C&oacute;mo est&aacute;?</i>', 3),
'<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>')
+ def test_truncatechars_html(self):
+ self.assertEqual(truncatechars_html(
+ '<p>one <a href="#">two - three <br>four</a> five</p>', 0), '...')
+ self.assertEqual(truncatechars_html('<p>one <a href="#">two - '
+ 'three <br>four</a> five</p>', 6),
+ '<p>one...</p>')
+ self.assertEqual(truncatechars_html(
+ '<p>one <a href="#">two - three <br>four</a> five</p>', 11),
+ '<p>one <a href="#">two ...</a></p>')
+ self.assertEqual(truncatechars_html(
+ '<p>one <a href="#">two - three <br>four</a> five</p>', 100),
+ '<p>one <a href="#">two - three <br>four</a> five</p>')
+ self.assertEqual(truncatechars_html(
+ '<b>\xc5ngstr\xf6m</b> was here', 5), '<b>\xc5n...</b>')
+ self.assertEqual(truncatechars_html(
+ 'a<b>b</b>c', 3), 'a<b>b</b>c')
+
def test_upper(self):
self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT')
# lowercase e umlaut
Please sign in to comment.
Something went wrong with that request. Please try again.