Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 3b774583711e39dae7a5cfde314288f8019f59c6 1 parent 12b7c2a
@jezdez jezdez authored
View
4 django/contrib/admin/widgets.py
@@ -9,7 +9,7 @@
from django.forms.util import flatatt
from django.templatetags.static import static
from django.utils.html import escape
-from django.utils.text import truncate_words
+from django.utils.text import Truncator
from django.utils.translation import ugettext as _
from django.utils.safestring import mark_safe
from django.utils.encoding import force_unicode
@@ -152,7 +152,7 @@ def label_for_value(self, value):
key = self.rel.get_related_field().name
try:
obj = self.rel.to._default_manager.using(self.db).get(**{key: value})
- return '&nbsp;<strong>%s</strong>' % escape(truncate_words(obj, 14))
+ return '&nbsp;<strong>%s</strong>' % escape(Truncator(obj).words(14, truncate='...'))
except (ValueError, self.rel.to.DoesNotExist):
return ''
View
20 django/template/defaultfilters.py
@@ -15,7 +15,7 @@
from django.utils.html import (conditional_escape, escapejs, fix_ampersands,
escape, urlize as urlize_impl, linebreaks, strip_tags)
from django.utils.http import urlquote
-from django.utils.text import truncate_words, truncate_html_words, wrap, phone2numeric
+from django.utils.text import Truncator, wrap, phone2numeric
from django.utils.safestring import mark_safe, SafeData, mark_for_escaping
from django.utils.timesince import timesince, timeuntil
from django.utils.translation import ugettext, ungettext
@@ -244,6 +244,20 @@ def title(value):
title.is_safe = True
title = stringfilter(title)
+def truncatechars(value, arg):
+ """
+ Truncates a string after a certain number of characters.
+
+ Argument: Number of characters to truncate after.
+ """
+ try:
+ length = int(arg)
+ except ValueError: # Invalid literal for int().
+ return value # Fail silently.
+ return Truncator(value).chars(length)
+truncatechars.is_safe = True
+truncatechars = stringfilter(truncatechars)
+
def truncatewords(value, arg):
"""
Truncates a string after a certain number of words.
@@ -256,7 +270,7 @@ def truncatewords(value, arg):
length = int(arg)
except ValueError: # Invalid literal for int().
return value # Fail silently.
- return truncate_words(value, length)
+ return Truncator(value).words(length, truncate=' ...')
truncatewords.is_safe = True
truncatewords = stringfilter(truncatewords)
@@ -272,7 +286,7 @@ def truncatewords_html(value, arg):
length = int(arg)
except ValueError: # invalid literal for int()
return value # Fail silently.
- return truncate_html_words(value, length)
+ return Truncator(value).words(length, html=True, truncate=' ...')
truncatewords_html.is_safe = True
truncatewords_html = stringfilter(truncatewords_html)
View
248 django/utils/text.py
@@ -1,4 +1,6 @@
import re
+import unicodedata
+import warnings
from gzip import GzipFile
from htmlentitydefs import name2codepoint
@@ -8,14 +10,18 @@
from StringIO import StringIO
from django.utils.encoding import force_unicode
-from django.utils.functional import allow_lazy
-from django.utils.translation import ugettext_lazy, ugettext as _
-
+from django.utils.functional import allow_lazy, SimpleLazyObject
+from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
# Capitalizes the first letter of a string.
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
capfirst = allow_lazy(capfirst, unicode)
+# Set up regular expressions
+re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
+re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
+
+
def wrap(text, width):
"""
A word-wrap function that preserves existing line breaks and most spaces in
@@ -44,88 +50,172 @@ def _generator():
return u''.join(_generator())
wrap = allow_lazy(wrap, unicode)
-def truncate_words(s, num, end_text='...'):
- """Truncates a string after a certain number of words. Takes an optional
- argument of what should be used to notify that the string has been
- truncated, defaulting to ellipsis (...)
- Newlines in the string will be stripped.
+class Truncator(SimpleLazyObject):
"""
- s = force_unicode(s)
- length = int(num)
- words = s.split()
- if len(words) > length:
- words = words[:length]
- if not words[-1].endswith(end_text):
- words.append(end_text)
- return u' '.join(words)
-truncate_words = allow_lazy(truncate_words, unicode)
+ An object used to truncate text, either by characters or words.
+ """
+ def __init__(self, text):
+ super(Truncator, self).__init__(lambda: force_unicode(text))
-def truncate_html_words(s, num, end_text='...'):
- """Truncates HTML to a certain number of words (not counting tags and
- comments). Closes opened tags if they were correctly closed in the given
- html. Takes an optional argument of what should be used to notify that the
- string has been truncated, defaulting to ellipsis (...).
+ def add_truncation_text(self, text, truncate=None):
+ if truncate is None:
+ truncate = pgettext(
+ 'String to return when truncating text',
+ u'%(truncated_text)s...')
+ truncate = force_unicode(truncate)
+ if '%(truncated_text)s' in truncate:
+ return truncate % {'truncated_text': text}
+ # The truncation text didn't contain the %(truncated_text)s string
+ # replacement argument so just append it to the text.
+ if text.endswith(truncate):
+ # But don't append the truncation text if the current text already
+ # ends in this.
+ return text
+ return '%s%s' % (text, truncate)
- Newlines in the HTML are preserved.
- """
- s = force_unicode(s)
- length = int(num)
- if length <= 0:
- return u''
- html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
- # Set up regular expressions
- re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
- re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
- # Count non-HTML words and keep note of open tags
- pos = 0
- end_text_pos = 0
- words = 0
- open_tags = []
- while words <= length:
- m = re_words.search(s, pos)
- if not m:
- # Checked through whole string
- break
- pos = m.end(0)
- if m.group(1):
- # It's an actual non-HTML word
- words += 1
- if words == length:
- end_text_pos = pos
- continue
- # Check for tag
- tag = re_tag.match(m.group(0))
- if not tag or end_text_pos:
- # Don't worry about non tags or tags after our truncate point
- continue
- closing_tag, tagname, self_closing = tag.groups()
- tagname = tagname.lower() # Element names are always case-insensitive
- if self_closing or tagname in html4_singlets:
- pass
- elif closing_tag:
- # Check for match in open tags list
- try:
- i = open_tags.index(tagname)
- except ValueError:
+ def chars(self, num, truncate=None):
+ """
+ Returns the text truncated to be no longer than the specified number
+ of characters.
+
+ Takes an optional argument of what should be used to notify that the
+ string has been truncated, defaulting to a translatable string of an
+ ellipsis (...).
+ """
+ length = int(num)
+ text = unicodedata.normalize('NFC', self._wrapped)
+
+ # Calculate the length to truncate to (max length - end_text length)
+ truncate_len = length
+ for char in self.add_truncation_text('', truncate):
+ if not unicodedata.combining(char):
+ truncate_len -= 1
+ if truncate_len == 0:
+ break
+
+ s_len = 0
+ end_index = None
+ for i, char in enumerate(text):
+ if unicodedata.combining(char):
+ # Don't consider combining characters
+ # as adding to the string length
+ continue
+ s_len += 1
+ if end_index is None and s_len > truncate_len:
+ end_index = i
+ if s_len > length:
+ # Return the truncated string
+ return self.add_truncation_text(text[:end_index or 0],
+ truncate)
+
+ # Return the original string since no truncation was necessary
+ return text
+ chars = allow_lazy(chars)
+
+ def words(self, num, truncate=None, html=False):
+ """
+ Truncates a string after a certain number of words. Takes an optional
+ argument of what should be used to notify that the string has been
+ truncated, defaulting to ellipsis (...).
+ """
+ length = int(num)
+ if html:
+ return self._html_words(length, truncate)
+ return self._text_words(length, truncate)
+ words = allow_lazy(words)
+
+ def _text_words(self, length, truncate):
+ """
+ Truncates a string after a certain number of words.
+
+ Newlines in the string will be stripped.
+ """
+ words = self._wrapped.split()
+ if len(words) > length:
+ words = words[:length]
+ return self.add_truncation_text(u' '.join(words), truncate)
+ return u' '.join(words)
+
+ def _html_words(self, length, truncate):
+ """
+ Truncates HTML to a certain number of words (not counting tags and
+ comments). Closes opened tags if they were correctly closed in the
+ given HTML.
+
+ Newlines in the HTML are preserved.
+ """
+ if length <= 0:
+ return u''
+ html4_singlets = (
+ 'br', 'col', 'link', 'base', 'img',
+ 'param', 'area', 'hr', 'input'
+ )
+ # Count non-HTML words and keep note of open tags
+ pos = 0
+ end_text_pos = 0
+ words = 0
+ open_tags = []
+ while words <= length:
+ m = re_words.search(self._wrapped, pos)
+ if not m:
+ # Checked through whole string
+ break
+ pos = m.end(0)
+ if m.group(1):
+ # It's an actual non-HTML word
+ words += 1
+ if words == length:
+ end_text_pos = pos
+ continue
+ # Check for tag
+ tag = re_tag.match(m.group(0))
+ if not tag or end_text_pos:
+ # Don't worry about non tags or tags after our truncate point
+ continue
+ closing_tag, tagname, self_closing = tag.groups()
+ # Element names are always case-insensitive
+ tagname = tagname.lower()
+ if self_closing or tagname in html4_singlets:
pass
+ elif closing_tag:
+ # Check for match in open tags list
+ try:
+ i = open_tags.index(tagname)
+ except ValueError:
+ pass
+ else:
+ # SGML: An end tag closes, back to the matching start tag,
+ # all unclosed intervening start tags with omitted end tags
+ open_tags = open_tags[i + 1:]
else:
- # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
- open_tags = open_tags[i+1:]
- else:
- # Add it to the start of the open tags list
- open_tags.insert(0, tagname)
- if words <= length:
- # Don't try to close tags if we don't need to truncate
- return s
- out = s[:end_text_pos]
- if end_text:
- out += ' ' + end_text
- # Close any tags still open
- for tag in open_tags:
- out += '</%s>' % tag
- # Return string
- return out
+ # Add it to the start of the open tags list
+ open_tags.insert(0, tagname)
+ if words <= length:
+ # Don't try to close tags if we don't need to truncate
+ return self._wrapped
+ out = self._wrapped[:end_text_pos]
+ truncate_text = self.add_truncation_text('', truncate)
+ if truncate_text:
+ out += truncate_text
+ # Close any tags still open
+ for tag in open_tags:
+ out += '</%s>' % tag
+ # Return string
+ return out
+
+def truncate_words(s, num, end_text='...'):
+ warnings.warn('This function has been deprecated. Use the Truncator class '
+ 'in django.utils.text instead.', category=PendingDeprecationWarning)
+ truncate = end_text and ' %s' % end_text or ''
+ return Truncator(s).words(num, truncate=truncate)
+truncate_words = allow_lazy(truncate_words, unicode)
+
+def truncate_html_words(s, num, end_text='...'):
+ warnings.warn('This function has been deprecated. Use the Truncator class '
+ 'in django.utils.text instead.', category=PendingDeprecationWarning)
+ truncate = end_text and ' %s' % end_text or ''
+ return Truncator(s).words(num, truncate=truncate, html=True)
truncate_html_words = allow_lazy(truncate_html_words, unicode)
def get_valid_filename(s):
View
6 docs/internals/deprecation.txt
@@ -220,6 +220,12 @@ their deprecation, as per the :ref:`Django deprecation policy
was deprecated since Django 1.4 and will be removed in favor of the
generic static files handling.
+ * The builtin truncation functions
+ :func:`django.utils.text.truncate_words` and
+ :func:`django.utils.text.truncate_html_words`
+ were deprecated since Django 1.4 and will be removed in favor
+ of the ``django.utils.text.Truncator`` class.
+
* 2.0
* ``django.views.defaults.shortcut()``. This function has been moved
to ``django.contrib.contenttypes.views.shortcut()`` as part of the
View
18 docs/ref/templates/builtins.txt
@@ -2055,6 +2055,24 @@ For example::
If ``value`` is ``"my first post"``, the output will be ``"My First Post"``.
+.. templatefilter:: truncatechars
+
+truncatechars
+^^^^^^^^^^^^^
+
+.. versionadded:: 1.4
+
+Truncates a string if it is longer than the specified number of characters.
+Truncated strings will end with a translatable ellipsis sequence ("...").
+
+**Argument:** Number of characters to truncate to
+
+For example::
+
+ {{ value|truncatechars:9 }}
+
+If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
+
.. templatefilter:: truncatewords
truncatewords
View
8 docs/releases/1.4.txt
@@ -145,6 +145,14 @@ A new helper function,
``template.Library`` to ease the creation of template tags that store some
data in a specified context variable.
+``truncatechars`` template filter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Added a filter which truncates a string to be no longer than the specified
+number of characters. Truncated strings end with a translatable ellipsis
+sequence ("..."). See the :tfilter:`truncatechars docs <truncatechars>` for
+more details.
+
CSRF improvements
~~~~~~~~~~~~~~~~~
View
61 tests/regressiontests/utils/text.py
@@ -1,10 +1,69 @@
+# -*- coding: utf-8 -*-
import unittest
from django.utils import text
class TestUtilsText(unittest.TestCase):
+ def test_truncate_chars(self):
+ truncator = text.Truncator(
+ u'The quick brown fox jumped over the lazy dog.'
+ )
+ self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
+ truncator.chars(100)),
+ self.assertEqual(u'The quick brown fox ...',
+ truncator.chars(23)),
+ self.assertEqual(u'The quick brown fo.....',
+ truncator.chars(23, '.....')),
+
+ # Ensure that we normalize our unicode data first
+ nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc')
+ nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308')
+ self.assertEqual(u'oüoüoüoü', nfc.chars(8))
+ self.assertEqual(u'oüoüoüoü', nfd.chars(8))
+ self.assertEqual(u'oü...', nfc.chars(5))
+ self.assertEqual(u'oü...', nfd.chars(5))
+
+ # Ensure the final length is calculated correctly when there are
+ # combining characters with no precomposed form, and that combining
+ # characters are not split up.
+ truncator = text.Truncator(u'-B\u030AB\u030A----8')
+ self.assertEqual(u'-B\u030A...', truncator.chars(5))
+ self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7))
+ self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8))
+
+ # Ensure the length of the end text is correctly calculated when it
+ # contains combining characters with no precomposed form.
+ truncator = text.Truncator(u'-----')
+ self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A'))
+ self.assertEqual(u'-----', truncator.chars(5, u'B\u030A'))
+
+ # Make a best effort to shorten to the desired length, but requesting
+ # a length shorter than the ellipsis shouldn't break
+ self.assertEqual(u'...', text.Truncator(u'asdf').chars(1))
+
def test_truncate_words(self):
+ truncator = text.Truncator(u'The quick brown fox jumped over the lazy '
+ 'dog.')
+ self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
+ truncator.words(10))
+ self.assertEqual(u'The quick brown fox...', truncator.words(4))
+ self.assertEqual(u'The quick brown fox[snip]',
+ truncator.words(4, '[snip]'))
+
+ def test_truncate_html_words(self):
+ truncator = text.Truncator('<p><strong><em>The quick brown fox jumped '
+ 'over the lazy dog.</em></strong></p>')
+ self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the '
+ 'lazy dog.</em></strong></p>', truncator.words(10, html=True))
+ self.assertEqual(u'<p><strong><em>The quick brown fox...</em>'
+ '</strong></p>', truncator.words(4, html=True))
+ self.assertEqual(u'<p><strong><em>The quick brown fox....</em>'
+ '</strong></p>', truncator.words(4, '....', html=True))
+ self.assertEqual(u'<p><strong><em>The quick brown fox</em></strong>'
+ '</p>', truncator.words(4, '', html=True))
+
+ def test_old_truncate_words(self):
self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10))
self.assertEqual(u'The quick brown fox ...',
@@ -12,7 +71,7 @@ def test_truncate_words(self):
self.assertEqual(u'The quick brown fox ....',
text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....'))
- def test_truncate_html_words(self):
+ def test_old_truncate_html_words(self):
self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
text.truncate_html_words('<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>', 10))
self.assertEqual(u'<p><strong><em>The quick brown fox ...</em></strong></p>',
Please sign in to comment.
Something went wrong with that request. Please try again.