Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to …

…Chris Beaven.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 3b774583711e39dae7a5cfde314288f8019f59c6 1 parent 12b7c2a
Jannis Leidel authored July 14, 2011
4  django/contrib/admin/widgets.py
@@ -9,7 +9,7 @@
9 9
 from django.forms.util import flatatt
10 10
 from django.templatetags.static import static
11 11
 from django.utils.html import escape
12  
-from django.utils.text import truncate_words
  12
+from django.utils.text import Truncator
13 13
 from django.utils.translation import ugettext as _
14 14
 from django.utils.safestring import mark_safe
15 15
 from django.utils.encoding import force_unicode
@@ -152,7 +152,7 @@ def label_for_value(self, value):
152 152
         key = self.rel.get_related_field().name
153 153
         try:
154 154
             obj = self.rel.to._default_manager.using(self.db).get(**{key: value})
155  
-            return '&nbsp;<strong>%s</strong>' % escape(truncate_words(obj, 14))
  155
+            return '&nbsp;<strong>%s</strong>' % escape(Truncator(obj).words(14, truncate='...'))
156 156
         except (ValueError, self.rel.to.DoesNotExist):
157 157
             return ''
158 158
 
20  django/template/defaultfilters.py
@@ -15,7 +15,7 @@
15 15
 from django.utils.html import (conditional_escape, escapejs, fix_ampersands,
16 16
     escape, urlize as urlize_impl, linebreaks, strip_tags)
17 17
 from django.utils.http import urlquote
18  
-from django.utils.text import truncate_words, truncate_html_words, wrap, phone2numeric
  18
+from django.utils.text import Truncator, wrap, phone2numeric
19 19
 from django.utils.safestring import mark_safe, SafeData, mark_for_escaping
20 20
 from django.utils.timesince import timesince, timeuntil
21 21
 from django.utils.translation import ugettext, ungettext
@@ -244,6 +244,20 @@ def title(value):
244 244
 title.is_safe = True
245 245
 title = stringfilter(title)
246 246
 
  247
+def truncatechars(value, arg):
  248
+    """
  249
+    Truncates a string if it is longer than the given number of characters.
  250
+
  251
+    Argument: Maximum number of characters, including the truncation text.
  252
+    """
  253
+    try:
  254
+        length = int(arg)
  255
+    except ValueError: # Invalid literal for int().
  256
+        return value # Fail silently.
  257
+    return Truncator(value).chars(length)
  258
+truncatechars.is_safe = True
  259
+truncatechars = stringfilter(truncatechars)
  260
+
247 261
 def truncatewords(value, arg):
248 262
     """
249 263
     Truncates a string after a certain number of words.
@@ -256,7 +270,7 @@ def truncatewords(value, arg):
256 270
         length = int(arg)
257 271
     except ValueError: # Invalid literal for int().
258 272
         return value # Fail silently.
259  
-    return truncate_words(value, length)
  273
+    return Truncator(value).words(length, truncate=' ...')
260 274
 truncatewords.is_safe = True
261 275
 truncatewords = stringfilter(truncatewords)
262 276
 
@@ -272,7 +286,7 @@ def truncatewords_html(value, arg):
272 286
         length = int(arg)
273 287
     except ValueError: # invalid literal for int()
274 288
         return value # Fail silently.
275  
-    return truncate_html_words(value, length)
  289
+    return Truncator(value).words(length, html=True, truncate=' ...')
276 290
 truncatewords_html.is_safe = True
277 291
 truncatewords_html = stringfilter(truncatewords_html)
278 292
 
248  django/utils/text.py
... ...
@@ -1,4 +1,6 @@
1 1
 import re
  2
+import unicodedata
  3
+import warnings
2 4
 from gzip import GzipFile
3 5
 from htmlentitydefs import name2codepoint
4 6
 
@@ -8,14 +10,18 @@
8 10
     from StringIO import StringIO
9 11
 
10 12
 from django.utils.encoding import force_unicode
11  
-from django.utils.functional import allow_lazy
12  
-from django.utils.translation import ugettext_lazy, ugettext as _
13  
-
  13
+from django.utils.functional import allow_lazy, SimpleLazyObject
  14
+from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
14 15
 
15 16
 # Capitalizes the first letter of a string.
16 17
 capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
17 18
 capfirst = allow_lazy(capfirst, unicode)
18 19
 
  20
+# Set up regular expressions
  21
+re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
  22
+re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
  23
+
  24
+
19 25
 def wrap(text, width):
20 26
     """
21 27
     A word-wrap function that preserves existing line breaks and most spaces in
@@ -44,88 +50,172 @@ def _generator():
44 50
     return u''.join(_generator())
45 51
 wrap = allow_lazy(wrap, unicode)
46 52
 
47  
-def truncate_words(s, num, end_text='...'):
48  
-    """Truncates a string after a certain number of words. Takes an optional
49  
-    argument of what should be used to notify that the string has been
50  
-    truncated, defaulting to ellipsis (...)
51 53
 
52  
-    Newlines in the string will be stripped.
  54
+class Truncator(SimpleLazyObject):
53 55
     """
54  
-    s = force_unicode(s)
55  
-    length = int(num)
56  
-    words = s.split()
57  
-    if len(words) > length:
58  
-        words = words[:length]
59  
-        if not words[-1].endswith(end_text):
60  
-            words.append(end_text)
61  
-    return u' '.join(words)
62  
-truncate_words = allow_lazy(truncate_words, unicode)
  56
+    An object used to truncate text, either by characters or words.
  57
+    """
  58
+    def __init__(self, text):
  59
+        super(Truncator, self).__init__(lambda: force_unicode(text))
63 60
 
64  
-def truncate_html_words(s, num, end_text='...'):
65  
-    """Truncates HTML to a certain number of words (not counting tags and
66  
-    comments). Closes opened tags if they were correctly closed in the given
67  
-    html. Takes an optional argument of what should be used to notify that the
68  
-    string has been truncated, defaulting to ellipsis (...).
  61
+    def add_truncation_text(self, text, truncate=None):
  62
+        if truncate is None:
  63
+            truncate = pgettext(
  64
+                'String to return when truncating text',
  65
+                u'%(truncated_text)s...')
  66
+        truncate = force_unicode(truncate)
  67
+        if '%(truncated_text)s' in truncate:
  68
+            return truncate % {'truncated_text': text}
  69
+        # The truncation text didn't contain the %(truncated_text)s string
  70
+        # replacement argument so just append it to the text.
  71
+        if text.endswith(truncate):
  72
+            # But don't append the truncation text if the current text already
  73
+            # ends in this.
  74
+            return text
  75
+        return '%s%s' % (text, truncate)
69 76
 
70  
-    Newlines in the HTML are preserved.
71  
-    """
72  
-    s = force_unicode(s)
73  
-    length = int(num)
74  
-    if length <= 0:
75  
-        return u''
76  
-    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
77  
-    # Set up regular expressions
78  
-    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
79  
-    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
80  
-    # Count non-HTML words and keep note of open tags
81  
-    pos = 0
82  
-    end_text_pos = 0
83  
-    words = 0
84  
-    open_tags = []
85  
-    while words <= length:
86  
-        m = re_words.search(s, pos)
87  
-        if not m:
88  
-            # Checked through whole string
89  
-            break
90  
-        pos = m.end(0)
91  
-        if m.group(1):
92  
-            # It's an actual non-HTML word
93  
-            words += 1
94  
-            if words == length:
95  
-                end_text_pos = pos
96  
-            continue
97  
-        # Check for tag
98  
-        tag = re_tag.match(m.group(0))
99  
-        if not tag or end_text_pos:
100  
-            # Don't worry about non tags or tags after our truncate point
101  
-            continue
102  
-        closing_tag, tagname, self_closing = tag.groups()
103  
-        tagname = tagname.lower()  # Element names are always case-insensitive
104  
-        if self_closing or tagname in html4_singlets:
105  
-            pass
106  
-        elif closing_tag:
107  
-            # Check for match in open tags list
108  
-            try:
109  
-                i = open_tags.index(tagname)
110  
-            except ValueError:
  77
+    def chars(self, num, truncate=None):
  78
+        """
  79
+        Returns the text truncated to be no longer than the specified number
  80
+        of characters.
  81
+
  82
+        Takes an optional argument of what should be used to notify that the
  83
+        string has been truncated, defaulting to a translatable string of an
  84
+        ellipsis (...).
  85
+        """
  86
+        length = int(num)
  87
+        text = unicodedata.normalize('NFC', self._wrapped)
  88
+
  89
+        # Calculate the length to truncate to (max length - end_text length)
  90
+        truncate_len = length
  91
+        for char in self.add_truncation_text('', truncate):
  92
+            if not unicodedata.combining(char):
  93
+                truncate_len -= 1
  94
+                if truncate_len == 0:
  95
+                    break
  96
+
  97
+        s_len = 0
  98
+        end_index = None
  99
+        for i, char in enumerate(text):
  100
+            if unicodedata.combining(char):
  101
+                # Don't consider combining characters
  102
+                # as adding to the string length
  103
+                continue
  104
+            s_len += 1
  105
+            if end_index is None and s_len > truncate_len:
  106
+                end_index = i
  107
+            if s_len > length:
  108
+                # Return the truncated string
  109
+                return self.add_truncation_text(text[:end_index or 0],
  110
+                                                truncate)
  111
+
  112
+        # Return the original string since no truncation was necessary
  113
+        return text
  114
+    chars = allow_lazy(chars)
  115
+
  116
+    def words(self, num, truncate=None, html=False):
  117
+        """
  118
+        Truncates a string after a certain number of words. Takes an optional
  119
+        argument of what should be used to notify that the string has been
  120
+        truncated, defaulting to ellipsis (...).
  121
+        """
  122
+        length = int(num)
  123
+        if html:
  124
+            return self._html_words(length, truncate)
  125
+        return self._text_words(length, truncate)
  126
+    words = allow_lazy(words)
  127
+
  128
+    def _text_words(self, length, truncate):
  129
+        """
  130
+        Truncates a string after a certain number of words.
  131
+
  132
+        Newlines in the string will be stripped.
  133
+        """
  134
+        words = self._wrapped.split()
  135
+        if len(words) > length:
  136
+            words = words[:length]
  137
+            return self.add_truncation_text(u' '.join(words), truncate)
  138
+        return u' '.join(words)
  139
+
  140
+    def _html_words(self, length, truncate):
  141
+        """
  142
+        Truncates HTML to a certain number of words (not counting tags and
  143
+        comments). Closes opened tags if they were correctly closed in the
  144
+        given HTML.
  145
+
  146
+        Newlines in the HTML are preserved.
  147
+        """
  148
+        if length <= 0:
  149
+            return u''
  150
+        html4_singlets = (
  151
+            'br', 'col', 'link', 'base', 'img',
  152
+            'param', 'area', 'hr', 'input'
  153
+        )
  154
+        # Count non-HTML words and keep note of open tags
  155
+        pos = 0
  156
+        end_text_pos = 0
  157
+        words = 0
  158
+        open_tags = []
  159
+        while words <= length:
  160
+            m = re_words.search(self._wrapped, pos)
  161
+            if not m:
  162
+                # Checked through whole string
  163
+                break
  164
+            pos = m.end(0)
  165
+            if m.group(1):
  166
+                # It's an actual non-HTML word
  167
+                words += 1
  168
+                if words == length:
  169
+                    end_text_pos = pos
  170
+                continue
  171
+            # Check for tag
  172
+            tag = re_tag.match(m.group(0))
  173
+            if not tag or end_text_pos:
  174
+                # Don't worry about non tags or tags after our truncate point
  175
+                continue
  176
+            closing_tag, tagname, self_closing = tag.groups()
  177
+            # Element names are always case-insensitive
  178
+            tagname = tagname.lower()
  179
+            if self_closing or tagname in html4_singlets:
111 180
                 pass
  181
+            elif closing_tag:
  182
+                # Check for match in open tags list
  183
+                try:
  184
+                    i = open_tags.index(tagname)
  185
+                except ValueError:
  186
+                    pass
  187
+                else:
  188
+                    # SGML: An end tag closes, back to the matching start tag,
  189
+                    # all unclosed intervening start tags with omitted end tags
  190
+                    open_tags = open_tags[i + 1:]
112 191
             else:
113  
-                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
114  
-                open_tags = open_tags[i+1:]
115  
-        else:
116  
-            # Add it to the start of the open tags list
117  
-            open_tags.insert(0, tagname)
118  
-    if words <= length:
119  
-        # Don't try to close tags if we don't need to truncate
120  
-        return s
121  
-    out = s[:end_text_pos]
122  
-    if end_text:
123  
-        out += ' ' + end_text
124  
-    # Close any tags still open
125  
-    for tag in open_tags:
126  
-        out += '</%s>' % tag
127  
-    # Return string
128  
-    return out
  192
+                # Add it to the start of the open tags list
  193
+                open_tags.insert(0, tagname)
  194
+        if words <= length:
  195
+            # Don't try to close tags if we don't need to truncate
  196
+            return self._wrapped
  197
+        out = self._wrapped[:end_text_pos]
  198
+        truncate_text = self.add_truncation_text('', truncate)
  199
+        if truncate_text:
  200
+            out += truncate_text
  201
+        # Close any tags still open
  202
+        for tag in open_tags:
  203
+            out += '</%s>' % tag
  204
+        # Return string
  205
+        return out
  206
+
  207
+def truncate_words(s, num, end_text='...'):
  208
+    warnings.warn('This function has been deprecated. Use the Truncator class '
  209
+        'in django.utils.text instead.', category=PendingDeprecationWarning)
  210
+    truncate = end_text and ' %s' % end_text or ''
  211
+    return Truncator(s).words(num, truncate=truncate)
  212
+truncate_words = allow_lazy(truncate_words, unicode)
  213
+
  214
+def truncate_html_words(s, num, end_text='...'):
  215
+    warnings.warn('This function has been deprecated. Use the Truncator class '
  216
+        'in django.utils.text instead.', category=PendingDeprecationWarning)
  217
+    truncate = end_text and ' %s' % end_text or ''
  218
+    return Truncator(s).words(num, truncate=truncate, html=True)
129 219
 truncate_html_words = allow_lazy(truncate_html_words, unicode)
130 220
 
131 221
 def get_valid_filename(s):
6  docs/internals/deprecation.txt
@@ -220,6 +220,12 @@ their deprecation, as per the :ref:`Django deprecation policy
220 220
           was deprecated since Django 1.4 and will be removed in favor of the
221 221
           generic static files handling.
222 222
 
  223
+        * The built-in truncation functions
  224
+          :func:`django.utils.text.truncate_words` and
  225
+          :func:`django.utils.text.truncate_html_words`
  226
+          were deprecated since Django 1.4 and will be removed in favor
  227
+          of the ``django.utils.text.Truncator`` class.
  228
+
223 229
     * 2.0
224 230
         * ``django.views.defaults.shortcut()``. This function has been moved
225 231
           to ``django.contrib.contenttypes.views.shortcut()`` as part of the
18  docs/ref/templates/builtins.txt
@@ -2055,6 +2055,24 @@ For example::
2055 2055
 
2056 2056
 If ``value`` is ``"my first post"``, the output will be ``"My First Post"``.
2057 2057
 
  2058
+.. templatefilter:: truncatechars
  2059
+
  2060
+truncatechars
  2061
+^^^^^^^^^^^^^
  2062
+
  2063
+.. versionadded:: 1.4
  2064
+
  2065
+Truncates a string if it is longer than the specified number of characters.
  2066
+Truncated strings will end with a translatable ellipsis sequence ("...").
  2067
+
  2068
+**Argument:** Number of characters to truncate to
  2069
+
  2070
+For example::
  2071
+
  2072
+    {{ value|truncatechars:9 }}
  2073
+
  2074
+If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
  2075
+
2058 2076
 .. templatefilter:: truncatewords
2059 2077
 
2060 2078
 truncatewords
8  docs/releases/1.4.txt
@@ -145,6 +145,14 @@ A new helper function,
145 145
 ``template.Library`` to ease the creation of template tags that store some
146 146
 data in a specified context variable.
147 147
 
  148
+``truncatechars`` template filter
  149
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  150
+
  151
+Added a filter which truncates a string to be no longer than the specified
  152
+number of characters. Truncated strings end with a translatable ellipsis
  153
+sequence ("..."). See the :tfilter:`truncatechars docs <truncatechars>` for
  154
+more details.
  155
+
148 156
 CSRF improvements
149 157
 ~~~~~~~~~~~~~~~~~
150 158
 
61  tests/regressiontests/utils/text.py
... ...
@@ -1,10 +1,69 @@
  1
+# -*- coding: utf-8 -*-
1 2
 import unittest
2 3
 
3 4
 from django.utils import text
4 5
 
5 6
 class TestUtilsText(unittest.TestCase):
6 7
 
  8
+    def test_truncate_chars(self):
  9
+        truncator = text.Truncator(
  10
+            u'The quick brown fox jumped over the lazy dog.'
  11
+        )
  12
+        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
  13
+            truncator.chars(100)),
  14
+        self.assertEqual(u'The quick brown fox ...',
  15
+            truncator.chars(23)),
  16
+        self.assertEqual(u'The quick brown fo.....',
  17
+            truncator.chars(23, '.....')),
  18
+
  19
+        # Ensure that we normalize our unicode data first
  20
+        nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc')
  21
+        nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308')
  22
+        self.assertEqual(u'oüoüoüoü', nfc.chars(8))
  23
+        self.assertEqual(u'oüoüoüoü', nfd.chars(8))
  24
+        self.assertEqual(u'oü...', nfc.chars(5))
  25
+        self.assertEqual(u'oü...', nfd.chars(5))
  26
+
  27
+        # Ensure the final length is calculated correctly when there are
  28
+        # combining characters with no precomposed form, and that combining
  29
+        # characters are not split up.
  30
+        truncator = text.Truncator(u'-B\u030AB\u030A----8')
  31
+        self.assertEqual(u'-B\u030A...', truncator.chars(5))
  32
+        self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7))
  33
+        self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8))
  34
+
  35
+        # Ensure the length of the end text is correctly calculated when it
  36
+        # contains combining characters with no precomposed form.
  37
+        truncator = text.Truncator(u'-----')
  38
+        self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A'))
  39
+        self.assertEqual(u'-----', truncator.chars(5, u'B\u030A'))
  40
+
  41
+        # Make a best effort to shorten to the desired length, but requesting
  42
+        # a length shorter than the ellipsis shouldn't break
  43
+        self.assertEqual(u'...', text.Truncator(u'asdf').chars(1))
  44
+
7 45
     def test_truncate_words(self):
  46
+        truncator = text.Truncator(u'The quick brown fox jumped over the lazy '
  47
+            'dog.')
  48
+        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
  49
+            truncator.words(10))
  50
+        self.assertEqual(u'The quick brown fox...', truncator.words(4))
  51
+        self.assertEqual(u'The quick brown fox[snip]',
  52
+            truncator.words(4, '[snip]'))
  53
+
  54
+    def test_truncate_html_words(self):
  55
+        truncator = text.Truncator('<p><strong><em>The quick brown fox jumped '
  56
+            'over the lazy dog.</em></strong></p>')
  57
+        self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the '
  58
+            'lazy dog.</em></strong></p>', truncator.words(10, html=True))
  59
+        self.assertEqual(u'<p><strong><em>The quick brown fox...</em>'
  60
+            '</strong></p>', truncator.words(4, html=True))
  61
+        self.assertEqual(u'<p><strong><em>The quick brown fox....</em>'
  62
+            '</strong></p>', truncator.words(4, '....', html=True))
  63
+        self.assertEqual(u'<p><strong><em>The quick brown fox</em></strong>'
  64
+            '</p>', truncator.words(4, '', html=True))
  65
+
  66
+    def test_old_truncate_words(self):
8 67
         self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
9 68
             text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10))
10 69
         self.assertEqual(u'The quick brown fox ...',
@@ -12,7 +71,7 @@ def test_truncate_words(self):
12 71
         self.assertEqual(u'The quick brown fox ....',
13 72
             text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....'))
14 73
 
15  
-    def test_truncate_html_words(self):
  74
+    def test_old_truncate_html_words(self):
16 75
         self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
17 76
             text.truncate_html_words('<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>', 10))
18 77
         self.assertEqual(u'<p><strong><em>The quick brown fox ...</em></strong></p>',

0 notes on commit 3b77458

Please sign in to comment.
Something went wrong with that request. Please try again.