Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #11911 -- Made the urlize filter smarter with closing punctuation.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17362 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 15d10a5210378bba88c7dfa1f45a4d3528ddfc3f 1 parent 78c92c4
Aymeric Augustin authored January 08, 2012
27  django/utils/html.py
@@ -11,8 +11,8 @@
11 11
 from django.utils.text import normalize_newlines
12 12
 
13 13
 # Configuration for urlize() function.
14  
-LEADING_PUNCTUATION  = ['(', '<', '&lt;']
15  
-TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
  14
+TRAILING_PUNCTUATION = ['.', ',', ':', ';']
  15
+WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('&lt;', '&gt;')]
16 16
 
17 17
 # List of possible strings used for bullets in bulleted lists.
18 18
 DOTS = [u'&middot;', u'*', u'\u2022', u'&#149;', u'&bull;', u'&#8226;']
@@ -20,9 +20,6 @@
20 20
 unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
21 21
 unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
22 22
 word_split_re = re.compile(r'(\s+)')
23  
-punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
24  
-    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
25  
-    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
26 23
 simple_url_re = re.compile(r'^https?://\w')
27 24
 simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org|[a-z]{2})$')
28 25
 simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
@@ -147,9 +144,22 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
147 144
     for i, word in enumerate(words):
148 145
         match = None
149 146
         if '.' in word or '@' in word or ':' in word:
150  
-            match = punctuation_re.match(word)
151  
-        if match:
152  
-            lead, middle, trail = match.groups()
  147
+            # Deal with punctuation.
  148
+            lead, middle, trail = '', word, ''
  149
+            for punctuation in TRAILING_PUNCTUATION:
  150
+                if middle.endswith(punctuation):
  151
+                    middle = middle[:-len(punctuation)]
  152
+                    trail = punctuation + trail
  153
+            for opening, closing in WRAPPING_PUNCTUATION:
  154
+                if middle.startswith(opening):
  155
+                    middle = middle[len(opening):]
  156
+                    lead = lead + opening
  157
+                # Keep parentheses at the end only if they're balanced.
  158
+                if (middle.endswith(closing)
  159
+                    and middle.count(closing) == middle.count(opening) + 1):
  160
+                    middle = middle[:-len(closing)]
  161
+                    trail = closing + trail
  162
+
153 163
             # Make URL we want to point to.
154 164
             url = None
155 165
             nofollow_attr = ' rel="nofollow"' if nofollow else ''
@@ -162,6 +172,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
162 172
                 domain = domain.encode('idna')
163 173
                 url = 'mailto:%s@%s' % (local, domain)
164 174
                 nofollow_attr = ''
  175
+
165 176
             # Make link.
166 177
             if url:
167 178
                 trimmed = trim_url(middle)
8  tests/regressiontests/defaultfilters/tests.py
@@ -253,6 +253,14 @@ def test_urlize(self):
253 253
             u'<a href="http://en.wikipedia.org/wiki/Caf%C3%A9" rel="nofollow">'
254 254
             u'http://en.wikipedia.org/wiki/Café</a>')
255 255
 
  256
+        # Check urlize keeps balanced parentheses - see #11911
  257
+        self.assertEqual(urlize('http://en.wikipedia.org/wiki/Django_(web_framework)'),
  258
+            u'<a href="http://en.wikipedia.org/wiki/Django_(web_framework)" rel="nofollow">'
  259
+            u'http://en.wikipedia.org/wiki/Django_(web_framework)</a>')
  260
+        self.assertEqual(urlize('(see http://en.wikipedia.org/wiki/Django_(web_framework))'),
  261
+            u'(see <a href="http://en.wikipedia.org/wiki/Django_(web_framework)" rel="nofollow">'
  262
+            u'http://en.wikipedia.org/wiki/Django_(web_framework)</a>)')
  263
+
256 264
         # Check urlize adds nofollow properly - see #12183
257 265
         self.assertEqual(urlize('foo@bar.com or www.bar.com'),
258 266
             u'<a href="mailto:foo@bar.com">foo@bar.com</a> or '

0 notes on commit 15d10a5

Thomas Woolford

How did this escape detection for 7 months? By removing this list comprehension, you disable the creation of the temporary variable x, and the del x # Temporary variable statement on L30 then fails. Seems to be fixed in [129f1ac] though.

(This came up in the Django IRC channel: https://dl.dropbox.com/u/1204764/ssdjango.jpg)

Aymeric Augustin

Nothing was detected because there was nothing to detect! x was still created just below, in hard_coded_bullets_re.

The del x statement was removed because list comprehensions no longer leak their loop variables into the enclosing scope on Python 3.

Please sign in to comment.
Something went wrong with that request. Please try again.