Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixed #11911 -- Made the urlize filter smarter with closing punctuation.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17362 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 15d10a5210378bba88c7dfa1f45a4d3528ddfc3f 1 parent 78c92c4
@aaugustin aaugustin authored
View
27 django/utils/html.py
@@ -11,8 +11,8 @@
from django.utils.text import normalize_newlines
# Configuration for urlize() function.
-LEADING_PUNCTUATION = ['(', '<', '&lt;']
-TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
+TRAILING_PUNCTUATION = ['.', ',', ':', ';']
+WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('&lt;', '&gt;')]
# List of possible strings used for bullets in bulleted lists.
DOTS = [u'&middot;', u'*', u'\u2022', u'&#149;', u'&bull;', u'&#8226;']
@@ -20,9 +20,6 @@
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
word_split_re = re.compile(r'(\s+)')
-punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
- ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
- '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
@twoolie
twoolie added a note

How did this escape detection for 7 months? By removing this list comprehension, you prevent the temporary variable `x` from being created, so the `del x  # Temporary variable` statement on L30 then fails. It seems to be fixed in [129f1ac], though.

(This came up in the Django IRC channel: https://dl.dropbox.com/u/1204764/ssdjango.jpg)

@aaugustin Owner

Nothing was detected because there was nothing to detect! x was still created just below, in hard_coded_bullets_re.

The `del x` statement was removed because list comprehensions no longer leak their loop variables into the enclosing scope on Python 3.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
simple_url_re = re.compile(r'^https?://\w')
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org|[a-z]{2})$')
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
@@ -147,9 +144,22 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
for i, word in enumerate(words):
match = None
if '.' in word or '@' in word or ':' in word:
- match = punctuation_re.match(word)
- if match:
- lead, middle, trail = match.groups()
+ # Deal with punctuation.
+ lead, middle, trail = '', word, ''
+ for punctuation in TRAILING_PUNCTUATION:
+ if middle.endswith(punctuation):
+ middle = middle[:-len(punctuation)]
+ trail = punctuation + trail
+ for opening, closing in WRAPPING_PUNCTUATION:
+ if middle.startswith(opening):
+ middle = middle[len(opening):]
+ lead = lead + opening
+ # Keep parentheses at the end only if they're balanced.
+ if (middle.endswith(closing)
+ and middle.count(closing) == middle.count(opening) + 1):
+ middle = middle[:-len(closing)]
+ trail = closing + trail
+
# Make URL we want to point to.
url = None
nofollow_attr = ' rel="nofollow"' if nofollow else ''
@@ -162,6 +172,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
domain = domain.encode('idna')
url = 'mailto:%s@%s' % (local, domain)
nofollow_attr = ''
+
# Make link.
if url:
trimmed = trim_url(middle)
View
8 tests/regressiontests/defaultfilters/tests.py
@@ -253,6 +253,14 @@ def test_urlize(self):
u'<a href="http://en.wikipedia.org/wiki/Caf%C3%A9" rel="nofollow">'
u'http://en.wikipedia.org/wiki/Café</a>')
+ # Check urlize keeps balanced parentheses - see #11911
+ self.assertEqual(urlize('http://en.wikipedia.org/wiki/Django_(web_framework)'),
+ u'<a href="http://en.wikipedia.org/wiki/Django_(web_framework)" rel="nofollow">'
+ u'http://en.wikipedia.org/wiki/Django_(web_framework)</a>')
+ self.assertEqual(urlize('(see http://en.wikipedia.org/wiki/Django_(web_framework))'),
+ u'(see <a href="http://en.wikipedia.org/wiki/Django_(web_framework)" rel="nofollow">'
+ u'http://en.wikipedia.org/wiki/Django_(web_framework)</a>)')
+
# Check urlize adds nofollow properly - see #12183
self.assertEqual(urlize('foo@bar.com or www.bar.com'),
u'<a href="mailto:foo@bar.com">foo@bar.com</a> or '
Please sign in to comment.
Something went wrong with that request. Please try again.