diff --git a/django/utils/html.py b/django/utils/html.py index 734d7fbfb3d0..b961ed3b792b 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -342,21 +342,17 @@ def handle_word( nofollow=False, autoescape=False, ): - if "." in word or "@" in word or ":" in word: - # lead: Punctuation trimmed from the beginning of the word. - # middle: State of the word. - # trail: Punctuation trimmed from the end of the word. + # Optimize attribute lookup and guards + word_has_special = "." in word or "@" in word or ":" in word + if word_has_special: lead, middle, trail = self.trim_punctuation(word) - # Make URL we want to point to. url = None nofollow_attr = ' rel="nofollow"' if nofollow else "" - if len(middle) <= MAX_URL_LENGTH and self.simple_url_re.match(middle): + middle_len = len(middle) + if middle_len <= MAX_URL_LENGTH and self.simple_url_re.match(middle): url = smart_urlquote(html.unescape(middle)) - elif len(middle) <= MAX_URL_LENGTH and self.simple_url_2_re.match(middle): + elif middle_len <= MAX_URL_LENGTH and self.simple_url_2_re.match(middle): unescaped_middle = html.unescape(middle) - # RemovedInDjango70Warning: When the deprecation ends, replace - # with: - # url = smart_urlquote(f"https://{unescaped_middle}") protocol = ( "https" if getattr(settings, "URLIZE_ASSUME_HTTPS", False) @@ -374,13 +370,10 @@ def handle_word( url = smart_urlquote(f"{protocol}://{unescaped_middle}") elif ":" not in middle and self.is_email_simple(middle): local, domain = middle.rsplit("@", 1) - # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA - # to the user agent. See #36013. local = quote(local, safe="") domain = quote(domain, safe="") url = self.mailto_template.format(local=local, domain=domain) nofollow_attr = "" - # Make link. if url: trimmed = self.trim_url(middle, limit=trim_url_limit) if autoescape and not safe_input: @@ -425,48 +418,71 @@ def trim_punctuation(self, word): Trim trailing and wrapping punctuation from `word`. Return the items of the new state. """ - # Strip all opening wrapping punctuation. - middle = word.lstrip(self.wrapping_punctuation_openings) + # Cache lookups/arrays for optimal speed + wrapping_punctuation = self.wrapping_punctuation + trailing_punctuation_chars = self.trailing_punctuation_chars + trailing_punctuation_chars_no_semicolon = getattr( + self, "trailing_punctuation_chars_no_semicolon", None + ) + trailing_punctuation_chars_has_semicolon = getattr( + self, "trailing_punctuation_chars_has_semicolon", False + ) + wrapping_punctuation_openings = getattr( + self, "wrapping_punctuation_openings", None + ) + middle = ( + word.lstrip(wrapping_punctuation_openings) + if wrapping_punctuation_openings + else word + ) lead = word[: len(word) - len(middle)] trail = "" - # Continue trimming until middle remains unchanged. + # Pre-count for each wrapping punctuation pair + counts = None trimmed_something = True - counts = CountsDict(word=middle) while trimmed_something and middle: trimmed_something = False + # Only count when needed in this loop + counts = {} + for opening, closing in wrapping_punctuation: + counts[opening] = middle.count(opening) + counts[closing] = middle.count(closing) + # Trim wrapping punctuation. - for opening, closing in self.wrapping_punctuation: + for opening, closing in wrapping_punctuation: if counts[opening] < counts[closing]: rstripped = middle.rstrip(closing) if rstripped != middle: strip = counts[closing] - counts[opening] - trail = middle[-strip:] + trail = middle[-strip:] + trail middle = middle[:-strip] trimmed_something = True counts[closing] -= strip amp = middle.rfind("&") if amp == -1: - rstripped = middle.rstrip(self.trailing_punctuation_chars) + rstripped = middle.rstrip(trailing_punctuation_chars) else: - rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon) + rstripped = middle.rstrip( + trailing_punctuation_chars_no_semicolon + if trailing_punctuation_chars_no_semicolon + else trailing_punctuation_chars + ) if rstripped != middle: trail = middle[len(rstripped) :] + trail middle = rstripped trimmed_something = True - if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"): + if trailing_punctuation_chars_has_semicolon and middle.endswith(";"): # Only strip if not part of an HTML entity. potential_entity = middle[amp:] escaped = html.unescape(potential_entity) if escaped == potential_entity or escaped.endswith(";"): - rstripped = middle.rstrip(self.trailing_punctuation_chars) + rstripped = middle.rstrip(trailing_punctuation_chars) trail_start = len(rstripped) amount_trailing_semicolons = len(middle) - len(middle.rstrip(";")) if amp > -1 and amount_trailing_semicolons > 1: - # Leave up to most recent semicolon as might be an - # entity. recent_semicolon = middle[trail_start:].index(";") middle_semicolon_index = recent_semicolon + trail_start + 1 trail = middle[middle_semicolon_index:] + trail