Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fixed #21389 -- Accept most valid language codes

By removing the 'supported' keyword from the detection methods and relying
only on a cached settings.LANGUAGES, the speed of said methods has been
improved by around 4x in raw performance. This allows us to stop checking
Python's incomplete list of locales and instead rely on a less restrictive
regular expression for accepting certain locales.

HTTP Accept-Language is defined as being case-insensitive. Based on this fact,
extra performance improvements have been made; it wouldn't make sense to
check for case differences.
  • Loading branch information...
commit 2bab9d6d9ea095c4bcaeede2df576708afd46291 1 parent 48a8b71
@Bouke Bouke authored claudep committed
View
7 django/middleware/locale.py
@@ -1,7 +1,5 @@
"This is the locale selecting middleware that will look at accept headers"
-from collections import OrderedDict
-
from django.conf import settings
from django.core.urlresolvers import (is_valid_path, get_resolver,
LocaleRegexURLResolver)
@@ -21,7 +19,6 @@ class LocaleMiddleware(object):
response_redirect_class = HttpResponseRedirect
def __init__(self):
- self._supported_languages = OrderedDict(settings.LANGUAGES)
self._is_language_prefix_patterns_used = False
for url_pattern in get_resolver(None).url_patterns:
if isinstance(url_pattern, LocaleRegexURLResolver):
@@ -37,9 +34,7 @@ def process_request(self, request):
def process_response(self, request, response):
language = translation.get_language()
- language_from_path = translation.get_language_from_path(
- request.path_info, supported=self._supported_languages
- )
+ language_from_path = translation.get_language_from_path(request.path_info)
if (response.status_code == 404 and not language_from_path
and self.is_language_prefix_patterns_used()):
urlconf = getattr(request, 'urlconf', None)
View
4 django/utils/translation/__init__.py
@@ -187,8 +187,8 @@ def get_language_from_request(request, check_path=False):
return _trans.get_language_from_request(request, check_path)
-def get_language_from_path(path, supported=None):
- return _trans.get_language_from_path(path, supported=supported)
+def get_language_from_path(path):
+ return _trans.get_language_from_path(path)
def templatize(src, origin=None):
View
2  django/utils/translation/trans_null.py
@@ -68,5 +68,5 @@ def get_language_from_request(request, check_path=False):
return settings.LANGUAGE_CODE
-def get_language_from_path(request, supported=None):
+def get_language_from_path(request):
return None
View
83 django/utils/translation/trans_real.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
from collections import OrderedDict
-import locale
import os
import re
import sys
@@ -29,9 +28,9 @@
# The default translation is based on the settings file.
_default = None
-# This is a cache for normalized accept-header languages to prevent multiple
-# file lookups when checking the same locale on repeated requests.
-_accepted = {}
+# This is a cache of settings.LANGUAGES in an OrderedDict for easy lookups by
+# key
+_supported = None
# magic gettext number to separate context from message
CONTEXT_SEPARATOR = "\x04"
@@ -63,9 +62,11 @@ def reset_cache(**kwargs):
Reset global state when LANGUAGES setting has been changed, as some
languages should no longer be accepted.
"""
- if kwargs['setting'] == 'LANGUAGES':
- global _accepted
- _accepted = {}
+ if kwargs['setting'] in ('LANGUAGES', 'LANGUAGE_CODE'):
+ global _supported
+ _supported = None
+ check_for_language.cache_clear()
+ get_supported_language_variant.cache_clear()
def to_locale(language, to_lower=False):
@@ -388,7 +389,7 @@ def all_locale_paths():
return [globalpath] + list(settings.LOCALE_PATHS)
-@lru_cache.lru_cache(maxsize=None)
+@lru_cache.lru_cache()
def check_for_language(lang_code):
"""
Checks whether there is a global language file for the given language
@@ -404,39 +405,42 @@ def check_for_language(lang_code):
return False
-def get_supported_language_variant(lang_code, supported=None, strict=False):
+@lru_cache.lru_cache(maxsize=1000)
+def get_supported_language_variant(lang_code, strict=False):
"""
Returns the language-code that's listed in supported languages, possibly
selecting a more generic variant. Raises LookupError if nothing found.
If `strict` is False (the default), the function will look for an alternative
country-specific variant when the currently checked is not found.
+
+ lru_cache should have a maxsize to prevent memory exhaustion attacks,
+ as the provided language codes are taken from the HTTP request. See also
+ <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
"""
- if supported is None:
+ global _supported
+ if _supported is None:
from django.conf import settings
- supported = OrderedDict(settings.LANGUAGES)
+ _supported = OrderedDict(settings.LANGUAGES)
if lang_code:
# some browsers use deprecated language codes -- #18419
replacement = _BROWSERS_DEPRECATED_LOCALES.get(lang_code)
- if lang_code not in supported and replacement in supported:
+ if lang_code not in _supported and replacement in _supported:
return replacement
- # if fr-CA is not supported, try fr-ca; if that fails, fallback to fr.
+ # if fr-ca is not supported, try fr.
generic_lang_code = lang_code.split('-')[0]
- variants = (lang_code, lang_code.lower(), generic_lang_code,
- generic_lang_code.lower())
- for code in variants:
- if code in supported and check_for_language(code):
+ for code in (lang_code, generic_lang_code):
+ if code in _supported and check_for_language(code):
return code
if not strict:
# if fr-fr is not supported, try fr-ca.
- for supported_code in supported:
- if supported_code.startswith((generic_lang_code + '-',
- generic_lang_code.lower() + '-')):
+ for supported_code in _supported:
+ if supported_code.startswith(generic_lang_code + '-'):
return supported_code
raise LookupError(lang_code)
-def get_language_from_path(path, supported=None, strict=False):
+def get_language_from_path(path, strict=False):
"""
Returns the language-code if there is a valid language-code
found in the `path`.
@@ -444,15 +448,12 @@ def get_language_from_path(path, supported=None, strict=False):
If `strict` is False (the default), the function will look for an alternative
country-specific variant when the currently checked is not found.
"""
- if supported is None:
- from django.conf import settings
- supported = OrderedDict(settings.LANGUAGES)
regex_match = language_code_prefix_re.match(path)
if not regex_match:
return None
lang_code = regex_match.group(1)
try:
- return get_supported_language_variant(lang_code, supported, strict=strict)
+ return get_supported_language_variant(lang_code, strict=strict)
except LookupError:
return None
@@ -467,25 +468,26 @@ def get_language_from_request(request, check_path=False):
If check_path is True, the URL path prefix will be checked for a language
code, otherwise this is skipped for backwards compatibility.
"""
- global _accepted
from django.conf import settings
- supported = OrderedDict(settings.LANGUAGES)
+ global _supported
+ if _supported is None:
+ _supported = OrderedDict(settings.LANGUAGES)
if check_path:
- lang_code = get_language_from_path(request.path_info, supported)
+ lang_code = get_language_from_path(request.path_info)
if lang_code is not None:
return lang_code
if hasattr(request, 'session'):
# for backwards compatibility django_language is also checked (remove in 1.8)
lang_code = request.session.get(LANGUAGE_SESSION_KEY, request.session.get('django_language'))
- if lang_code in supported and lang_code is not None and check_for_language(lang_code):
+ if lang_code in _supported and lang_code is not None and check_for_language(lang_code):
return lang_code
lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
try:
- return get_supported_language_variant(lang_code, supported)
+ return get_supported_language_variant(lang_code)
except LookupError:
pass
@@ -494,29 +496,16 @@ def get_language_from_request(request, check_path=False):
if accept_lang == '*':
break
- # 'normalized' is the root name of the locale in POSIX format (which is
- # the format used for the directories holding the MO files).
- normalized = locale.locale_alias.get(to_locale(accept_lang, True))
- if not normalized:
+ if not language_code_re.search(accept_lang):
continue
- # Remove the default encoding from locale_alias.
- normalized = normalized.split('.')[0]
-
- if normalized in _accepted:
- # We've seen this locale before and have an MO file for it, so no
- # need to check again.
- return _accepted[normalized]
try:
- accept_lang = get_supported_language_variant(accept_lang, supported)
+ return get_supported_language_variant(accept_lang)
except LookupError:
continue
- else:
- _accepted[normalized] = accept_lang
- return accept_lang
try:
- return get_supported_language_variant(settings.LANGUAGE_CODE, supported)
+ return get_supported_language_variant(settings.LANGUAGE_CODE)
except LookupError:
return settings.LANGUAGE_CODE
@@ -732,7 +721,7 @@ def parse_accept_lang_header(lang_string):
Any format errors in lang_string results in an empty list being returned.
"""
result = []
- pieces = accept_language_re.split(lang_string)
+ pieces = accept_language_re.split(lang_string.lower())
if pieces[-1]:
return []
for i in range(0, len(pieces) - 1, 3):
View
8 docs/releases/1.7.txt
@@ -1125,6 +1125,14 @@ Miscellaneous
For example, if you use multi-inheritance, you need to define custom primary
key fields on parent models, otherwise the default ``id`` fields will clash.
+* :meth:`~django.utils.translation.parse_accept_lang_header` now returns
+ lowercase locales instead of preserving the case in which they were
+ provided. As locales should be treated case-insensitively, this allows us
+ to speed up locale detection.
+
+* :meth:`~django.utils.translation.get_language_from_path` and
+ :meth:`~django.utils.translation.trans_real.get_supported_language_variant`
+ no longer accept a ``supported`` argument.
+
.. _deprecated-features-1.7:
Features deprecated in 1.7
View
22 tests/i18n/tests.py
@@ -821,10 +821,10 @@ def test_parse_spec_http_header(self):
p = trans_real.parse_accept_lang_header
# Good headers.
self.assertEqual([('de', 1.0)], p('de'))
- self.assertEqual([('en-AU', 1.0)], p('en-AU'))
+ self.assertEqual([('en-au', 1.0)], p('en-AU'))
self.assertEqual([('es-419', 1.0)], p('es-419'))
self.assertEqual([('*', 1.0)], p('*;q=1.00'))
- self.assertEqual([('en-AU', 0.123)], p('en-AU;q=0.123'))
+ self.assertEqual([('en-au', 0.123)], p('en-AU;q=0.123'))
self.assertEqual([('en-au', 0.5)], p('en-au;q=0.5'))
self.assertEqual([('en-au', 1.0)], p('en-au;q=1.0'))
self.assertEqual([('da', 1.0), ('en', 0.5), ('en-gb', 0.25)], p('da, en-gb;q=0.25, en;q=0.5'))
@@ -884,6 +884,24 @@ def test_parse_literal_http_header(self):
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-cn,de'}
self.assertEqual(g(r), 'zh-cn')
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'NL'}
+ self.assertEqual('nl', g(r))
+
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'fy'}
+ self.assertEqual('fy', g(r))
+
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'ia'}
+ self.assertEqual('ia', g(r))
+
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'sr-latn'}
+ self.assertEqual('sr-latn', g(r))
+
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hans'}
+ self.assertEqual('zh-hans', g(r))
+
+ r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hant'}
+ self.assertEqual('zh-hant', g(r))
+
@override_settings(
LANGUAGES=(
('en', 'English'),
Please sign in to comment.
Something went wrong with that request. Please try again.