Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

i18n security fix. Details will be posted shortly to the Django maili…

…ng lists and the official weblog.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@6608 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 842a771e0527c36a9bcb3984057bb30903e71af3 1 parent cb6ecfe
@jacobian jacobian authored
View
114 django/utils/translation/trans_real.py
@@ -1,8 +1,12 @@
"Translation helper functions"
-import os, re, sys
+import locale
+import os
+import re
+import sys
import gettext as gettext_module
from cStringIO import StringIO
+
from django.utils.encoding import force_unicode
try:
@@ -25,15 +29,25 @@ def currentThread():
# The default translation is based on the settings file.
_default = None
-# This is a cache for accept-header to translation object mappings to prevent
-# the accept parser to run multiple times for one user.
+# This is a cache for normalised accept-header languages to prevent multiple
+# file lookups when checking the same locale on repeated requests.
_accepted = {}
-def to_locale(language):
+# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
+accept_language_re = re.compile(r'''
+ ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*) # "en", "en-au", "x-y-z", "*"
+ (?:;q=(0(?:\.\d{,3})?|1(?:.0{,3})?))? # Optional "q=1.00", "q=0.8"
+ (?:\s*,\s*|$) # Multiple accepts per header.
+ ''', re.VERBOSE)
+
+def to_locale(language, to_lower=False):
"Turns a language name (en-us) into a locale name (en_US)."
p = language.find('-')
if p >= 0:
- return language[:p].lower()+'_'+language[p+1:].upper()
+ if to_lower:
+ return language[:p].lower()+'_'+language[p+1:].lower()
+ else:
+ return language[:p].lower()+'_'+language[p+1:].upper()
else:
return language.lower()
@@ -334,46 +348,40 @@ def get_language_from_request(request):
if lang_code in supported and lang_code is not None and check_for_language(lang_code):
return lang_code
- lang_code = request.COOKIES.get('django_language', None)
- if lang_code in supported and lang_code is not None and check_for_language(lang_code):
+ lang_code = request.COOKIES.get('django_language')
+ if lang_code and lang_code in supported and check_for_language(lang_code):
return lang_code
- accept = request.META.get('HTTP_ACCEPT_LANGUAGE', None)
- if accept is not None:
-
- t = _accepted.get(accept, None)
- if t is not None:
- return t
-
- def _parsed(el):
- p = el.find(';q=')
- if p >= 0:
- lang = el[:p].strip()
- order = int(float(el[p+3:].strip())*100)
- else:
- lang = el
- order = 100
- p = lang.find('-')
- if p >= 0:
- mainlang = lang[:p]
- else:
- mainlang = lang
- return (lang, mainlang, order)
-
- langs = [_parsed(el) for el in accept.split(',')]
- langs.sort(lambda a,b: -1*cmp(a[2], b[2]))
-
- for lang, mainlang, order in langs:
- if lang in supported or mainlang in supported:
- langfile = gettext_module.find('django', globalpath, [to_locale(lang)])
- if langfile:
- # reconstruct the actual language from the language
- # filename, because otherwise we might incorrectly
- # report de_DE if we only have de available, but
- # did find de_DE because of language normalization
- lang = langfile[len(globalpath):].split(os.path.sep)[1]
- _accepted[accept] = lang
- return lang
+ accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
+ for lang, unused in parse_accept_lang_header(accept):
+ if lang == '*':
+ break
+
+ # We have a very restricted form for our language files (no encoding
+ # specifier, since they all must be UTF-8 and only one possible
+ # language each time. So we avoid the overhead of gettext.find() and
+ # look up the MO file manually.
+
+ normalized = locale.locale_alias.get(to_locale(lang, True))
+ if not normalized:
+ continue
+
+ # Remove the default encoding from locale_alias
+ normalized = normalized.split('.')[0]
+
+ if normalized in _accepted:
+ # We've seen this locale before and have an MO file for it, so no
+ # need to check again.
+ return _accepted[normalized]
+
+ for lang in (normalized, normalized.split('_')[0]):
+ if lang not in supported:
+ continue
+ langfile = os.path.join(globalpath, lang, 'LC_MESSAGES',
+ 'django.mo')
+ if os.path.exists(langfile):
+ _accepted[normalized] = lang
+ return lang
return settings.LANGUAGE_CODE
@@ -505,3 +513,23 @@ def templatize(src):
out.write(blankout(t.contents, 'X'))
return out.getvalue()
+def parse_accept_lang_header(lang_string):
+ """
+ Parses the lang_string, which is the body of an HTTP Accept-Language
+ header, and returns a list of (lang, q-value), ordered by 'q' values.
+
+ Any format errors in lang_string results in an empty list being returned.
+ """
+ result = []
+ pieces = accept_language_re.split(lang_string)
+ if pieces[-1]:
+ return []
+ for i in range(0, len(pieces) - 1, 3):
+ first, lang, priority = pieces[i : i + 3]
+ if first:
+ return []
+ priority = priority and float(priority) or 1.0
+ result.append((lang, priority))
+ result.sort(lambda x, y: -cmp(x[1], y[1]))
+ return result
+
View
8 tests/regressiontests/i18n/tests.py
@@ -1,6 +1,7 @@
# coding: utf-8
+import misc
-ur"""
+regressions = ur"""
Format string interpolation should work with *_lazy objects.
>>> from django.utils.translation import ugettext_lazy, activate, deactivate, gettext_lazy
@@ -39,3 +40,8 @@
>>> unicode(django.utils.translation.string_concat("dja", "ngo"))
u'django'
"""
+
+__test__ = {
+ 'regressions': regressions,
+ 'misc': misc.tests,
+}
Please sign in to comment.
Something went wrong with that request. Please try again.