Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

i18n security fix. Details will be posted shortly to the Django mailing lists and the official weblog.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@6608 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 842a771e0527c36a9bcb3984057bb30903e71af3 1 parent cb6ecfe
Jacob Kaplan-Moss authored October 26, 2007
114  django/utils/translation/trans_real.py
... ...
@@ -1,8 +1,12 @@
1 1
 "Translation helper functions"
2 2
 
3  
-import os, re, sys
  3
+import locale
  4
+import os
  5
+import re
  6
+import sys
4 7
 import gettext as gettext_module
5 8
 from cStringIO import StringIO
  9
+
6 10
 from django.utils.encoding import force_unicode
7 11
 
8 12
 try:
@@ -25,15 +29,25 @@ def currentThread():
25 29
 # The default translation is based on the settings file.
26 30
 _default = None
27 31
 
28  
-# This is a cache for accept-header to translation object mappings to prevent
29  
-# the accept parser to run multiple times for one user.
  32
+# This is a cache for normalised accept-header languages to prevent multiple
  33
+# file lookups when checking the same locale on repeated requests.
30 34
 _accepted = {}
31 35
 
32  
-def to_locale(language):
  36
+# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
  37
+accept_language_re = re.compile(r'''
  38
+        ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*)   # "en", "en-au", "x-y-z", "*"
  39
+        (?:;q=(0(?:\.\d{,3})?|1(?:.0{,3})?))?   # Optional "q=1.00", "q=0.8"
  40
+        (?:\s*,\s*|$)                            # Multiple accepts per header.
  41
+        ''', re.VERBOSE)
  42
+
  43
+def to_locale(language, to_lower=False):
33 44
     "Turns a language name (en-us) into a locale name (en_US)."
34 45
     p = language.find('-')
35 46
     if p >= 0:
36  
-        return language[:p].lower()+'_'+language[p+1:].upper()
  47
+        if to_lower:
  48
+            return language[:p].lower()+'_'+language[p+1:].lower()
  49
+        else:
  50
+            return language[:p].lower()+'_'+language[p+1:].upper()
37 51
     else:
38 52
         return language.lower()
39 53
 
@@ -334,46 +348,40 @@ def get_language_from_request(request):
334 348
         if lang_code in supported and lang_code is not None and check_for_language(lang_code):
335 349
             return lang_code
336 350
 
337  
-    lang_code = request.COOKIES.get('django_language', None)
338  
-    if lang_code in supported and lang_code is not None and check_for_language(lang_code):
  351
+    lang_code = request.COOKIES.get('django_language')
  352
+    if lang_code and lang_code in supported and check_for_language(lang_code):
339 353
         return lang_code
340 354
 
341  
-    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', None)
342  
-    if accept is not None:
343  
-
344  
-        t = _accepted.get(accept, None)
345  
-        if t is not None:
346  
-            return t
347  
-
348  
-        def _parsed(el):
349  
-            p = el.find(';q=')
350  
-            if p >= 0:
351  
-                lang = el[:p].strip()
352  
-                order = int(float(el[p+3:].strip())*100)
353  
-            else:
354  
-                lang = el
355  
-                order = 100
356  
-            p = lang.find('-')
357  
-            if p >= 0:
358  
-                mainlang = lang[:p]
359  
-            else:
360  
-                mainlang = lang
361  
-            return (lang, mainlang, order)
362  
-
363  
-        langs = [_parsed(el) for el in accept.split(',')]
364  
-        langs.sort(lambda a,b: -1*cmp(a[2], b[2]))
365  
-
366  
-        for lang, mainlang, order in langs:
367  
-            if lang in supported or mainlang in supported:
368  
-                langfile = gettext_module.find('django', globalpath, [to_locale(lang)])
369  
-                if langfile:
370  
-                    # reconstruct the actual language from the language
371  
-                    # filename, because otherwise we might incorrectly
372  
-                    # report de_DE if we only have de available, but
373  
-                    # did find de_DE because of language normalization
374  
-                    lang = langfile[len(globalpath):].split(os.path.sep)[1]
375  
-                    _accepted[accept] = lang
376  
-                    return lang
  355
+    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
  356
+    for lang, unused in parse_accept_lang_header(accept):
  357
+        if lang == '*':
  358
+            break
  359
+
  360
+        # We have a very restricted form for our language files (no encoding
  361
+        # specifier, since they all must be UTF-8 and only one possible
  362
+        # language each time). So we avoid the overhead of gettext.find() and
  363
+        # look up the MO file manually.
  364
+
  365
+        normalized = locale.locale_alias.get(to_locale(lang, True))
  366
+        if not normalized:
  367
+            continue
  368
+
  369
+        # Remove the default encoding from locale_alias
  370
+        normalized = normalized.split('.')[0]
  371
+
  372
+        if normalized in _accepted:
  373
+            # We've seen this locale before and have an MO file for it, so no
  374
+            # need to check again.
  375
+            return _accepted[normalized]
  376
+
  377
+        for lang in (normalized, normalized.split('_')[0]):
  378
+            if lang not in supported:
  379
+                continue
  380
+            langfile = os.path.join(globalpath, lang, 'LC_MESSAGES',
  381
+                    'django.mo')
  382
+            if os.path.exists(langfile):
  383
+                _accepted[normalized] = lang
  384
+            return lang
377 385
 
378 386
     return settings.LANGUAGE_CODE
379 387
 
@@ -505,3 +513,23 @@ def templatize(src):
505 513
                 out.write(blankout(t.contents, 'X'))
506 514
     return out.getvalue()
507 515
 
  516
+def parse_accept_lang_header(lang_string):
  517
+    """
  518
+    Parses the lang_string, which is the body of an HTTP Accept-Language
  519
+    header, and returns a list of (lang, q-value), ordered by 'q' values.
  520
+
  521
+    Any format errors in lang_string results in an empty list being returned.
  522
+    """
  523
+    result = []
  524
+    pieces = accept_language_re.split(lang_string)
  525
+    if pieces[-1]:
  526
+        return []
  527
+    for i in range(0, len(pieces) - 1, 3):
  528
+        first, lang, priority = pieces[i : i + 3]
  529
+        if first:
  530
+            return []
  531
+        priority = priority and float(priority) or 1.0
  532
+        result.append((lang, priority))
  533
+    result.sort(lambda x, y: -cmp(x[1], y[1]))
  534
+    return result
  535
+
8  tests/regressiontests/i18n/tests.py
... ...
@@ -1,6 +1,7 @@
1 1
 # coding: utf-8
  2
+import misc
2 3
 
3  
-ur"""
  4
+regressions = ur"""
4 5
 Format string interpolation should work with *_lazy objects.
5 6
 
6 7
 >>> from django.utils.translation import ugettext_lazy, activate, deactivate, gettext_lazy
@@ -39,3 +40,8 @@
39 40
 >>> unicode(django.utils.translation.string_concat("dja", "ngo"))
40 41
 u'django'
41 42
 """
  43
+
  44
+__test__ = {
  45
+    'regressions': regressions,
  46
+    'misc': misc.tests,
  47
+}

0 notes on commit 842a771

Please sign in to comment.
Something went wrong with that request. Please try again.