Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Refactored the unmangling of the WSGI environ.

  • Loading branch information...
commit 4e88d106dc18a3e7c01b218a56451610821275c7 1 parent 636860f
Aymeric Augustin authored

Showing 1 changed file with 29 additions and 14 deletions. Show diff stats Hide diff stats

  1. 43  django/core/handlers/wsgi.py
43  django/core/handlers/wsgi.py
@@ -20,6 +20,9 @@
20 20
 
21 21
 logger = logging.getLogger('django.request')
22 22
 
  23
+# encode() and decode() expect the charset to be a native string.
  24
+ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
  25
+
23 26
 
24 27
 class LimitedStream(object):
25 28
     '''
@@ -214,13 +217,10 @@ def get_path_info(environ):
214 217
     """
215 218
     Returns the HTTP request's PATH_INFO as a unicode string.
216 219
     """
217  
-    path_info = environ.get('PATH_INFO', str('/'))
218  
-    # Under Python 3, strings in environ are decoded with ISO-8859-1;
219  
-    # re-encode to recover the original bytestring provided by the web server.
220  
-    if six.PY3:
221  
-        path_info = path_info.encode('iso-8859-1')
  220
+    path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
  221
+
222 222
     # It'd be better to implement URI-to-IRI decoding, see #19508.
223  
-    return path_info.decode('utf-8')
  223
+    return path_info.decode(UTF_8)
224 224
 
225 225
 
226 226
 def get_script_name(environ):
@@ -239,14 +239,29 @@ def get_script_name(environ):
239 239
     # rewrites. Unfortunately not every Web server (lighttpd!) passes this
240 240
     # information through all the time, so FORCE_SCRIPT_NAME, above, is still
241 241
     # needed.
242  
-    script_url = environ.get('SCRIPT_URL', environ.get('REDIRECT_URL', str('')))
  242
+    script_url = get_bytes_from_wsgi(environ, 'SCRIPT_URL', '')
  243
+    if not script_url:
  244
+        script_url = get_bytes_from_wsgi(environ, 'REDIRECT_URL', '')
  245
+
243 246
     if script_url:
244  
-        script_name = script_url[:-len(environ.get('PATH_INFO', str('')))]
  247
+        path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '')
  248
+        script_name = script_url[:-len(path_info)]
245 249
     else:
246  
-        script_name = environ.get('SCRIPT_NAME', str(''))
247  
-    # Under Python 3, strings in environ are decoded with ISO-8859-1;
248  
-    # re-encode to recover the original bytestring provided by the web server.
249  
-    if six.PY3:
250  
-        script_name = script_name.encode('iso-8859-1')
  250
+        script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
  251
+
251 252
     # It'd be better to implement URI-to-IRI decoding, see #19508.
252  
-    return script_name.decode('utf-8')
  253
+    return script_name.decode(UTF_8)
  254
+
  255
+
  256
+def get_bytes_from_wsgi(environ, key, default):
  257
+    """
  258
+    Get a value from the WSGI environ dictionary as bytes.
  259
+
  260
+    key and default should be str objects. Under Python 2 they may also be
  261
+    unicode objects provided they only contain ASCII characters.
  262
+    """
  263
+    value = environ.get(str(key), str(default))
  264
+    # Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
  265
+    # decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
  266
+    # is the default. Re-encode to recover the original bytestring.
  267
+    return value if six.PY2 else value.encode(ISO_8859_1)

0 notes on commit 4e88d10

Please sign in to comment.
Something went wrong with that request. Please try again.