Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

[1.2.X] Fixed #14235 - UnicodeDecodeError in CSRF middleware

  
Thanks to jbg for the report.
  
This changeset essentially backs out [13698] in favour of a method that
sanitizes the token rather than escaping it.

Backport of [13732] from trunk.




git-svn-id: http://code.djangoproject.com/svn/django/branches/releases/1.2.X@13733 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 890b0b6234f3ae742a9cb97f27fe0bb7d900b93b 1 parent 43988e9
Luke Plant authored September 10, 2010
23  django/middleware/csrf.py
@@ -13,7 +13,6 @@
13 13
 from django.core.urlresolvers import get_callable
14 14
 from django.utils.cache import patch_vary_headers
15 15
 from django.utils.hashcompat import md5_constructor
16  
-from django.utils.html import escape
17 16
 from django.utils.safestring import mark_safe
18 17
 
19 18
 _POST_FORM_RE = \
@@ -53,8 +52,8 @@ def _make_legacy_session_token(session_id):
53 52
 
54 53
 def get_token(request):
55 54
     """
56  
-    Returns the the CSRF token required for a POST form. No assumptions should
57  
-    be made about what characters might be in the CSRF token.
  55
+    Returns the CSRF token required for a POST form. The token is an
  56
+    alphanumeric value.
58 57
 
59 58
     A side effect of calling this function is to make the csrf_protect
60 59
     decorator and the CsrfViewMiddleware add a CSRF cookie and a 'Vary: Cookie'
@@ -65,6 +64,17 @@ def get_token(request):
65 64
     return request.META.get("CSRF_COOKIE", None)
66 65
 
67 66
 
  67
+def _sanitize_token(token):
  68
+    # Allow only alphanum, and ensure we return a 'str' for the sake of the post
  69
+    # processing middleware.
  70
+    token = re.sub('[^a-zA-Z0-9]', '', str(token.decode('ascii', 'ignore')))
  71
+    if token == "":
  72
+        # In case the cookie has been truncated to nothing at some point.
  73
+        return _get_new_csrf_key()
  74
+    else:
  75
+        return token
  76
+
  77
+
68 78
 class CsrfViewMiddleware(object):
69 79
     """
70 80
     Middleware that requires a present and correct csrfmiddlewaretoken
@@ -90,7 +100,10 @@ def accept():
90 100
         # request, so it's available to the view.  We'll store it in a cookie when
91 101
         # we reach the response.
92 102
         try:
93  
-            request.META["CSRF_COOKIE"] = request.COOKIES[settings.CSRF_COOKIE_NAME]
  103
+            # In case of cookies from untrusted sources, we strip anything
  104
+            # dangerous at this point, so that the cookie + token will have the
  105
+            # same, sanitized value.
  106
+            request.META["CSRF_COOKIE"] = _sanitize_token(request.COOKIES[settings.CSRF_COOKIE_NAME])
94 107
             cookie_is_new = False
95 108
         except KeyError:
96 109
             # No cookie, so create one.  This will be sent with the next
@@ -235,7 +248,7 @@ def add_csrf_field(match):
235 248
                 """Returns the matched <form> tag plus the added <input> element"""
236 249
                 return mark_safe(match.group() + "<div style='display:none;'>" + \
237 250
                 "<input type='hidden' " + idattributes.next() + \
238  
-                " name='csrfmiddlewaretoken' value='" + escape(csrf_token) + \
  251
+                " name='csrfmiddlewaretoken' value='" + csrf_token + \
239 252
                 "' /></div>")
240 253
 
241 254
             # Modify any POST forms
3  django/template/defaulttags.py
@@ -9,7 +9,6 @@
9 9
 from django.template import get_library, Library, InvalidTemplateLibrary
10 10
 from django.template.smartif import IfParser, Literal
11 11
 from django.conf import settings
12  
-from django.utils.html import escape
13 12
 from django.utils.encoding import smart_str, smart_unicode
14 13
 from django.utils.safestring import mark_safe
15 14
 
@@ -43,7 +42,7 @@ def render(self, context):
43 42
             if csrf_token == 'NOTPROVIDED':
44 43
                 return mark_safe(u"")
45 44
             else:
46  
-                return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % escape(csrf_token))
  45
+                return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % csrf_token)
47 46
         else:
48 47
             # It's very probable that the token is missing because of
49 48
             # misconfiguration, so we raise a warning
25  tests/regressiontests/csrf_tests/tests.py
@@ -6,15 +6,14 @@
6 6
 from django.views.decorators.csrf import csrf_exempt, csrf_view_exempt
7 7
 from django.core.context_processors import csrf
8 8
 from django.contrib.sessions.middleware import SessionMiddleware
9  
-from django.utils.html import escape
10 9
 from django.utils.importlib import import_module
11 10
 from django.conf import settings
12 11
 from django.template import RequestContext, Template
13 12
 
14 13
 # Response/views used for CsrfResponseMiddleware and CsrfViewMiddleware tests
15 14
 def post_form_response():
16  
-    resp = HttpResponse(content="""
17  
-<html><body><form method="post"><input type="text" /></form></body></html>
  15
+    resp = HttpResponse(content=u"""
  16
+<html><body><h1>\u00a1Unicode!<form method="post"><input type="text" /></form></body></html>
18 17
 """, mimetype="text/html")
19 18
     return resp
20 19
 
@@ -58,8 +57,9 @@ def is_secure(self):
58 57
 
59 58
 class CsrfMiddlewareTest(TestCase):
60 59
     # The csrf token is potentially from an untrusted source, so could have
61  
-    # characters that need escaping
62  
-    _csrf_id = "<1>"
  60
+    # characters that need dealing with.
  61
+    _csrf_id_cookie = "<1>\xc2\xa1"
  62
+    _csrf_id = "1"
63 63
 
64 64
     # This is a valid session token for this ID and secret key.  This was generated using
65 65
     # the old code that we're to be backwards-compatible with.  Don't use the CSRF code
@@ -74,7 +74,7 @@ def _get_GET_no_csrf_cookie_request(self):
74 74
 
75 75
     def _get_GET_csrf_cookie_request(self):
76 76
         req = TestingHttpRequest()
77  
-        req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id
  77
+        req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie
78 78
         return req
79 79
 
80 80
     def _get_POST_csrf_cookie_request(self):
@@ -104,7 +104,7 @@ def _get_POST_session_request_no_token(self):
104 104
         return req
105 105
 
106 106
     def _check_token_present(self, response, csrf_id=None):
107  
-        self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % escape(csrf_id or self._csrf_id))
  107
+        self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id))
108 108
 
109 109
     # Check the post processing and outgoing cookie
110 110
     def test_process_response_no_csrf_cookie(self):
@@ -290,6 +290,17 @@ def test_token_node_no_csrf_cookie(self):
290 290
         resp = token_view(req)
291 291
         self.assertEquals(u"", resp.content)
292 292
 
  293
+    def test_token_node_empty_csrf_cookie(self):
  294
+        """
  295
+        Check that we get a new token if the csrf_cookie is the empty string
  296
+        """
  297
+        req = self._get_GET_no_csrf_cookie_request()
  298
+        req.COOKIES[settings.CSRF_COOKIE_NAME] = ""
  299
+        CsrfViewMiddleware().process_view(req, token_view, (), {})
  300
+        resp = token_view(req)
  301
+
  302
+        self.assertNotEqual(u"", resp.content)
  303
+
293 304
     def test_token_node_with_csrf_cookie(self):
294 305
         """
295 306
         Check that CsrfTokenNode works when a CSRF cookie is set

0 notes on commit 890b0b6

Please sign in to comment.
Something went wrong with that request. Please try again.