Skip to content

Commit

Permalink
httputil: Rewrite cookie parsing
Browse files Browse the repository at this point in the history
Move from the python standard library to a parsing function copied from
Django. This parser more closely matches browser behavior. The primary
motivation is that differences between server-side and browser cookie
parsing can lead to an XSRF bypass, as in
https://hackerone.com/reports/26647. A secondary benefit is that this
makes it possible to work with cookie headers containing cookies that
are invalid according to the spec, which is a surprisingly common
request.

Closes tornadoweb#1851
Closes tornadoweb#633
Closes tornadoweb#1434
Closes tornadoweb#1176
  • Loading branch information
bdarnell authored and bwangelme committed Oct 31, 2016
1 parent 45d2562 commit f5ea831
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 6 deletions.
93 changes: 90 additions & 3 deletions tornado/httputil.py
Expand Up @@ -379,10 +379,18 @@ def cookies(self):
self._cookies = Cookie.SimpleCookie()
if "Cookie" in self.headers:
try:
self._cookies.load(
native_str(self.headers["Cookie"]))
parsed = parse_cookie(self.headers["Cookie"])
except Exception:
self._cookies = {}
pass
else:
for k, v in parsed.items():
try:
self._cookies[k] = v
except Exception:
# SimpleCookie imposes some restrictions on keys;
# parse_cookie does not. Discard any cookies
# with disallowed keys.
pass
return self._cookies

def write(self, chunk, callback=None):
Expand Down Expand Up @@ -909,3 +917,82 @@ def split_host_and_port(netloc):
host = netloc
port = None
return (host, port)

_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = ''.join

def _unquote_cookie(str):
"""Handle double quotes and escaping in cookie values.
This method is copied verbatim from the Python 3.5 standard
library (http.cookies._unquote) so we don't have to depend on
non-public interfaces.
"""
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if str is None or len(str) < 2:
return str
if str[0] != '"' or str[-1] != '"':
return str

# We have to assume that we must decode this string.
# Down to work.

# Remove the "s
str = str[1:-1]

# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
i = 0
n = len(str)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(str, i)
q_match = _QuotePatt.search(str, i)
if not o_match and not q_match: # Neither matched
res.append(str[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(str[i:k])
res.append(str[k+1])
i = k + 2
else: # OctalPatt matched
res.append(str[i:j])
res.append(chr(int(str[j+1:j+4], 8)))
i = j + 4
return _nulljoin(res)


def parse_cookie(cookie):
"""Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
This function attempts to mimic browser cookie parsing behavior;
it specifically does not follow any of the cookie-related RFCs
(because browsers don't either).
The algorithm used is identical to that used by Django version 1.9.10.
.. versionadded:: 4.4.2
"""
cookiedict = {}
for chunk in cookie.split(str(';')):
if str('=') in chunk:
key, val = chunk.split(str('='), 1)
else:
# Assume an empty name per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
key, val = str(''), chunk
key, val = key.strip(), val.strip()
if key or val:
# unquote using Python's algorithm.
cookiedict[key] = _unquote_cookie(val)
return cookiedict
53 changes: 52 additions & 1 deletion tornado/test/httputil_test.py
@@ -1,8 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-


from __future__ import absolute_import, division, print_function, with_statement
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie
from tornado.escape import utf8, native_str
from tornado.log import gen_log
from tornado.testing import ExpectLog
Expand Down Expand Up @@ -378,3 +379,53 @@ def test_parse_request_start_line(self):
self.assertEqual(parsed_start_line.method, self.METHOD)
self.assertEqual(parsed_start_line.path, self.PATH)
self.assertEqual(parsed_start_line.version, self.VERSION)


class ParseCookieTest(unittest.TestCase):
# These tests copied from Django:
# https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28
def test_python_cookies(self):
"""
Test cases copied from Python's Lib/test/test_http_cookies.py
"""
self.assertEqual(parse_cookie('chips=ahoy; vienna=finger'), {'chips': 'ahoy', 'vienna': 'finger'})
# Here parse_cookie() differs from Python's cookie parsing in that it
# treats all semicolons as delimiters, even within quotes.
self.assertEqual(
parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'),
{'keebler': '"E=mc2', 'L': '\\"Loves\\"', 'fudge': '\\012', '': '"'}
)
# Illegal cookies that have an '=' char in an unquoted value.
self.assertEqual(parse_cookie('keebler=E=mc2'), {'keebler': 'E=mc2'})
# Cookies with ':' character in their name.
self.assertEqual(parse_cookie('key:term=value:term'), {'key:term': 'value:term'})
# Cookies with '[' and ']'.
self.assertEqual(parse_cookie('a=b; c=[; d=r; f=h'), {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'})

def test_cookie_edgecases(self):
# Cookies that RFC6265 allows.
self.assertEqual(parse_cookie('a=b; Domain=example.com'), {'a': 'b', 'Domain': 'example.com'})
# parse_cookie() has historically kept only the last cookie with the
# same name.
self.assertEqual(parse_cookie('a=b; h=i; a=c'), {'a': 'c', 'h': 'i'})

def test_invalid_cookies(self):
"""
Cookie strings that go against RFC6265 but browsers will send if set
via document.cookie.
"""
# Chunks without an equals sign appear as unnamed values per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
self.assertIn('django_language', parse_cookie('abc=def; unnamed; django_language=en').keys())
# Even a double quote may be an unamed value.
self.assertEqual(parse_cookie('a=b; "; c=d'), {'a': 'b', '': '"', 'c': 'd'})
# Spaces in names and values, and an equals sign in values.
self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), {'a b c': 'd e = f', 'gh': 'i'})
# More characters the spec forbids.
self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'})
# Unicode characters. The spec only allows ASCII.
self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')})
# Browsers don't send extra whitespace or semicolons in Cookie headers,
# but parse_cookie() should parse whitespace the same way
# document.cookie parses whitespace.
self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), {'': 'b', 'c': ''})
4 changes: 2 additions & 2 deletions tornado/test/web_test.py
Expand Up @@ -279,8 +279,8 @@ def test_cookie_special_char(self):

data = [('foo=a=b', 'a=b'),
('foo="a=b"', 'a=b'),
('foo="a;b"', 'a;b'),
# ('foo=a\\073b', 'a;b'), # even encoded, ";" is a delimiter
('foo="a;b"', '"a'), # even quoted, ";" is a delimiter
('foo=a\\073b', 'a\\073b'), # escapes only decoded in quotes
('foo="a\\073b"', 'a;b'),
('foo="a\\"b"', 'a"b'),
]
Expand Down

0 comments on commit f5ea831

Please sign in to comment.