Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only split headers on \n and \r\n #189

Merged
merged 2 commits into from Oct 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions cyclone/httputil.py
Expand Up @@ -24,6 +24,7 @@
from cyclone.util import ObjectDict
from cyclone.escape import native_str
from cyclone.escape import parse_qs_bytes
from cyclone.escape import to_unicode
from cyclone.escape import utf8

from twisted.python import log
Expand Down Expand Up @@ -129,7 +130,9 @@ def parse(cls, headers):
[('Content-Length', '42'), ('Content-Type', 'text/html')]
"""
h = cls()
for line in headers.splitlines():
for line in headers.split("\n"):
if line.endswith("\r"):
line = line[:-1]
if line:
h.parse_line(line)
return h
Expand Down Expand Up @@ -265,7 +268,7 @@ def parse_multipart_form_data(boundary, data, arguments, files):
if eoh == -1:
log.msg("multipart/form-data missing headers")
continue
headers = HTTPHeaders.parse(part[:eoh])
headers = HTTPHeaders.parse(to_unicode(part[:eoh]))
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HTTPHeaders.parse is also used in cyclone.httpserver; that usage also converts the argument to unicode first.

disp_header = headers.get("Content-Disposition", "")
disposition, disp_params = _parse_header(disp_header)
if disposition != "form-data" or not part.endswith(b"\r\n"):
Expand Down
53 changes: 53 additions & 0 deletions cyclone/tests/test_httputil.py
@@ -0,0 +1,53 @@
from twisted.trial import unittest

from cyclone.httputil import HTTPHeaders


class TestHTTPHeaders(unittest.TestCase):
    """Checks how ``HTTPHeaders.parse`` splits a raw header block into lines."""

    def _parse_expecting(self, raw, expected_count):
        """Parse *raw*, assert it yields *expected_count* headers, return them."""
        parsed = HTTPHeaders.parse(raw)
        pairs = list(parsed.get_all())
        self.assertEqual(len(pairs), expected_count)
        return parsed

    def test_parse_no_cr(self):
        """
        Header lines separated by a bare LF are parsed as separate headers.

        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
        "The line terminator for message-header fields is the sequence CRLF.
        However, we recommend that applications, when parsing such headers,
        recognize a single LF as a line terminator and ignore the leading CR."

        https://tools.ietf.org/html/rfc7230#section-3.5
        "Although the line terminator for the start-line and header fields is
        the sequence CRLF, a recipient MAY recognize a single LF as a line
        terminator and ignore any preceding CR."
        """
        raw = u"Foo: bar\n" u"Baz: qux"
        parsed = self._parse_expecting(raw, 2)
        self.assertEqual(parsed.get("foo"), "bar")
        self.assertEqual(parsed.get("baz"), "qux")

    def test_parse_crlf(self):
        """
        Header lines separated by CRLF are parsed as separate headers.

        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
        "The line terminator for message-header fields is the sequence CRLF.
        However, we recommend that applications, when parsing such headers,
        recognize a single LF as a line terminator and ignore the leading CR."
        """
        raw = u"Foo: bar\r\n" u"Baz: qux"
        parsed = self._parse_expecting(raw, 2)
        self.assertEqual(parsed.get("foo"), "bar")
        self.assertEqual(parsed.get("baz"), "qux")

    def test_parse_problematic_newlines(self):
        """
        Characters that ``str.splitlines`` treats as line boundaries, but that
        the RFCs do not define as newlines, must stay inside the header value
        rather than splitting it.

        https://docs.python.org/3/library/stdtypes.html#str.splitlines
        """
        raw = (
            u"Foo: bar\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029asdf: jkl\r\n" u"Baz: qux"
        )
        parsed = self._parse_expecting(raw, 2)
        # The whole first line, odd separators included, is the value of "foo".
        self.assertEqual(
            parsed.get("foo"), u"bar\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029asdf: jkl"
        )
        self.assertEqual(parsed.get("baz"), "qux")