Skip to content

Commit

Permalink
Merge 34de332 into 4564503
Browse files Browse the repository at this point in the history
  • Loading branch information
mstojcevich committed Oct 6, 2019
2 parents 4564503 + 34de332 commit c4c9e89
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
7 changes: 5 additions & 2 deletions cyclone/httputil.py
Expand Up @@ -24,6 +24,7 @@
from cyclone.util import ObjectDict
from cyclone.escape import native_str
from cyclone.escape import parse_qs_bytes
from cyclone.escape import to_unicode
from cyclone.escape import utf8

from twisted.python import log
Expand Down Expand Up @@ -129,7 +130,9 @@ def parse(cls, headers):
[('Content-Length', '42'), ('Content-Type', 'text/html')]
"""
h = cls()
for line in headers.splitlines():
for line in headers.split("\n"):
if line.endswith("\r"):
line = line[:-1]
if line:
h.parse_line(line)
return h
Expand Down Expand Up @@ -265,7 +268,7 @@ def parse_multipart_form_data(boundary, data, arguments, files):
if eoh == -1:
log.msg("multipart/form-data missing headers")
continue
headers = HTTPHeaders.parse(part[:eoh])
headers = HTTPHeaders.parse(to_unicode(part[:eoh]))
disp_header = headers.get("Content-Disposition", "")
disposition, disp_params = _parse_header(disp_header)
if disposition != "form-data" or not part.endswith(b"\r\n"):
Expand Down
53 changes: 53 additions & 0 deletions cyclone/tests/test_httputil.py
@@ -0,0 +1,53 @@
from twisted.trial import unittest

from cyclone.httputil import HTTPHeaders


class TestHTTPHeaders(unittest.TestCase):
    """Checks how ``HTTPHeaders.parse`` splits raw header text into lines."""

    def test_parse_no_cr(self):
        """
        Two headers separated by a bare LF are parsed as two headers.

        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
        "The line terminator for message-header fields is the sequence CRLF.
        However, we recommend that applications, when parsing such headers,
        recognize a single LF as a line terminator and ignore the leading CR."
        https://tools.ietf.org/html/rfc7230#section-3.5
        "Although the line terminator for the start-line and header fields is
        the sequence CRLF, a recipient MAY recognize a single LF as a line
        terminator and ignore any preceding CR."
        """
        parsed = HTTPHeaders.parse(u"Foo: bar\nBaz: qux")

        self.assertEqual(len(list(parsed.get_all())), 2)
        self.assertEqual(parsed.get("foo"), "bar")
        self.assertEqual(parsed.get("baz"), "qux")

    def test_parse_crlf(self):
        """
        Two headers separated by the standard CRLF are parsed as two headers.

        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
        "The line terminator for message-header fields is the sequence CRLF.
        However, we recommend that applications, when parsing such headers,
        recognize a single LF as a line terminator and ignore the leading CR."
        """
        parsed = HTTPHeaders.parse(u"Foo: bar\r\nBaz: qux")

        self.assertEqual(len(list(parsed.get_all())), 2)
        self.assertEqual(parsed.get("foo"), "bar")
        self.assertEqual(parsed.get("baz"), "qux")

    def test_parse_problematic_newlines(self):
        """
        Characters that ``str.splitlines`` treats as line boundaries, but that
        the RFCs do not define as newlines, must stay inside the header value.

        https://docs.python.org/3/library/stdtypes.html#str.splitlines
        """
        # The value of "Foo" embeds every separator under test, followed by
        # text that would look like a second header if a split happened.
        expected_foo = u"bar\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029asdf: jkl"
        raw = u"Foo: " + expected_foo + u"\r\n" + u"Baz: qux"

        parsed = HTTPHeaders.parse(raw)

        self.assertEqual(len(list(parsed.get_all())), 2)
        self.assertEqual(parsed.get("foo"), expected_foo)
        self.assertEqual(parsed.get("baz"), "qux")

0 comments on commit c4c9e89

Please sign in to comment.