Merge 34de332 into 4564503

fiorix · Oct 6, 2019 · c4c9e89 · c4c9e89
2 parents 4564503 + 34de332
commit c4c9e89
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 2 deletions.
diff --git a/cyclone/httputil.py b/cyclone/httputil.py
@@ -24,6 +24,7 @@
 from cyclone.util import ObjectDict
 from cyclone.escape import native_str
 from cyclone.escape import parse_qs_bytes
+from cyclone.escape import to_unicode
 from cyclone.escape import utf8
 
 from twisted.python import log
@@ -129,7 +130,9 @@ def parse(cls, headers):
         [('Content-Length', '42'), ('Content-Type', 'text/html')]
         """
         h = cls()
-        for line in headers.splitlines():
+        for line in headers.split("\n"):
+            if line.endswith("\r"):
+                line = line[:-1]
             if line:
                 h.parse_line(line)
         return h
@@ -265,7 +268,7 @@ def parse_multipart_form_data(boundary, data, arguments, files):
         if eoh == -1:
             log.msg("multipart/form-data missing headers")
             continue
-        headers = HTTPHeaders.parse(part[:eoh])
+        headers = HTTPHeaders.parse(to_unicode(part[:eoh]))
         disp_header = headers.get("Content-Disposition", "")
         disposition, disp_params = _parse_header(disp_header)
         if disposition != "form-data" or not part.endswith(b"\r\n"):

diff --git a/cyclone/tests/test_httputil.py b/cyclone/tests/test_httputil.py
@@ -0,0 +1,53 @@
+from twisted.trial import unittest
+
+from cyclone.httputil import HTTPHeaders
+
+
+class TestHTTPHeaders(unittest.TestCase):
+    def test_parse_no_cr(self):
+        """
+        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
+        "The line terminator for message-header fields is the sequence CRLF.
+         However, we recommend that applications, when parsing such headers,
+         recognize a single LF as a line terminator and ignore the leading CR."
+
+        https://tools.ietf.org/html/rfc7230#section-3.5
+        "Although the line terminator for the start-line and header fields is
+         the sequence CRLF, a recipient MAY recognize a single LF as a line
+         terminator and ignore any preceding CR."
+        """
+        header_data = u"Foo: bar\n" u"Baz: qux"
+        headers = HTTPHeaders.parse(header_data)
+        self.assertEqual(len(list(headers.get_all())), 2)
+        self.assertEqual(headers.get("foo"), "bar")
+        self.assertEqual(headers.get("baz"), "qux")
+
+    def test_parse_crlf(self):
+        """
+        https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
+        "The line terminator for message-header fields is the sequence CRLF.
+         However, we recommend that applications, when parsing such headers,
+         recognize a single LF as a line terminator and ignore the leading CR."
+        """
+        header_data = u"Foo: bar\r\n" u"Baz: qux"
+        headers = HTTPHeaders.parse(header_data)
+        self.assertEqual(len(list(headers.get_all())), 2)
+        self.assertEqual(headers.get("foo"), "bar")
+        self.assertEqual(headers.get("baz"), "qux")
+
+    def test_parse_problematic_newlines(self):
+        """
+        There are some problematic characters that Python considers to be newlines
+        for the purpose of splitlines, but aren't newlines per the RFCs.
+
+        https://docs.python.org/3/library/stdtypes.html#str.splitlines
+        """
+        header_data = (
+            u"Foo: bar\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029asdf: jkl\r\n" u"Baz: qux"
+        )
+        headers = HTTPHeaders.parse(header_data)
+        self.assertEqual(len(list(headers.get_all())), 2)
+        self.assertEqual(
+            headers.get("foo"), u"bar\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029asdf: jkl"
+        )
+        self.assertEqual(headers.get("baz"), "qux")