diff --git a/CHANGES/4012.bugfix b/CHANGES/4012.bugfix new file mode 100644 index 00000000000..279de94d821 --- /dev/null +++ b/CHANGES/4012.bugfix @@ -0,0 +1,3 @@ +Only encode content-disposition filename parameter using percent-encoding. +Other parameters are encoded to quoted-string or RFC2231 extended parameter +value. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 08a3db0cc29..84077a247e7 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -195,6 +195,7 @@ Marat Sharafutdinov Marco Paolini Mariano Anaya Mariusz Masztalerczuk +Marko Kohtala Martijn Pieters Martin Melka Martin Richard diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 4bc299ea7b9..1b572fd3c0b 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -332,14 +332,41 @@ def guess_filename(obj: Any, default: Optional[str] = None) -> Optional[str]: return default +not_qtext_re = re.compile(r"[^\041\043-\133\135-\176]") +QCONTENT = {chr(i) for i in range(0x20, 0x7F)} | {"\t"} + + +def quoted_string(content: str) -> str: + """Return 7-bit content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError(f"bad content for quoted-string {content!r}") + return not_qtext_re.sub(lambda x: "\\" + x.group(0), content) + + def content_disposition_header( - disptype: str, quote_fields: bool = True, **params: str + disptype: str, quote_fields: bool = True, _charset: str = "utf-8", **params: str ) -> str: - """Sets ``Content-Disposition`` header. + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. disptype is a disposition type: inline, attachment, form-data. Should be valid extension token (see RFC 2183) + quote_fields performs value quoting to 7-bit MIME headers + according to RFC 7578. Set to quote_fields to False if recipient + can take 8-bit file names and field values. + + _charset specifies the charset to use when quote_fields is True. + params is a dict with disposition params. """ if not disptype or not (TOKEN > set(disptype)): @@ -353,10 +380,23 @@ def content_disposition_header( raise ValueError( "bad content disposition parameter" " {!r}={!r}".format(key, val) ) - qval = quote(val, "") if quote_fields else val - lparams.append((key, '"%s"' % qval)) - if key == "filename": - lparams.append(("filename*", "utf-8''" + qval)) + if quote_fields: + if key.lower() == "filename": + qval = quote(val, "", encoding=_charset) + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) + except ValueError: + qval = "".join( + (_charset, "''", quote(val, "", encoding=_charset)) + ) + lparams.append((key + "*", qval)) + else: + lparams.append((key, '"%s"' % qval)) + else: + qval = val.replace("\\", "\\\\").replace('"', '\\"') + lparams.append((key, '"%s"' % qval)) sparams = "; ".join("=".join(pair) for pair in lparams) value = "; ".join((value, sparams)) return value diff --git a/aiohttp/payload.py b/aiohttp/payload.py index db029a33a7b..0e881e41eae 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -191,11 +191,15 @@ def content_type(self) -> str: return self._headers[hdrs.CONTENT_TYPE] def set_content_disposition( - self, disptype: str, quote_fields: bool = True, **params: Any + self, + disptype: str, + quote_fields: bool = True, + _charset: str = "utf-8", + **params: Any, ) -> None: """Sets ``Content-Disposition`` header.""" self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header( - disptype, quote_fields=quote_fields, **params + disptype, quote_fields=quote_fields, _charset=_charset, **params ) @abstractmethod diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 1ec3a1aeb05..52d74d98324 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -1431,7 +1431,7 @@ async def handler(request): "text/x-python", ] assert request.headers["content-disposition"] == ( - "inline; filename=\"conftest.py\"; filename*=utf-8''conftest.py" + 'inline; filename="conftest.py"' ) return web.Response() diff --git a/tests/test_formdata.py b/tests/test_formdata.py index 2b776d996d1..4b1e4cc855f 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -74,18 +74,18 @@ def test_invalid_formdata_content_transfer_encoding() -> None: async def test_formdata_field_name_is_quoted(buf: Any, writer: Any) -> None: form = FormData(charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails%5B%5D"' in buf + assert b'name="email\\ 1"' in buf async def test_formdata_field_name_is_not_quoted(buf: Any, writer: Any) -> None: form = FormData(quote_fields=False, charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails[]"' in buf + assert b'name="email 1"' in buf async def test_mark_formdata_as_processed() -> None: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 28ac82db555..859ef2728c8 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -435,11 +435,25 @@ async def test_ceil_timeout_small(loop) -> None: # -------------------------------- ContentDisposition ------------------- -def test_content_disposition() -> None: - assert ( - helpers.content_disposition_header("attachment", foo="bar") - == 'attachment; foo="bar"' - ) +@pytest.mark.parametrize( + "kwargs, result", + [ + (dict(foo="bar"), 'attachment; foo="bar"'), + (dict(foo="bar[]"), 'attachment; foo="bar[]"'), + (dict(foo=' a""b\\'), 'attachment; foo="\\ a\\"\\"b\\\\"'), + (dict(foo="bär"), "attachment; foo*=utf-8''b%C3%A4r"), + (dict(foo='bär "\\', quote_fields=False), 'attachment; foo="bär \\"\\\\"'), + (dict(foo="bär", _charset="latin-1"), "attachment; foo*=latin-1''b%E4r"), + (dict(filename="bär"), 'attachment; filename="b%C3%A4r"'), + (dict(filename="bär", _charset="latin-1"), 'attachment; filename="b%E4r"'), + ( + dict(filename='bär "\\', quote_fields=False), + 'attachment; filename="bär \\"\\\\"', + ), + ], +) +def test_content_disposition(kwargs, result) -> None: + assert helpers.content_disposition_header("attachment", **kwargs) == result def test_content_disposition_bad_type() -> None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 749bf76dc8d..c6336d7881c 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1318,7 +1318,7 @@ async def test_write_preserves_content_disposition( b"Content-Type: test/passed\r\n" b"Content-Length: 3\r\n" b"Content-Disposition:" - b" form-data; filename=\"bug\"; filename*=utf-8''bug" + b' form-data; filename="bug"' ) assert message == b"foo\r\n--:--\r\n" @@ -1405,7 +1405,7 @@ async def test_reset_content_disposition_header( b"--:\r\n" b"Content-Type: text/plain\r\n" b"Content-Disposition:" - b" attachments; filename=\"bug.py\"; filename*=utf-8''bug.py\r\n" + b' attachments; filename="bug.py"\r\n' b"Content-Length: %s" b"" % (str(content_length).encode(),) ) diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py index 64bed11c630..bc66565f972 100644 --- a/tests/test_web_functional.py +++ b/tests/test_web_functional.py @@ -827,9 +827,7 @@ async def handler(request): resp = await client.get("/") assert 200 == resp.status resp_data = await resp.read() - expected_content_disposition = ( - "attachment; filename=\"conftest.py\"; filename*=utf-8''conftest.py" - ) + expected_content_disposition = 'attachment; filename="conftest.py"' assert resp_data == data assert resp.headers.get("Content-Type") in ( "application/octet-stream", @@ -857,9 +855,7 @@ async def handler(request): resp = await client.get("/") assert 200 == resp.status resp_data = await resp.read() - expected_content_disposition = ( - "attachment; filename=\"conftest.py\"; filename*=utf-8''conftest.py" - ) + expected_content_disposition = 'attachment; filename="conftest.py"' assert resp_data == data assert resp.headers.get("Content-Type") == "text/binary" assert resp.headers.get("Content-Length") == str(len(resp_data)) @@ -886,10 +882,7 @@ async def handler(request): assert resp_data == data assert resp.headers.get("Content-Type") == "text/binary" assert resp.headers.get("Content-Length") == str(len(resp_data)) - assert ( - resp.headers.get("Content-Disposition") - == "inline; filename=\"test.txt\"; filename*=utf-8''test.txt" - ) + assert resp.headers.get("Content-Disposition") == 'inline; filename="test.txt"' async def test_response_with_payload_stringio(aiohttp_client: Any, fname: Any) -> None: @@ -1531,10 +1524,7 @@ async def handler(request): assert body == b"test" disp = multipart.parse_content_disposition(resp.headers["content-disposition"]) - assert disp == ( - "attachment", - {"name": "file", "filename": "file", "filename*": "file"}, - ) + assert disp == ("attachment", {"name": "file", "filename": "file"}) async def test_response_with_bodypart_named(aiohttp_client: Any, tmp_path: Any) -> None: @@ -1557,10 +1547,7 @@ async def handler(request): assert body == b"test" disp = multipart.parse_content_disposition(resp.headers["content-disposition"]) - assert disp == ( - "attachment", - {"name": "file", "filename": "foobar.txt", "filename*": "foobar.txt"}, - ) + assert disp == ("attachment", {"name": "file", "filename": "foobar.txt"}) async def test_response_with_bodypart_invalid_name(aiohttp_client: Any) -> None: