Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of multipart/form-data #8280

Merged
merged 39 commits into from Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
cf9a42c
Fix handling of multipart/form-data
Dreamsorcerer Apr 1, 2024
cbea0c6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
d9b039b
Create 8280.bugfix.rst
Dreamsorcerer Apr 1, 2024
970fda1
Update multipart.py
Dreamsorcerer Apr 1, 2024
1cec2cf
Update multipart.py
Dreamsorcerer Apr 1, 2024
56c0d6e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
af851dd
Update multipart.py
Dreamsorcerer Apr 1, 2024
91af09f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
1c6fcd7
Update multipart.py
Dreamsorcerer Apr 1, 2024
e874e3a
Update multipart.py
Dreamsorcerer Apr 1, 2024
d992d10
Update multipart.py
Dreamsorcerer Apr 1, 2024
502adac
Update test_multipart.py
Dreamsorcerer Apr 1, 2024
37a9414
Update test_web_functional.py
Dreamsorcerer Apr 1, 2024
57c71e1
Update formdata.py
Dreamsorcerer Apr 1, 2024
1adcd0a
Update test_client_functional.py
Dreamsorcerer Apr 1, 2024
3751264
Create 8280.breaking.rst
Dreamsorcerer Apr 1, 2024
4d11802
Update multipart.py
Dreamsorcerer Apr 1, 2024
cfa1d54
Update multipart.py
Dreamsorcerer Apr 3, 2024
91c4f65
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 3, 2024
00e76ab
Update multipart.py
Dreamsorcerer Apr 3, 2024
438fcf3
Update test_multipart.py
Dreamsorcerer Apr 3, 2024
db85964
Update CHANGES/8280.bugfix.rst
Dreamsorcerer Apr 4, 2024
e85bafb
Update CHANGES/8280.breaking.rst
Dreamsorcerer Apr 4, 2024
d24602a
Update multipart.py
Dreamsorcerer Apr 4, 2024
d9edc4d
Replace with warnings
Dreamsorcerer Apr 5, 2024
368ce2a
Readd deprecated parameter to tests
Dreamsorcerer Apr 5, 2024
a26d045
Update and rename 8280.breaking.rst to 8280.deprecation.rst
Dreamsorcerer Apr 5, 2024
e46f47f
Import
Dreamsorcerer Apr 5, 2024
967ba6b
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
f665a18
Update formdata.py
Dreamsorcerer Apr 5, 2024
2231e8c
Update test_client_functional.py
Dreamsorcerer Apr 5, 2024
846f02c
Update formdata.py
Dreamsorcerer Apr 5, 2024
391e725
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
60204e3
Update test_web_functional.py
Dreamsorcerer Apr 5, 2024
39a2d0a
Add tests for _charset_
Dreamsorcerer Apr 5, 2024
0ef28a7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 5, 2024
f6c816d
Update test_multipart.py
Dreamsorcerer Apr 5, 2024
be5b98c
Merge branch 'master' into Dreamsorcerer-patch-6
Dreamsorcerer Apr 6, 2024
c02cab6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/8280.bugfix.rst
@@ -0,0 +1 @@
Fixed ``multipart/form-data`` compliance with :rfc:`7578` -- by :user:`Dreamsorcerer`.
2 changes: 2 additions & 0 deletions CHANGES/8280.deprecation.rst
@@ -0,0 +1,2 @@
Deprecated ``content_transfer_encoding`` parameter in :py:meth:`FormData.add_field()
<aiohttp.FormData.add_field>` -- by :user:`Dreamsorcerer`.
12 changes: 11 additions & 1 deletion aiohttp/formdata.py
@@ -1,4 +1,5 @@
import io
import warnings
from typing import Any, Iterable, List, Optional
from urllib.parse import urlencode

Expand Down Expand Up @@ -54,7 +55,12 @@
if isinstance(value, io.IOBase):
self._is_multipart = True
elif isinstance(value, (bytes, bytearray, memoryview)):
msg = (
"In v4, passing bytes will no longer create a file field. "
"Please explicitly use the filename parameter or pass a BytesIO object."
)
if filename is None and content_transfer_encoding is None:
warnings.warn(msg, DeprecationWarning)

Check warning on line 63 in aiohttp/formdata.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/formdata.py#L63

Added line #L63 was not covered by tests
filename = name

type_options: MultiDict[str] = MultiDict({"name": name})
Expand Down Expand Up @@ -82,7 +88,11 @@
"content_transfer_encoding must be an instance"
" of str. Got: %s" % content_transfer_encoding
)
headers[hdrs.CONTENT_TRANSFER_ENCODING] = content_transfer_encoding
msg = (
"content_transfer_encoding is deprecated. "
"To maintain compatibility with v4 please pass a BytesPayload."
)
warnings.warn(msg, DeprecationWarning)
self._is_multipart = True

self._fields.append((type_options, headers, value))
Expand Down
116 changes: 76 additions & 40 deletions aiohttp/multipart.py
Expand Up @@ -260,12 +260,18 @@
boundary: bytes,
headers: "CIMultiDictProxy[str]",
content: StreamReader,
*,
subtype: str = "mixed",
default_charset: Optional[str] = None,
) -> None:
self.headers = headers
self._boundary = boundary
self._content = content
self._default_charset = default_charset
self._at_eof = False
length = self.headers.get(CONTENT_LENGTH, None)
self._is_form_data = subtype == "form-data"
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
length = None if self._is_form_data else self.headers.get(CONTENT_LENGTH, None)
self._length = int(length) if length is not None else None
self._read_bytes = 0
self._unread: Deque[bytes] = deque()
Expand Down Expand Up @@ -357,6 +363,8 @@
assert self._length is not None, "Content-Length required for chunked read"
chunk_size = min(size, self._length - self._read_bytes)
chunk = await self._content.read(chunk_size)
if self._content.at_eof():
self._at_eof = True

Check warning on line 367 in aiohttp/multipart.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/multipart.py#L367

Added line #L367 was not covered by tests
return chunk

async def _read_chunk_from_stream(self, size: int) -> bytes:
Expand Down Expand Up @@ -477,7 +485,8 @@
"""
if CONTENT_TRANSFER_ENCODING in self.headers:
data = self._decode_content_transfer(data)
if CONTENT_ENCODING in self.headers:
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
if not self._is_form_data and CONTENT_ENCODING in self.headers:
return self._decode_content(data)
return data

Expand Down Expand Up @@ -511,7 +520,7 @@
"""Returns charset parameter from Content-Type header or default."""
ctype = self.headers.get(CONTENT_TYPE, "")
mimetype = parse_mimetype(ctype)
return mimetype.parameters.get("charset", default)
return mimetype.parameters.get("charset", self._default_charset or default)

@reify
def name(self) -> Optional[str]:
Expand Down Expand Up @@ -570,9 +579,17 @@
headers: Mapping[str, str],
content: StreamReader,
) -> None:
self._mimetype = parse_mimetype(headers[CONTENT_TYPE])
assert self._mimetype.type == "multipart", "multipart/* content type expected"
if "boundary" not in self._mimetype.parameters:
raise ValueError(

Check warning on line 585 in aiohttp/multipart.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/multipart.py#L585

Added line #L585 was not covered by tests
"boundary missed for Content-Type: %s" % headers[CONTENT_TYPE]
)

self.headers = headers
self._boundary = ("--" + self._get_boundary()).encode()
self._content = content
self._default_charset: Optional[str] = None
self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
self._at_eof = False
self._at_bof = True
Expand Down Expand Up @@ -624,7 +641,24 @@
await self._read_boundary()
if self._at_eof: # we just read the last boundary, nothing to do there
return None
self._last_part = await self.fetch_next_part()

part = await self.fetch_next_part()
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
if (
self._last_part is None
and self._mimetype.subtype == "form-data"
and isinstance(part, BodyPartReader)
):
_, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION))
if params.get("name") == "_charset_":
# Longest encoding in https://encoding.spec.whatwg.org/encodings.json
# is 19 characters, so 32 should be more than enough for any valid encoding.
charset = await part.read_chunk(32)
if len(charset) > 31:
raise RuntimeError("Invalid default charset")
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
self._default_charset = charset.strip().decode()
part = await self.fetch_next_part()
self._last_part = part
return self._last_part

async def release(self) -> None:
Expand Down Expand Up @@ -660,19 +694,16 @@
return type(self)(headers, self._content)
return self.multipart_reader_cls(headers, self._content)
else:
return self.part_reader_cls(self._boundary, headers, self._content)

def _get_boundary(self) -> str:
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])

assert mimetype.type == "multipart", "multipart/* content type expected"

if "boundary" not in mimetype.parameters:
raise ValueError(
"boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
return self.part_reader_cls(
self._boundary,
headers,
self._content,
subtype=self._mimetype.subtype,
default_charset=self._default_charset,
)

boundary = mimetype.parameters["boundary"]
def _get_boundary(self) -> str:
boundary = self._mimetype.parameters["boundary"]
if len(boundary) > 70:
raise ValueError("boundary %r is too long (70 chars max)" % boundary)

Expand Down Expand Up @@ -765,6 +796,7 @@
super().__init__(None, content_type=ctype)

self._parts: List[_Part] = []
self._is_form_data = subtype == "form-data"

def __enter__(self) -> "MultipartWriter":
return self
Expand Down Expand Up @@ -842,32 +874,36 @@

def append_payload(self, payload: Payload) -> Payload:
"""Adds a new body part to multipart writer."""
# compression
encoding: Optional[str] = payload.headers.get(
CONTENT_ENCODING,
"",
).lower()
if encoding and encoding not in ("deflate", "gzip", "identity"):
raise RuntimeError(f"unknown content encoding: {encoding}")
if encoding == "identity":
encoding = None

# te encoding
te_encoding: Optional[str] = payload.headers.get(
CONTENT_TRANSFER_ENCODING,
"",
).lower()
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
raise RuntimeError(
"unknown content transfer encoding: {}" "".format(te_encoding)
encoding: Optional[str] = None
te_encoding: Optional[str] = None
if self._is_form_data:
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
assert CONTENT_DISPOSITION in payload.headers
assert "name=" in payload.headers[CONTENT_DISPOSITION]
assert (
not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
& payload.headers.keys()
)
if te_encoding == "binary":
te_encoding = None

# size
size = payload.size
if size is not None and not (encoding or te_encoding):
payload.headers[CONTENT_LENGTH] = str(size)
else:
# compression
encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
if encoding and encoding not in ("deflate", "gzip", "identity"):
raise RuntimeError(f"unknown content encoding: {encoding}")
if encoding == "identity":
encoding = None

# te encoding
te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
if te_encoding == "binary":
te_encoding = None

# size
size = payload.size
if size is not None and not (encoding or te_encoding):
payload.headers[CONTENT_LENGTH] = str(size)

self._parts.append((payload, encoding, te_encoding)) # type: ignore[arg-type]
return payload
Expand Down
46 changes: 1 addition & 45 deletions tests/test_client_functional.py
Expand Up @@ -1388,50 +1388,6 @@ async def handler(request):
resp.close()


async def test_POST_DATA_with_context_transfer_encoding(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
assert data["name"] == "text"
return web.Response(text=data["name"])

app = web.Application()
app.router.add_post("/", handler)
client = await aiohttp_client(app)

form = aiohttp.FormData()
form.add_field("name", "text", content_transfer_encoding="base64")

resp = await client.post("/", data=form)
assert 200 == resp.status
content = await resp.text()
assert content == "text"
resp.close()


async def test_POST_DATA_with_content_type_context_transfer_encoding(
aiohttp_client: Any,
):
async def handler(request):
data = await request.post()
assert data["name"] == "text"
return web.Response(body=data["name"])

app = web.Application()
app.router.add_post("/", handler)
client = await aiohttp_client(app)

form = aiohttp.FormData()
form.add_field(
"name", "text", content_type="text/plain", content_transfer_encoding="base64"
)

resp = await client.post("/", data=form)
assert 200 == resp.status
content = await resp.text()
assert content == "text"
resp.close()


async def test_POST_MultiDict(aiohttp_client: Any) -> None:
async def handler(request):
data = await request.post()
Expand Down Expand Up @@ -1483,7 +1439,7 @@ async def handler(request):

with fname.open("rb") as f:
async with client.post(
"/", data={"some": f, "test": b"data"}, chunked=True
"/", data={"some": f, "test": io.BytesIO(b"data")}, chunked=True
) as resp:
assert 200 == resp.status

Expand Down