Skip to content

Commit

Permalink
Support reading multipart data with \n (LF) lines (#3492)
Browse files Browse the repository at this point in the history
* Support reading multipart data with `\n` (`LF`) lines

While the RFC clearly requires `CRLF` newlines, there are quite a lot of
implementations which use just `LF`. Even Python's stdlib produces
multiparts with `\n` newlines by default for compatibility reasons.

We won't change how we produce multipart content - here we follow the
RFC. However, we can detect `\n` lines quite easily, which makes
supporting them quite cheap.

* Add test about mixed newlines

Just in case. That's a strange case, but it seems we pass it.

* Make the newline argument keyword-only and explicitly private

This argument is not designed to be set by users. It is chosen
automatically depending on the newline format detected while parsing the
multipart data and, due to the recursive nature of the multipart format,
it has to be passed around to nested readers.
  • Loading branch information
kxepal authored and asvetlov committed Jan 15, 2019
1 parent 8fbe7a1 commit fcedc66
Show file tree
Hide file tree
Showing 3 changed files with 429 additions and 208 deletions.
1 change: 1 addition & 0 deletions CHANGES/2302.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support reading multipart data with `\n` (`LF`) lines
53 changes: 40 additions & 13 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,17 @@ class BodyPartReader:

chunk_size = 8192

def __init__(self, boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader) -> None:
def __init__(
self,
boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader,
*,
_newline: bytes = b'\r\n'
) -> None:
self.headers = headers
self._boundary = boundary
self._newline = _newline
self._content = content
self._at_eof = False
length = self.headers.get(CONTENT_LENGTH, None)
Expand Down Expand Up @@ -300,8 +306,8 @@ async def read_chunk(self, size: int=chunk_size) -> bytes:
if self._read_bytes == self._length:
self._at_eof = True
if self._at_eof:
clrf = await self._content.readline()
assert b'\r\n' == clrf, \
newline = await self._content.readline()
assert newline == self._newline, \
'reader did not read all the data or it is malformed'
return chunk

Expand All @@ -328,11 +334,15 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
assert self._content_eof < 3, "Reading after EOF"
assert self._prev_chunk is not None
window = self._prev_chunk + chunk
sub = b'\r\n' + self._boundary

intermeditate_boundary = self._newline + self._boundary

if first_chunk:
idx = window.find(sub)
pos = 0
else:
idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
pos = max(0, len(self._prev_chunk) - len(intermeditate_boundary))

idx = window.find(intermeditate_boundary, pos)
if idx >= 0:
# pushing boundary back to content
with warnings.catch_warnings():
Expand All @@ -344,6 +354,7 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
chunk = window[len(self._prev_chunk):idx]
if not chunk:
self._at_eof = True

result = self._prev_chunk
self._prev_chunk = chunk
return result
Expand Down Expand Up @@ -372,7 +383,8 @@ async def readline(self) -> bytes:
else:
next_line = await self._content.readline()
if next_line.startswith(self._boundary):
line = line[:-2] # strip CRLF but only once
# strip newline but only once
line = line[:-len(self._newline)]
self._unread.append(next_line)

return line
Expand Down Expand Up @@ -516,10 +528,16 @@ class MultipartReader:
#: Body part reader class for non multipart/* content types.
part_reader_cls = BodyPartReader

def __init__(self, headers: Mapping[str, str],
content: StreamReader) -> None:
def __init__(
self,
headers: Mapping[str, str],
content: StreamReader,
*,
_newline: bytes = b'\r\n'
) -> None:
self.headers = headers
self._boundary = ('--' + self._get_boundary()).encode()
self._newline = _newline
self._content = content
self._last_part = None
self._at_eof = False
Expand Down Expand Up @@ -592,9 +610,13 @@ def _get_part_reader(self, headers: 'CIMultiDictProxy[str]') -> Any:
if mimetype.type == 'multipart':
if self.multipart_reader_cls is None:
return type(self)(headers, self._content)
return self.multipart_reader_cls(headers, self._content)
return self.multipart_reader_cls(
headers, self._content, _newline=self._newline
)
else:
return self.part_reader_cls(self._boundary, headers, self._content)
return self.part_reader_cls(
self._boundary, headers, self._content, _newline=self._newline
)

def _get_boundary(self) -> str:
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
Expand Down Expand Up @@ -625,6 +647,11 @@ async def _read_until_first_boundary(self) -> None:
if chunk == b'':
raise ValueError("Could not find starting boundary %r"
% (self._boundary))
if chunk.startswith(self._boundary):
_, newline = chunk.split(self._boundary, 1)
assert newline in (b'\r\n', b'\n')
self._newline = newline

chunk = chunk.rstrip()
if chunk == self._boundary:
return
Expand Down
Loading

0 comments on commit fcedc66

Please sign in to comment.