Skip to content

Commit

Permalink
Support reading multipart data with \n (LF) lines
Browse files Browse the repository at this point in the history
While the RFC clearly specifies `CRLF` newlines, there are quite a lot of
implementations that use just `LF`. Even Python's stdlib produces
multiparts with `\n` newlines by default for compatibility reasons.

We won't change how we produce multipart content - here we follow the
RFC. However, we can detect `\n` lines quite easily, which makes
supporting them quite cheap.
  • Loading branch information
kxepal committed Jan 6, 2019
1 parent 3a0c7bf commit e9c8c33
Show file tree
Hide file tree
Showing 3 changed files with 384 additions and 216 deletions.
1 change: 1 addition & 0 deletions CHANGES/2302.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support reading multipart data with `\n` (`LF`) lines
51 changes: 38 additions & 13 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,16 @@ class BodyPartReader:

chunk_size = 8192

def __init__(self, boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader) -> None:
def __init__(
self,
boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader,
newline: bytes = b'\r\n',
) -> None:
self.headers = headers
self._boundary = boundary
self._newline = newline
self._content = content
self._at_eof = False
length = self.headers.get(CONTENT_LENGTH, None)
Expand Down Expand Up @@ -300,8 +305,8 @@ async def read_chunk(self, size: int=chunk_size) -> bytes:
if self._read_bytes == self._length:
self._at_eof = True
if self._at_eof:
clrf = await self._content.readline()
assert b'\r\n' == clrf, \
newline = await self._content.readline()
assert newline == self._newline, \
'reader did not read all the data or it is malformed'
return chunk

Expand All @@ -328,11 +333,15 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
assert self._content_eof < 3, "Reading after EOF"
assert self._prev_chunk is not None
window = self._prev_chunk + chunk
sub = b'\r\n' + self._boundary

intermeditate_boundary = self._newline + self._boundary

if first_chunk:
idx = window.find(sub)
pos = 0
else:
idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
pos = max(0, len(self._prev_chunk) - len(intermeditate_boundary))

idx = window.find(intermeditate_boundary, pos)
if idx >= 0:
# pushing boundary back to content
with warnings.catch_warnings():
Expand All @@ -344,6 +353,7 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
chunk = window[len(self._prev_chunk):idx]
if not chunk:
self._at_eof = True

result = self._prev_chunk
self._prev_chunk = chunk
return result
Expand Down Expand Up @@ -372,7 +382,8 @@ async def readline(self) -> bytes:
else:
next_line = await self._content.readline()
if next_line.startswith(self._boundary):
line = line[:-2] # strip CRLF but only once
# strip newline but only once
line = line[:-len(self._newline)]
self._unread.append(next_line)

return line
Expand Down Expand Up @@ -516,10 +527,15 @@ class MultipartReader:
#: Body part reader class for non multipart/* content types.
part_reader_cls = BodyPartReader

def __init__(self, headers: Mapping[str, str],
content: StreamReader) -> None:
def __init__(
self,
headers: Mapping[str, str],
content: StreamReader,
newline: bytes = b'\r\n',
) -> None:
self.headers = headers
self._boundary = ('--' + self._get_boundary()).encode()
self._newline = newline
self._content = content
self._last_part = None
self._at_eof = False
Expand Down Expand Up @@ -592,9 +608,13 @@ def _get_part_reader(self, headers: 'CIMultiDictProxy[str]') -> Any:
if mimetype.type == 'multipart':
if self.multipart_reader_cls is None:
return type(self)(headers, self._content)
return self.multipart_reader_cls(headers, self._content)
return self.multipart_reader_cls(
headers, self._content, self._newline
)
else:
return self.part_reader_cls(self._boundary, headers, self._content)
return self.part_reader_cls(
self._boundary, headers, self._content, self._newline
)

def _get_boundary(self) -> str:
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
Expand Down Expand Up @@ -625,6 +645,11 @@ async def _read_until_first_boundary(self) -> None:
if chunk == b'':
raise ValueError("Could not find starting boundary %r"
% (self._boundary))
if chunk.startswith(self._boundary):
_, newline = chunk.split(self._boundary, 1)
assert newline in (b'\r\n', b'\n')
self._newline = newline

chunk = chunk.rstrip()
if chunk == self._boundary:
return
Expand Down
Loading

0 comments on commit e9c8c33

Please sign in to comment.