Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support reading multipart data with \n (LF) lines #3492

Merged
merged 5 commits into from
Jan 15, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/2302.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support reading multipart data with ``\n`` (``LF``) lines
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm... we need to watch for incorrect single backticks in RST (inline literals need double backticks)...

51 changes: 38 additions & 13 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,16 @@ class BodyPartReader:

chunk_size = 8192

def __init__(self, boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader) -> None:
def __init__(
self,
boundary: bytes,
headers: Mapping[str, Optional[str]],
content: StreamReader,
newline: bytes = b'\r\n',
kxepal marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
self.headers = headers
self._boundary = boundary
self._newline = newline
self._content = content
self._at_eof = False
length = self.headers.get(CONTENT_LENGTH, None)
Expand Down Expand Up @@ -300,8 +305,8 @@ async def read_chunk(self, size: int=chunk_size) -> bytes:
if self._read_bytes == self._length:
self._at_eof = True
if self._at_eof:
clrf = await self._content.readline()
assert b'\r\n' == clrf, \
newline = await self._content.readline()
assert newline == self._newline, \
'reader did not read all the data or it is malformed'
return chunk

Expand All @@ -328,11 +333,15 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
assert self._content_eof < 3, "Reading after EOF"
assert self._prev_chunk is not None
window = self._prev_chunk + chunk
sub = b'\r\n' + self._boundary

intermeditate_boundary = self._newline + self._boundary

if first_chunk:
idx = window.find(sub)
pos = 0
else:
idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
pos = max(0, len(self._prev_chunk) - len(intermeditate_boundary))

idx = window.find(intermeditate_boundary, pos)
if idx >= 0:
# pushing boundary back to content
with warnings.catch_warnings():
Expand All @@ -344,6 +353,7 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
chunk = window[len(self._prev_chunk):idx]
if not chunk:
self._at_eof = True

result = self._prev_chunk
self._prev_chunk = chunk
return result
Expand Down Expand Up @@ -372,7 +382,8 @@ async def readline(self) -> bytes:
else:
next_line = await self._content.readline()
if next_line.startswith(self._boundary):
line = line[:-2] # strip CRLF but only once
# strip newline but only once
line = line[:-len(self._newline)]
self._unread.append(next_line)

return line
Expand Down Expand Up @@ -516,10 +527,15 @@ class MultipartReader:
#: Body part reader class for non multipart/* content types.
part_reader_cls = BodyPartReader

def __init__(self, headers: Mapping[str, str],
content: StreamReader) -> None:
def __init__(
self,
headers: Mapping[str, str],
content: StreamReader,
newline: bytes = b'\r\n',
) -> None:
self.headers = headers
self._boundary = ('--' + self._get_boundary()).encode()
self._newline = newline
self._content = content
self._last_part = None
self._at_eof = False
Expand Down Expand Up @@ -592,9 +608,13 @@ def _get_part_reader(self, headers: 'CIMultiDictProxy[str]') -> Any:
if mimetype.type == 'multipart':
if self.multipart_reader_cls is None:
return type(self)(headers, self._content)
return self.multipart_reader_cls(headers, self._content)
return self.multipart_reader_cls(
headers, self._content, self._newline
)
else:
return self.part_reader_cls(self._boundary, headers, self._content)
return self.part_reader_cls(
self._boundary, headers, self._content, self._newline
)

def _get_boundary(self) -> str:
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
Expand Down Expand Up @@ -625,6 +645,11 @@ async def _read_until_first_boundary(self) -> None:
if chunk == b'':
raise ValueError("Could not find starting boundary %r"
% (self._boundary))
if chunk.startswith(self._boundary):
_, newline = chunk.split(self._boundary, 1)
assert newline in (b'\r\n', b'\n')
self._newline = newline

chunk = chunk.rstrip()
if chunk == self._boundary:
return
Expand Down
Loading