Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions proxy/http/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,7 @@ def _discover_plugin_klass(self, protocol: int) -> Optional[Type['HttpProtocolHa
def _parse_first_request(self, data: memoryview) -> bool:
# Parse http request
try:
# TODO(abhinavsingh): Remove .tobytes after parser is
# memoryview compliant
self.request.parse(data.tobytes())
self.request.parse(data)
except HttpProtocolException as e: # noqa: WPS329
self.work.queue(BAD_REQUEST_RESPONSE_PKT)
raise e
Expand Down
8 changes: 4 additions & 4 deletions proxy/http/parser/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ def __init__(self) -> None:
# Expected size of next following chunk
self.size: Optional[int] = None

def parse(self, raw: bytes) -> bytes:
def parse(self, raw: memoryview) -> memoryview:
more = len(raw) > 0
while more and self.state != chunkParserStates.COMPLETE:
more, raw = self.process(raw)
more, raw = self.process(raw.tobytes())
return raw

def process(self, raw: bytes) -> Tuple[bool, bytes]:
def process(self, raw: bytes) -> Tuple[bool, memoryview]:
if self.state == chunkParserStates.WAITING_FOR_SIZE:
# Consume prior chunk in buffer
# in case chunk size without CRLF was received
Expand Down Expand Up @@ -69,7 +69,7 @@ def process(self, raw: bytes) -> Tuple[bool, bytes]:
self.state = chunkParserStates.WAITING_FOR_SIZE
self.chunk = b''
self.size = None
return len(raw) > 0, raw
return len(raw) > 0, memoryview(raw)

@staticmethod
def to_chunks(raw: bytes, chunk_size: int = DEFAULT_BUFFER_SIZE) -> bytes:
Expand Down
70 changes: 38 additions & 32 deletions proxy/http/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __init__(
# Total size of raw bytes passed for parsing
self.total_size: int = 0
# Buffer to hold unprocessed bytes
self.buffer: bytes = b''
self.buffer: Optional[memoryview] = None
# Internal headers data structure:
# - Keys are lower case header names.
# - Values are 2-tuple containing original
Expand All @@ -102,13 +102,13 @@ def request(
httpParserTypes.REQUEST_PARSER,
enable_proxy_protocol=enable_proxy_protocol,
)
parser.parse(raw)
parser.parse(memoryview(raw))
return parser

@classmethod
def response(cls: Type[T], raw: bytes) -> T:
parser = cls(httpParserTypes.RESPONSE_PARSER)
parser.parse(raw)
parser.parse(memoryview(raw))
return parser

def header(self, key: bytes) -> bytes:
Expand Down Expand Up @@ -206,14 +206,21 @@ def body_expected(self) -> bool:
"""Returns true if content or chunked response is expected."""
return self._content_expected or self._is_chunked_encoded

def parse(self, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) -> None:
def parse(
self,
raw: memoryview,
allowed_url_schemes: Optional[List[bytes]] = None,
) -> None:
"""Parses an HTTP request or response out of the raw data.

Check `HttpParser.state` after `parse` has successfully returned."""
size = len(raw)
self.total_size += size
raw = self.buffer + raw
self.buffer, more = b'', size > 0
if self.buffer:
# TODO(abhinavsingh): Instead of copying both buffers via tobytes(),
# the parser should be able to work with a list of memoryview objects
raw = memoryview(self.buffer.tobytes() + raw.tobytes())
self.buffer, more = None, size > 0
while more and self.state != httpParserStates.COMPLETE:
# The >= comparison with HEADERS_COMPLETE also covers the RCVING_BODY state
if self.state >= httpParserStates.HEADERS_COMPLETE:
Expand All @@ -237,7 +244,7 @@ def parse(self, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) -
not (self._content_expected or self._is_chunked_encoded) and \
raw == b'':
self.state = httpParserStates.COMPLETE
self.buffer = raw
self.buffer = None if raw == b'' else raw

def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool = False) -> bytes:
"""Rebuild the request object."""
Expand Down Expand Up @@ -278,7 +285,7 @@ def build_response(self) -> bytes:
body=self._get_body_or_chunks(),
)

def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
def _process_body(self, raw: memoryview) -> Tuple[bool, memoryview]:
# Ref: http://www.ietf.org/rfc/rfc2616.txt
# 3.If a Content-Length header field (section 14.13) is present, its
# decimal value in OCTETs represents both the entity-length and the
Expand All @@ -297,7 +304,8 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
self.body = self.chunk.body
self.state = httpParserStates.COMPLETE
more = False
elif self._content_expected:
return more, raw
if self._content_expected:
self.state = httpParserStates.RCVING_BODY
if self.body is None:
self.body = b''
Expand All @@ -307,23 +315,21 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
if self.body and \
len(self.body) == int(self.header(b'content-length')):
self.state = httpParserStates.COMPLETE
more, raw = len(raw) > 0, raw[total_size - received_size:]
else:
self.state = httpParserStates.RCVING_BODY
# Received a packet without content-length header
# and no transfer-encoding specified.
#
# This can happen for both HTTP/1.0 and HTTP/1.1 scenarios.
# Currently, we consume the remaining buffer as body.
#
# Ref https://github.com/abhinavsingh/proxy.py/issues/398
#
# See TestHttpParser.test_issue_398 scenario
self.body = raw
more, raw = False, b''
return more, raw

def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]:
return len(raw) > 0, raw[total_size - received_size:]
# Received a packet without content-length header
# and no transfer-encoding specified.
#
# This can happen for both HTTP/1.0 and HTTP/1.1 scenarios.
# Currently, we consume the remaining buffer as body.
#
# Ref https://github.com/abhinavsingh/proxy.py/issues/398
#
# See TestHttpParser.test_issue_398 scenario
self.state = httpParserStates.RCVING_BODY
self.body = raw
return False, memoryview(b'')

def _process_headers(self, raw: memoryview) -> Tuple[bool, memoryview]:
"""Returns False when no CRLF could be found in received bytes.

TODO: We should not return until parser reaches headers complete
Expand All @@ -334,10 +340,10 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]:
This will also help make the parser even more stateless.
"""
while True:
parts = raw.split(CRLF, 1)
parts = raw.tobytes().split(CRLF, 1)
if len(parts) == 1:
return False, raw
line, raw = parts[0], parts[1]
line, raw = parts[0], memoryview(parts[1])
if self.state in (httpParserStates.LINE_RCVD, httpParserStates.RCVING_HEADERS):
if line == b'' or line.strip() == b'': # Blank line received.
self.state = httpParserStates.HEADERS_COMPLETE
Expand All @@ -352,14 +358,14 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]:

def _process_line(
self,
raw: bytes,
raw: memoryview,
allowed_url_schemes: Optional[List[bytes]] = None,
) -> Tuple[bool, bytes]:
) -> Tuple[bool, memoryview]:
while True:
parts = raw.split(CRLF, 1)
parts = raw.tobytes().split(CRLF, 1)
if len(parts) == 1:
return False, raw
line, raw = parts[0], parts[1]
line, raw = parts[0], memoryview(parts[1])
if self.type == httpParserTypes.REQUEST_PARSER:
if self.protocol is not None and self.protocol.version is None:
# We expect to receive entire proxy protocol v1 line
Expand Down
17 changes: 4 additions & 13 deletions proxy/http/proxy/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,8 @@ async def read_from_descriptors(self, r: Readables) -> bool:
if self.response.is_complete:
self.handle_pipeline_response(raw)
else:
# TODO(abhinavsingh): Remove .tobytes after parser is
# memoryview compliant
chunk = raw.tobytes()
self.response.parse(chunk)
self.emit_response_events(len(chunk))
self.response.parse(raw)
self.emit_response_events(len(raw))
else:
self.response.total_size += len(raw)
# queue raw data for client
Expand Down Expand Up @@ -430,7 +427,6 @@ def on_client_data(self, raw: memoryview) -> None:
# must be treated as WebSocket protocol packets.
self.upstream.queue(raw)
return

if self.pipeline_request is None:
# For pipeline requests, we never
# want to use --enable-proxy-protocol flag
Expand All @@ -443,10 +439,7 @@ def on_client_data(self, raw: memoryview) -> None:
self.pipeline_request = HttpParser(
httpParserTypes.REQUEST_PARSER,
)

# TODO(abhinavsingh): Remove .tobytes after parser is
# memoryview compliant
self.pipeline_request.parse(raw.tobytes())
self.pipeline_request.parse(raw)
if self.pipeline_request.is_complete:
for plugin in self.plugins.values():
assert self.pipeline_request is not None
Expand Down Expand Up @@ -555,9 +548,7 @@ def handle_pipeline_response(self, raw: memoryview) -> None:
self.pipeline_response = HttpParser(
httpParserTypes.RESPONSE_PARSER,
)
# TODO(abhinavsingh): Remove .tobytes after parser is memoryview
# compliant
self.pipeline_response.parse(raw.tobytes())
self.pipeline_response.parse(raw)
if self.pipeline_response.is_complete:
self.pipeline_response = None

Expand Down
4 changes: 1 addition & 3 deletions proxy/http/server/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,7 @@ def on_client_data(self, raw: memoryview) -> None:
self.pipeline_request = HttpParser(
httpParserTypes.REQUEST_PARSER,
)
# TODO(abhinavsingh): Remove .tobytes after parser is memoryview
# compliant
self.pipeline_request.parse(raw.tobytes())
self.pipeline_request.parse(raw)
if self.pipeline_request.is_complete:
self.route.handle_request(self.pipeline_request)
if not self.pipeline_request.is_http_1_1_keep_alive:
Expand Down
4 changes: 1 addition & 3 deletions proxy/http/websocket/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def upgrade(self) -> None:
),
)
response = HttpParser(httpParserTypes.RESPONSE_PARSER)
response.parse(self.sock.recv(DEFAULT_BUFFER_SIZE))
response.parse(memoryview(self.sock.recv(DEFAULT_BUFFER_SIZE)))
accept = response.header(b'Sec-Websocket-Accept')
assert WebsocketFrame.key_to_accept(key) == accept

Expand All @@ -100,8 +100,6 @@ def run_once(self) -> bool:
self.closed = True
return True
frame = WebsocketFrame()
# TODO(abhinavsingh): Remove .tobytes after parser is
# memoryview compliant
frame.parse(raw.tobytes())
self.on_message(frame)
elif mask & selectors.EVENT_WRITE:
Expand Down
2 changes: 1 addition & 1 deletion proxy/plugin/modify_chunk_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
def handle_upstream_chunk(self, chunk: memoryview) -> Optional[memoryview]:
# Parse the response.
# Note that these chunks also include headers
self.response.parse(chunk.tobytes())
self.response.parse(chunk)
# If response is complete, modify and dispatch to client
if self.response.is_complete:
# Avoid setting a body for responses where a body is not expected.
Expand Down
36 changes: 19 additions & 17 deletions tests/http/parser/test_chunk_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@ def setUp(self) -> None:

def test_chunk_parse_basic(self) -> None:
self.parser.parse(
b''.join([
b'4\r\n',
b'Wiki\r\n',
b'5\r\n',
b'pedia\r\n',
b'E\r\n',
b' in\r\n\r\nchunks.\r\n',
b'0\r\n',
b'\r\n',
]),
memoryview(
b''.join([
b'4\r\n',
b'Wiki\r\n',
b'5\r\n',
b'pedia\r\n',
b'E\r\n',
b' in\r\n\r\nchunks.\r\n',
b'0\r\n',
b'\r\n',
]),
),
)
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, None)
Expand All @@ -38,55 +40,55 @@ def test_chunk_parse_basic(self) -> None:

def test_chunk_parse_issue_27(self) -> None:
"""Case when data ends with the chunk size but without ending CRLF."""
self.parser.parse(b'3')
self.parser.parse(memoryview(b'3'))
self.assertEqual(self.parser.chunk, b'3')
self.assertEqual(self.parser.size, None)
self.assertEqual(self.parser.body, b'')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_SIZE,
)
self.parser.parse(b'\r\n')
self.parser.parse(memoryview(b'\r\n'))
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, 3)
self.assertEqual(self.parser.body, b'')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_DATA,
)
self.parser.parse(b'abc')
self.parser.parse(memoryview(b'abc'))
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, None)
self.assertEqual(self.parser.body, b'abc')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_SIZE,
)
self.parser.parse(b'\r\n')
self.parser.parse(memoryview(b'\r\n'))
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, None)
self.assertEqual(self.parser.body, b'abc')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_SIZE,
)
self.parser.parse(b'4\r\n')
self.parser.parse(memoryview(b'4\r\n'))
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, 4)
self.assertEqual(self.parser.body, b'abc')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_DATA,
)
self.parser.parse(b'defg\r\n0')
self.parser.parse(memoryview(b'defg\r\n0'))
self.assertEqual(self.parser.chunk, b'0')
self.assertEqual(self.parser.size, None)
self.assertEqual(self.parser.body, b'abcdefg')
self.assertEqual(
self.parser.state,
chunkParserStates.WAITING_FOR_SIZE,
)
self.parser.parse(b'\r\n\r\n')
self.parser.parse(memoryview(b'\r\n\r\n'))
self.assertEqual(self.parser.chunk, b'')
self.assertEqual(self.parser.size, None)
self.assertEqual(self.parser.body, b'abcdefg')
Expand Down
Loading