From b8589380f7caad570aa86f81f1253e0fdc09406a Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 1 Dec 2021 05:08:18 +0530 Subject: [PATCH 1/3] Avoid proxy of requests to private IP within `ProxyPoolPlugin` --- proxy/http/handler.py | 19 ++++++++++--------- proxy/http/parser/parser.py | 10 ++++++---- proxy/http/plugin.py | 1 + proxy/http/proxy/plugin.py | 2 ++ proxy/http/proxy/server.py | 5 ----- proxy/plugin/proxy_pool.py | 25 +++++++++++++++++-------- 6 files changed, 36 insertions(+), 26 deletions(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 1256defd9c..b6cd6f250d 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -189,19 +189,12 @@ async def handle_events( return False def handle_data(self, data: memoryview) -> Optional[bool]: + """Handles incoming data from client.""" if data is None: logger.debug('Client closed connection, tearing down...') self.work.closed = True return True - try: - # HttpProtocolHandlerPlugin.on_client_data - # Can raise HttpProtocolException to tear down the connection - for plugin in self.plugins.values(): - optional_data = plugin.on_client_data(data) - if optional_data is None: - break - data = optional_data # Don't parse incoming data any further after 1st request has completed. # # This specially does happen for pipeline requests. @@ -209,7 +202,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]: # Plugins can utilize on_client_data for such cases and # apply custom logic to handle request data sent after 1st # valid request. 
- if data and self.request.state != httpParserStates.COMPLETE: + if self.request.state != httpParserStates.COMPLETE: # Parse http request # # TODO(abhinavsingh): Remove .tobytes after parser is @@ -229,6 +222,14 @@ def handle_data(self, data: memoryview) -> Optional[bool]: plugin_.client._conn = upgraded_sock elif isinstance(upgraded_sock, bool) and upgraded_sock is True: return True + else: + # HttpProtocolHandlerPlugin.on_client_data + # Can raise HttpProtocolException to tear down the connection + for plugin in self.plugins.values(): + optional_data = plugin.on_client_data(data) + if optional_data is None: + break + data = optional_data except HttpProtocolException as e: logger.debug('HttpProtocolException raised') response: Optional[memoryview] = e.response(self.request) diff --git a/proxy/http/parser/parser.py b/proxy/http/parser/parser.py index 9df644fa64..2785b246ec 100644 --- a/proxy/http/parser/parser.py +++ b/proxy/http/parser/parser.py @@ -15,7 +15,7 @@ from typing import TypeVar, Optional, Dict, Type, Tuple, List from ...common.constants import DEFAULT_DISABLE_HEADERS, COLON, DEFAULT_ENABLE_PROXY_PROTOCOL -from ...common.constants import HTTP_1_1, HTTP_1_0, SLASH, CRLF +from ...common.constants import HTTP_1_1, SLASH, CRLF from ...common.constants import WHITESPACE, DEFAULT_HTTP_PORT from ...common.utils import build_http_request, build_http_response, find_http_line, text_ from ...common.flag import flags @@ -271,7 +271,7 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]: self.body = self.chunk.body self.state = httpParserStates.COMPLETE more = False - elif b'content-length' in self.headers: + elif self.content_expected: self.state = httpParserStates.RCVING_BODY if self.body is None: self.body = b'' @@ -283,13 +283,15 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]: self.state = httpParserStates.COMPLETE more, raw = len(raw) > 0, raw[total_size - received_size:] else: - # HTTP/1.0 scenario only - assert self.version == HTTP_1_0 
self.state = httpParserStates.RCVING_BODY # Received a packet without content-length header # and no transfer-encoding specified. # + # This can happen for both HTTP/1.0 and HTTP/1.1 scenarios. + # Currently, we consume the remaining buffer as body. + # # Ref https://github.com/abhinavsingh/proxy.py/issues/398 + # # See TestHttpParser.test_issue_398 scenario self.body = raw more, raw = False, b'' diff --git a/proxy/http/plugin.py b/proxy/http/plugin.py index d5510b5c2b..a1e169c2de 100644 --- a/proxy/http/plugin.py +++ b/proxy/http/plugin.py @@ -94,6 +94,7 @@ async def read_from_descriptors(self, r: Readables) -> bool: @abstractmethod def on_client_data(self, raw: memoryview) -> Optional[memoryview]: + """Called only after original request has been completely received.""" return raw # pragma: no cover @abstractmethod diff --git a/proxy/http/proxy/plugin.py b/proxy/http/proxy/plugin.py index 81392d1d63..a0f7763698 100644 --- a/proxy/http/proxy/plugin.py +++ b/proxy/http/proxy/plugin.py @@ -121,6 +121,8 @@ def handle_client_data( Essentially, if you return None from within before_upstream_connection, be prepared to handle_client_data and not handle_client_request. + Only called after initial request from client has been received. 
+ Raise HttpRequestRejected to tear down the connection Return None to drop the connection """ diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index 2dafc4300d..7a7622bd12 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -903,7 +903,6 @@ def wrap_client(self) -> bool: def emit_request_complete(self) -> None: if not self.flags.enable_events: return - assert self.request.port self.event_queue.publish( request_id=self.uid.hex, @@ -924,7 +923,6 @@ def emit_request_complete(self) -> None: def emit_response_events(self, chunk_size: int) -> None: if not self.flags.enable_events: return - if self.response.state == httpParserStates.COMPLETE: self.emit_response_complete() elif self.response.state == httpParserStates.RCVING_BODY: @@ -935,7 +933,6 @@ def emit_response_events(self, chunk_size: int) -> None: def emit_response_headers_complete(self) -> None: if not self.flags.enable_events: return - self.event_queue.publish( request_id=self.uid.hex, event_name=eventNames.RESPONSE_HEADERS_COMPLETE, @@ -948,7 +945,6 @@ def emit_response_headers_complete(self) -> None: def emit_response_chunk_received(self, chunk_size: int) -> None: if not self.flags.enable_events: return - self.event_queue.publish( request_id=self.uid.hex, event_name=eventNames.RESPONSE_CHUNK_RECEIVED, @@ -962,7 +958,6 @@ def emit_response_chunk_received(self, chunk_size: int) -> None: def emit_response_complete(self) -> None: if not self.flags.enable_events: return - self.event_queue.publish( request_id=self.uid.hex, event_name=eventNames.RESPONSE_COMPLETE, diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index 1005aca38e..f93282a184 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -10,10 +10,12 @@ """ import random import logging +import ipaddress from typing import Dict, List, Optional, Any from ..common.flag import flags +from ..common.utils import text_ from ..http import Url, httpMethods from ..http.parser import 
HttpParser @@ -78,15 +80,22 @@ def before_upstream_connection( ) -> Optional[HttpParser]: """Avoids establishing the default connection to upstream server by returning None. + + TODO(abhinavsingh): Ideally connection to upstream proxy endpoints + must be bootstrapped within it's own re-usable and gc'd pool, to avoid establishing + a fresh upstream proxy connection for each client request. + + See :class:`~proxy.core.connection.pool.ConnectionPool` which is a work + in progress for SSL cache handling. """ - # TODO(abhinavsingh): Ideally connection to upstream proxy endpoints - # must be bootstrapped within it's own re-usable and gc'd pool, to avoid establishing - # a fresh upstream proxy connection for each client request. - # - # See :class:`~proxy.core.connection.pool.ConnectionPool` which is a work - # in progress for SSL cache handling. - # - # Implement your own logic here e.g. round-robin, least connection etc. + # We don't want to send private IP requests to remote proxies + try: + if ipaddress.ip_address(text_(request.host)).is_private: + return request + except ValueError: + pass + # Choose a random proxy from the pool + # TODO: Implement your own logic here e.g. round-robin, least connection etc. 
endpoint = random.choice(self.flags.proxy_pool)[0].split(':', 1) if endpoint[0] == 'localhost' and endpoint[1] == '8899': return request From fc5a0b67df950bce0f28603eb6d3fc4ddbf7170f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 1 Dec 2021 05:14:31 +0530 Subject: [PATCH 2/3] Fix tests --- tests/http/test_http_proxy_tls_interception.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index 1fcfa71720..e10ec0fcd3 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -145,9 +145,8 @@ async def asyncReturnBool(val: bool) -> bool: # Assert our mocked plugins invocations self.plugin.return_value.get_descriptors.assert_called() self.plugin.return_value.write_to_descriptors.assert_called_with([]) - self.plugin.return_value.on_client_data.assert_called_with( - connect_request, - ) + # on_client_data is only called after initial request has completed + self.plugin.return_value.on_client_data.assert_not_called() self.plugin.return_value.on_request_complete.assert_called() self.plugin.return_value.read_from_descriptors.assert_called_with([ self._conn.fileno(), From cc86eead23327e50bf8dd82f8749f50e6f8cbc9a Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 1 Dec 2021 05:21:11 +0530 Subject: [PATCH 3/3] spell fix --- proxy/plugin/proxy_pool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index f93282a184..c39c6af01d 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -82,8 +82,8 @@ def before_upstream_connection( by returning None. TODO(abhinavsingh): Ideally connection to upstream proxy endpoints - must be bootstrapped within it's own re-usable and gc'd pool, to avoid establishing - a fresh upstream proxy connection for each client request. 
+ must be bootstrapped within its own re-usable and garbage collected pool,
+ to avoid establishing a new upstream proxy connection for each client request.

See :class:`~proxy.core.connection.pool.ConnectionPool` which is a work
in progress for SSL cache handling.