diff --git a/README.md b/README.md index ec7f71713f..3bf8ce98d8 100644 --- a/README.md +++ b/README.md @@ -2206,23 +2206,26 @@ To run standalone benchmark for `proxy.py`, use the following command from repo ```console ❯ proxy -h -usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] - [--threaded] [--num-workers NUM_WORKERS] - [--local-executor LOCAL_EXECUTOR] [--backlog BACKLOG] - [--hostname HOSTNAME] [--port PORT] [--port-file PORT_FILE] - [--unix-socket-path UNIX_SOCKET_PATH] - [--num-acceptors NUM_ACCEPTORS] [--version] [--log-level LOG_LEVEL] - [--log-file LOG_FILE] [--log-format LOG_FORMAT] - [--open-file-limit OPEN_FILE_LIMIT] +usage: -m [-h] [--tunnel-hostname TUNNEL_HOSTNAME] [--tunnel-port TUNNEL_PORT] + [--tunnel-username TUNNEL_USERNAME] + [--tunnel-ssh-key TUNNEL_SSH_KEY] + [--tunnel-ssh-key-passphrase TUNNEL_SSH_KEY_PASSPHRASE] + [--tunnel-remote-port TUNNEL_REMOTE_PORT] [--enable-events] + [--threadless] [--threaded] [--num-workers NUM_WORKERS] + [--backlog BACKLOG] [--hostname HOSTNAME] [--port PORT] + [--port-file PORT_FILE] [--unix-socket-path UNIX_SOCKET_PATH] + [--local-executor LOCAL_EXECUTOR] [--num-acceptors NUM_ACCEPTORS] + [--version] [--log-level LOG_LEVEL] [--log-file LOG_FILE] + [--log-format LOG_FORMAT] [--open-file-limit OPEN_FILE_LIMIT] [--plugins PLUGINS [PLUGINS ...]] [--enable-dashboard] - [--work-klass WORK_KLASS] [--pid-file PID_FILE] - [--enable-proxy-protocol] - [--client-recvbuf-size CLIENT_RECVBUF_SIZE] [--key-file KEY_FILE] - [--timeout TIMEOUT] [--server-recvbuf-size SERVER_RECVBUF_SIZE] - [--disable-http-proxy] [--disable-headers DISABLE_HEADERS] - [--ca-key-file CA_KEY_FILE] [--ca-cert-dir CA_CERT_DIR] - [--ca-cert-file CA_CERT_FILE] [--ca-file CA_FILE] - [--ca-signing-key-file CA_SIGNING_KEY_FILE] [--cert-file CERT_FILE] + [--enable-ssh-tunnel] [--work-klass WORK_KLASS] + [--pid-file PID_FILE] [--enable-conn-pool] [--key-file KEY_FILE] + [--cert-file CERT_FILE] [--client-recvbuf-size CLIENT_RECVBUF_SIZE] 
+ [--server-recvbuf-size SERVER_RECVBUF_SIZE] [--timeout TIMEOUT] + [--enable-proxy-protocol] [--disable-http-proxy] + [--disable-headers DISABLE_HEADERS] [--ca-key-file CA_KEY_FILE] + [--ca-cert-dir CA_CERT_DIR] [--ca-cert-file CA_CERT_FILE] + [--ca-file CA_FILE] [--ca-signing-key-file CA_SIGNING_KEY_FILE] [--auth-plugin AUTH_PLUGIN] [--basic-auth BASIC_AUTH] [--cache-dir CACHE_DIR] [--filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS] @@ -2235,15 +2238,28 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.4.0rc6.dev13+ga9b8034.d20220104 +proxy.py v2.4.0rc7.dev12+gd234339.d20220116 options: -h, --help show this help message and exit + --tunnel-hostname TUNNEL_HOSTNAME + Default: None. Remote hostname or IP address to which + SSH tunnel will be established. + --tunnel-port TUNNEL_PORT + Default: 22. SSH port of the remote host. + --tunnel-username TUNNEL_USERNAME + Default: None. Username to use for establishing SSH + tunnel. + --tunnel-ssh-key TUNNEL_SSH_KEY + Default: None. Private key path in pem format + --tunnel-ssh-key-passphrase TUNNEL_SSH_KEY_PASSPHRASE + Default: None. Private key passphrase + --tunnel-remote-port TUNNEL_REMOTE_PORT + Default: 8899. Remote port which will be forwarded + locally for proxy. --enable-events Default: False. Enables core to dispatch lifecycle events. Plugins can be used to subscribe for core events. - --enable-conn-pool Default: False. (WIP) Enable upstream connection - pooling. --threadless Default: True. Enabled by default on Python 3.8+ (mac, linux). When disabled a new thread is spawned to handle each client connection. @@ -2252,14 +2268,6 @@ options: handle each client connection. --num-workers NUM_WORKERS Defaults to number of CPU cores. - --local-executor LOCAL_EXECUTOR - Default: 1. Enabled by default. Use 0 to disable. 
When - enabled acceptors will make use of local (same - process) executor instead of distributing load across - remote (other process) executors. Enable this option - to achieve CPU affinity between acceptors and - executors, instead of using underlying OS kernel - scheduling algorithm. --backlog BACKLOG Default: 100. Maximum number of pending connections to proxy server --hostname HOSTNAME Default: 127.0.0.1. Server IP address. @@ -2270,6 +2278,14 @@ options: --unix-socket-path UNIX_SOCKET_PATH Default: None. Unix socket path to use. When provided --host and --port flags are ignored + --local-executor LOCAL_EXECUTOR + Default: 1. Enabled by default. Use 0 to disable. When + enabled acceptors will make use of local (same + process) executor instead of distributing load across + remote (other process) executors. Enable this option + to achieve CPU affinity between acceptors and + executors, instead of using underlying OS kernel + scheduling algorithm. --num-acceptors NUM_ACCEPTORS Defaults to number of CPU cores. --version, -v Prints proxy.py version. @@ -2288,25 +2304,32 @@ options: Comma separated plugins. You may use --plugins flag multiple times. --enable-dashboard Default: False. Enables proxy.py dashboard. + --enable-ssh-tunnel Default: False. Enable SSH tunnel. --work-klass WORK_KLASS Default: proxy.http.HttpProtocolHandler. Work klass to use for work execution. --pid-file PID_FILE Default: None. Save "parent" process ID to a file. - --enable-proxy-protocol - Default: False. If used, will enable proxy protocol. - Only version 1 is currently supported. - --client-recvbuf-size CLIENT_RECVBUF_SIZE - Default: 128 KB. Maximum amount of data received from - the client in a single recv() operation. + --enable-conn-pool Default: False. (WIP) Enable upstream connection + pooling. --key-file KEY_FILE Default: None. Server key file to enable end-to-end TLS encryption with clients. If used, must also pass --cert-file. - --timeout TIMEOUT Default: 10.0. 
Number of seconds after which an - inactive connection must be dropped. Inactivity is - defined by no data sent or received by the client. + --cert-file CERT_FILE + Default: None. Server certificate to enable end-to-end + TLS encryption with clients. If used, must also pass + --key-file. + --client-recvbuf-size CLIENT_RECVBUF_SIZE + Default: 128 KB. Maximum amount of data received from + the client in a single recv() operation. --server-recvbuf-size SERVER_RECVBUF_SIZE Default: 128 KB. Maximum amount of data received from the server in a single recv() operation. + --timeout TIMEOUT Default: 10.0. Number of seconds after which an + inactive connection must be dropped. Inactivity is + defined by no data sent or received by the client. + --enable-proxy-protocol + Default: False. If used, will enable proxy protocol. + Only version 1 is currently supported. --disable-http-proxy Default: False. Whether to disable proxy.HttpProxyPlugin. --disable-headers DISABLE_HEADERS @@ -2333,10 +2356,6 @@ options: Default: None. CA signing key to use for dynamic generation of HTTPS certificates. If used, must also pass --ca-key-file and --ca-cert-file - --cert-file CERT_FILE - Default: None. Server certificate to enable end-to-end - TLS encryption with clients. If used, must also pass - --key-file. --auth-plugin AUTH_PLUGIN Default: proxy.http.proxy.AuthPlugin. Auth plugin to use instead of default basic auth plugin. diff --git a/proxy/common/flag.py b/proxy/common/flag.py index 63bef59818..9244ad38fe 100644 --- a/proxy/common/flag.py +++ b/proxy/common/flag.py @@ -307,8 +307,8 @@ def initialize( # See https://github.com/abhinavsingh/proxy.py/pull/714 description # to understand rationale behind the following logic. # - # --num-workers flag or option was found. We will use - # the same value for num_acceptors when --num-acceptors flag + # Num workers flag or option was found. We will use + # the same value for num_acceptors when num acceptors flag # is absent. 
if num_workers != DEFAULT_NUM_WORKERS and num_acceptors == DEFAULT_NUM_ACCEPTORS: args.num_acceptors = args.num_workers diff --git a/proxy/core/acceptor/pool.py b/proxy/core/acceptor/pool.py index 745d672e38..0e17cd106a 100644 --- a/proxy/core/acceptor/pool.py +++ b/proxy/core/acceptor/pool.py @@ -21,16 +21,17 @@ from multiprocessing import connection from multiprocessing.reduction import send_handle -from typing import Any, List, Optional +from typing import TYPE_CHECKING, Any, List, Optional from .listener import Listener from .acceptor import Acceptor -from ..event import EventQueue - from ...common.flag import flags from ...common.constants import DEFAULT_NUM_ACCEPTORS +if TYPE_CHECKING: # pragma: no cover + from ..event import EventQueue + logger = logging.getLogger(__name__) @@ -69,7 +70,7 @@ def __init__( executor_queues: List[connection.Connection], executor_pids: List[int], executor_locks: List['multiprocessing.synchronize.Lock'], - event_queue: Optional[EventQueue] = None, + event_queue: Optional['EventQueue'] = None, ) -> None: self.flags = flags # File descriptor to use for accepting new work @@ -79,7 +80,7 @@ def __init__( self.executor_pids: List[int] = executor_pids self.executor_locks: List['multiprocessing.synchronize.Lock'] = executor_locks # Eventing core queue - self.event_queue: Optional[EventQueue] = event_queue + self.event_queue: Optional['EventQueue'] = event_queue # Acceptor process instances self.acceptors: List[Acceptor] = [] # Fd queues used to share file descriptor with acceptor processes diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 7b4a306362..c14d3d8263 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -22,6 +22,7 @@ from ..common.types import Readables, SelectableEvents, Writables from ..common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT +from .protocols import httpProtocols from .connection import HttpClientConnection from .exception import HttpProtocolException from .plugin import 
HttpProtocolHandlerPlugin @@ -260,6 +261,8 @@ def _initialize_plugin( def _discover_plugin_klass(self, protocol: int) -> Optional[Type['HttpProtocolHandlerPlugin']]: """Discovers and return matching HTTP handler plugin matching protocol.""" + if protocol == httpProtocols.UNKNOWN: + return None if b'HttpProtocolHandlerPlugin' in self.flags.plugins: for klass in self.flags.plugins[b'HttpProtocolHandlerPlugin']: k: Type['HttpProtocolHandlerPlugin'] = klass diff --git a/proxy/http/parser/parser.py b/proxy/http/parser/parser.py index ecb774ef13..daeed72979 100644 --- a/proxy/http/parser/parser.py +++ b/proxy/http/parser/parser.py @@ -14,7 +14,7 @@ """ from typing import TypeVar, Optional, Dict, Type, Tuple, List -from ...common.constants import DEFAULT_DISABLE_HEADERS, COLON, DEFAULT_ENABLE_PROXY_PROTOCOL +from ...common.constants import DEFAULT_DISABLE_HEADERS, COLON, DEFAULT_ENABLE_PROXY_PROTOCOL, HTTP_1_0 from ...common.constants import HTTP_1_1, SLASH, CRLF from ...common.constants import WHITESPACE, DEFAULT_HTTP_PORT from ...common.utils import build_http_request, build_http_response, text_ @@ -157,7 +157,12 @@ def set_url(self, url: bytes) -> None: @property def http_handler_protocol(self) -> int: """Returns `HttpProtocols` that this request belongs to.""" - return httpProtocols.HTTP_PROXY if self.host is not None else httpProtocols.WEB_SERVER + if self.version in (HTTP_1_1, HTTP_1_0) and self._url is not None: + if self.host is not None: + return httpProtocols.HTTP_PROXY + if self._url.hostname is None: + return httpProtocols.WEB_SERVER + return httpProtocols.UNKNOWN @property def is_complete(self) -> bool: diff --git a/proxy/http/protocols.py b/proxy/http/protocols.py index 976a41852b..49485720c3 100644 --- a/proxy/http/protocols.py +++ b/proxy/http/protocols.py @@ -18,6 +18,7 @@ HttpProtocols = NamedTuple( 'HttpProtocols', [ + ('UNKNOWN', int), # Web server handling HTTP/1.0, HTTP/1.1, HTTP/2, HTTP/3 # over plain Text or encrypted connection with clients 
('WEB_SERVER', int), @@ -30,4 +31,4 @@ ], ) -httpProtocols = HttpProtocols(1, 2, 3) +httpProtocols = HttpProtocols(1, 2, 3, 4) diff --git a/proxy/http/url.py b/proxy/http/url.py index fc06412b0f..f282b2bd3a 100644 --- a/proxy/http/url.py +++ b/proxy/http/url.py @@ -15,7 +15,7 @@ """ from typing import Optional, Tuple -from ..common.constants import COLON, SLASH, HTTP_URL_PREFIX, HTTPS_URL_PREFIX, AT +from ..common.constants import COLON, SLASH, AT from ..common.utils import text_ @@ -68,29 +68,41 @@ def from_bytes(cls, raw: bytes) -> 'Url': For a HTTPS connect tunnel, url is like ``httpbin.org:443`` For a HTTP proxy request, url is like ``http://httpbin.org/get`` + proxy.py internally never expects an https scheme in the request line. + However, the `Url` class supports parsing any scheme present in a URL, + e.g. ftp, icap etc. + + If a url with no scheme is parsed, e.g. ``//host/abc.js``, then scheme + defaults to `http`. + Further: 1) URL may contain unicode characters 2) URL may contain IPv4 and IPv6 format addresses instead of domain names - - We use heuristics based approach for our URL parser. 
""" # SLASH == 47, check if URL starts with single slash but not double slash - is_single_slash = raw[0] == 47 - is_double_slash = is_single_slash and len(raw) >= 2 and raw[1] == 47 - if is_single_slash and not is_double_slash: + starts_with_single_slash = raw[0] == 47 + starts_with_double_slash = starts_with_single_slash and \ + len(raw) >= 2 and \ + raw[1] == 47 + if starts_with_single_slash and \ + not starts_with_double_slash: return cls(remainder=raw) - is_http = raw.startswith(HTTP_URL_PREFIX) - is_https = raw.startswith(HTTPS_URL_PREFIX) - if is_http or is_https or is_double_slash: - rest = raw[len(b'https://'):] \ - if is_https \ - else raw[len(b'http://'):] \ - if is_http \ - else raw[len(SLASH + SLASH):] + scheme = None + rest = None + if not starts_with_double_slash: + # Find scheme + parts = raw.split(b'://', 1) + if len(parts) == 2: + scheme = parts[0] + rest = parts[1] + else: + rest = raw[len(SLASH + SLASH):] + if scheme is not None or starts_with_double_slash: + assert rest is not None parts = rest.split(SLASH, 1) username, password, host, port = Url._parse(parts[0]) return cls( - scheme=b'https' if is_https else b'http', + scheme=scheme if not starts_with_double_slash else b'http', username=username, password=password, hostname=host, diff --git a/tests/http/parser/test_http_parser.py b/tests/http/parser/test_http_parser.py index 69f7e6f9cb..740c683711 100644 --- a/tests/http/parser/test_http_parser.py +++ b/tests/http/parser/test_http_parser.py @@ -678,9 +678,7 @@ def test_is_http_1_1_keep_alive(self) -> None: ) self.assertTrue(self.parser.is_http_1_1_keep_alive) - def test_is_http_1_1_keep_alive_with_non_close_connection_header( - self, - ) -> None: + def test_is_http_1_1_keep_alive_with_non_close_connection_header(self) -> None: self.parser.parse( build_http_request( httpMethods.GET, b'/', @@ -811,3 +809,42 @@ def test_is_safe_against_malicious_requests(self) -> None: 
b'//198.98.53.25:1389/TomcatBypass/Command/Base64d2dldCA0Ni4xNjEuNTIuMzcvRXhwbG9pd' + b'C5zaDsgY2htb2QgK3ggRXhwbG9pdC5zaDsgLi9FeHBsb2l0LnNoOw==}', ) + + def test_parses_icap_protocol(self) -> None: + # Ref https://datatracker.ietf.org/doc/html/rfc3507 + self.parser.parse( + b'REQMOD icap://icap-server.net/server?arg=87 ICAP/1.0\r\n' + + b'Host: icap-server.net\r\n' + + b'Encapsulated: req-hdr=0, req-body=154' + + b'\r\n\r\n' + + b'POST /origin-resource/form.pl HTTP/1.1\r\n' + + b'Host: www.origin-server.com\r\n' + + b'Accept: text/html, text/plain\r\n' + + b'Accept-Encoding: compress\r\n' + + b'Cache-Control: no-cache\r\n' + + b'\r\n' + + b'1e\r\n' + + b'I am posting this information.\r\n' + + b'0\r\n' + + b'\r\n', + ) + self.assertEqual(self.parser.method, b'REQMOD') + assert self.parser._url is not None + self.assertEqual(self.parser._url.scheme, b'icap') + + def test_cannot_parse_sip_protocol(self) -> None: + # Will fail to parse because of invalid host and port in the request line + # Our Url parser expects an integer port. 
+ with self.assertRaises(ValueError): + self.parser.parse( + b'OPTIONS sip:nm SIP/2.0\r\n' + + b'Via: SIP/2.0/TCP nm;branch=foo\r\n' + + b'From: ;tag=root\r\nTo: \r\n' + + b'Call-ID: 50000\r\n' + + b'CSeq: 42 OPTIONS\r\n' + + b'Max-Forwards: 70\r\n' + + b'Content-Length: 0\r\n' + + b'Contact: \r\n' + + b'Accept: application/sdp\r\n' + + b'\r\n', + ) diff --git a/tests/http/test_url.py b/tests/http/test_url.py index 0cfb8c667a..958dc098bb 100644 --- a/tests/http/test_url.py +++ b/tests/http/test_url.py @@ -143,3 +143,12 @@ def test_no_scheme_suffix(self) -> None: self.assertEqual(url.remainder, b'/server?arg=87') self.assertEqual(url.username, None) self.assertEqual(url.password, None) + + def test_any_scheme_suffix(self) -> None: + url = Url.from_bytes(b'icap://example-server.net/server?arg=87') + self.assertEqual(url.scheme, b'icap') + self.assertEqual(url.hostname, b'example-server.net') + self.assertEqual(url.port, None) + self.assertEqual(url.remainder, b'/server?arg=87') + self.assertEqual(url.username, None) + self.assertEqual(url.password, None)