diff --git a/README.md b/README.md index 992cde04f2..b87bfff2f6 100644 --- a/README.md +++ b/README.md @@ -760,9 +760,8 @@ Response body `Hello from man in the middle` is sent by our plugin. Forward incoming proxy requests to a set of upstream proxy servers. -Let's start upstream proxies first. - -Start `proxy.py` on port `9000` and `9001` +Let's start 2 upstream proxies first. To simulate upstream proxies, +start `proxy.py` on port `9000` and `9001` ```console ❯ proxy --port 9000 @@ -789,6 +788,10 @@ Make a curl request via `8899` proxy: Verify that `8899` proxy forwards requests to upstream proxies by checking respective logs. +If an upstream proxy requires credentials, pass them as arguments. Example: + +`--proxy-pool user:pass@upstream.proxy:port` + ### FilterByClientIpPlugin Reject traffic from specific IP addresses. By default this @@ -2092,7 +2095,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.4.0rc3.dev33+gc341594.d20211214 +proxy.py v2.4.0b4.dev12+g19e6881.d20211221 options: -h, --help show this help message and exit @@ -2175,9 +2178,9 @@ options: generated HTTPS certificates. If used, must also pass --ca-cert-file and --ca-signing-key-file --ca-cert-dir CA_CERT_DIR - Default: ~/.proxy.py. Directory to store dynamically - generated certificates. Also see --ca-key-file, --ca- - cert-file and --ca-signing-key-file + Default: ~/.proxy/certificates. Directory to store + dynamically generated certificates. Also see --ca-key- + file, --ca-cert-file and --ca-signing-key-file --ca-cert-file CA_CERT_FILE Default: None. Signing certificate to use for signing dynamically generated HTTPS certificates. 
If used, diff --git a/proxy/common/constants.py b/proxy/common/constants.py index 745281165e..7b5a947d99 100644 --- a/proxy/common/constants.py +++ b/proxy/common/constants.py @@ -43,10 +43,24 @@ def _env_threadless_compliant() -> bool: COMMA = b',' DOT = b'.' SLASH = b'/' -HTTP_1_0 = b'HTTP/1.0' -HTTP_1_1 = b'HTTP/1.1' -HTTP_URL_PREFIX = b'http://' -HTTPS_URL_PREFIX = b'https://' +AT = b'@' +HTTP_PROTO = b'http' +HTTPS_PROTO = HTTP_PROTO + b's' +HTTP_1_0 = HTTP_PROTO.upper() + SLASH + b'1.0' +HTTP_1_1 = HTTP_PROTO.upper() + SLASH + b'1.1' +HTTP_URL_PREFIX = HTTP_PROTO + COLON + SLASH + SLASH +HTTPS_URL_PREFIX = HTTPS_PROTO + COLON + SLASH + SLASH + +LOCAL_INTERFACE_HOSTNAMES = ( + b'localhost', + b'127.0.0.1', + b'::1', +) + +ANY_INTERFACE_HOSTNAMES = ( + b'0.0.0.0', + b'::', +) PROXY_AGENT_HEADER_KEY = b'Proxy-agent' PROXY_AGENT_HEADER_VALUE = b'proxy.py v' + \ diff --git a/proxy/http/__init__.py b/proxy/http/__init__.py index f8d2e4fa4f..b918c3ecf9 100644 --- a/proxy/http/__init__.py +++ b/proxy/http/__init__.py @@ -12,6 +12,7 @@ from .plugin import HttpProtocolHandlerPlugin from .codes import httpStatusCodes from .methods import httpMethods +from .headers import httpHeaders from .url import Url __all__ = [ @@ -19,5 +20,6 @@ 'HttpProtocolHandlerPlugin', 'httpStatusCodes', 'httpMethods', + 'httpHeaders', 'Url', ] diff --git a/proxy/http/headers.py b/proxy/http/headers.py new file mode 100644 index 0000000000..d042067c02 --- /dev/null +++ b/proxy/http/headers.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on + Network monitoring, controls & Application development, testing, debugging. + + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. + + .. 
spelling:: + + http + iterable +""" +from typing import NamedTuple + + +# Ref: https://www.iana.org/assignments/http-fields/http-fields.xhtml +HttpHeaders = NamedTuple( + 'HttpHeaders', [ + ('PROXY_AUTHORIZATION', bytes), + ('PROXY_CONNECTION', bytes), + ], +) + +httpHeaders = HttpHeaders( + b'proxy-authorization', + b'proxy-connection', +) diff --git a/proxy/http/proxy/auth.py b/proxy/http/proxy/auth.py index be0bccd79b..a309d194c7 100644 --- a/proxy/http/proxy/auth.py +++ b/proxy/http/proxy/auth.py @@ -19,6 +19,8 @@ from ...common.flag import flags from ...common.constants import DEFAULT_BASIC_AUTH + +from ...http import httpHeaders from ...http.parser import HttpParser from ...http.proxy import HttpProxyBasePlugin @@ -39,9 +41,9 @@ def before_upstream_connection( self, request: HttpParser, ) -> Optional[HttpParser]: if self.flags.auth_code and request.headers: - if b'proxy-authorization' not in request.headers: + if httpHeaders.PROXY_AUTHORIZATION not in request.headers: raise ProxyAuthenticationFailed() - parts = request.headers[b'proxy-authorization'][1].split() + parts = request.headers[httpHeaders.PROXY_AUTHORIZATION][1].split() if len(parts) != 2 \ or parts[0].lower() != b'basic' \ or parts[1] != self.flags.auth_code: diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index 3e41b15625..2fda84f8d5 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -26,6 +26,7 @@ from .plugin import HttpProxyBasePlugin +from ..headers import httpHeaders from ..methods import httpMethods from ..codes import httpStatusCodes from ..plugin import HttpProtocolHandlerPlugin @@ -557,7 +558,10 @@ def on_request_complete(self) -> Union[socket.socket, bool]: # officially documented in any specification, drop it. # - proxy-authorization is of no use for upstream, remove it. 
self.request.del_headers( - [b'proxy-authorization', b'proxy-connection'], + [ + httpHeaders.PROXY_AUTHORIZATION, + httpHeaders.PROXY_CONNECTION, + ], ) # - For HTTP/1.0, connection header defaults to close # - For HTTP/1.1, connection header defaults to keep-alive diff --git a/proxy/http/url.py b/proxy/http/url.py index a7fc4390cb..9a5db36611 100644 --- a/proxy/http/url.py +++ b/proxy/http/url.py @@ -15,7 +15,7 @@ """ from typing import Optional, Tuple -from ..common.constants import COLON, SLASH, HTTP_URL_PREFIX, HTTPS_URL_PREFIX +from ..common.constants import COLON, SLASH, HTTP_URL_PREFIX, HTTPS_URL_PREFIX, AT from ..common.utils import text_ @@ -28,15 +28,24 @@ class Url: def __init__( self, scheme: Optional[bytes] = None, + username: Optional[bytes] = None, + password: Optional[bytes] = None, hostname: Optional[bytes] = None, port: Optional[int] = None, remainder: Optional[bytes] = None, ) -> None: self.scheme: Optional[bytes] = scheme + self.username: Optional[bytes] = username + self.password: Optional[bytes] = password self.hostname: Optional[bytes] = hostname self.port: Optional[int] = port self.remainder: Optional[bytes] = remainder + @property + def has_credentials(self) -> bool: + """Returns true if both username and password components are present.""" + return self.username is not None and self.password is not None + def __str__(self) -> str: url = '' if self.scheme: @@ -74,29 +83,40 @@ def from_bytes(cls, raw: bytes) -> 'Url': if is_https \ else raw[len(b'http://'):] parts = rest.split(SLASH, 1) - host, port = Url.parse_host_and_port(parts[0]) + username, password, host, port = Url._parse(parts[0]) return cls( scheme=b'https' if is_https else b'http', + username=username, + password=password, hostname=host, port=port, remainder=None if len(parts) == 1 else ( SLASH + parts[1] ), ) - host, port = Url.parse_host_and_port(raw) - return cls(hostname=host, port=port) + username, password, host, port = Url._parse(raw) + return cls(username=username, 
password=password, hostname=host, port=port) @staticmethod - def parse_host_and_port(raw: bytes) -> Tuple[bytes, Optional[int]]: - parts = raw.split(COLON, 2) + def _parse(raw: bytes) -> Tuple[ + Optional[bytes], + Optional[bytes], + bytes, + Optional[int], + ]: + split_at = raw.split(AT, 1) + username, password = None, None + if len(split_at) == 2: + username, password = split_at[0].split(COLON) + parts = split_at[-1].split(COLON, 2) num_parts = len(parts) port: Optional[int] = None # No port found if num_parts == 1: - return parts[0], None + return username, password, parts[0], None # Host and port found if num_parts == 2: - return COLON.join(parts[:-1]), int(parts[-1]) + return username, password, COLON.join(parts[:-1]), int(parts[-1]) # More than a single COLON i.e. IPv6 scenario try: # Try to resolve last part as an int port @@ -114,4 +134,4 @@ def parse_host_and_port(raw: bytes) -> Tuple[bytes, Optional[int]]: rhost[0] != '[' and \ rhost[-1] != ']': host = b'[' + host + b']' - return host, port + return username, password, host, port diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index c39c6af01d..bfe00aaafa 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -8,6 +8,7 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. 
""" +import base64 import random import logging import ipaddress @@ -15,9 +16,10 @@ from typing import Dict, List, Optional, Any from ..common.flag import flags -from ..common.utils import text_ +from ..common.utils import text_, bytes_ +from ..common.constants import COLON, LOCAL_INTERFACE_HOSTNAMES, ANY_INTERFACE_HOSTNAMES -from ..http import Url, httpMethods +from ..http import Url, httpMethods, httpHeaders from ..http.parser import HttpParser from ..http.exception import HttpProtocolException from ..http.proxy import HttpProxyBasePlugin @@ -67,8 +69,9 @@ class ProxyPoolPlugin(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) + self._endpoint: Url = self._select_proxy() # Cached attributes to be used during access log override - self.request_host_port_path_method: List[Any] = [ + self._metadata: List[Any] = [ None, None, None, None, ] @@ -94,20 +97,22 @@ def before_upstream_connection( return request except ValueError: pass - # Choose a random proxy from the pool - # TODO: Implement your own logic here e.g. round-robin, least connection etc. 
- endpoint = random.choice(self.flags.proxy_pool)[0].split(':', 1) - if endpoint[0] == 'localhost' and endpoint[1] == '8899': + # If chosen proxy is the local instance, bypass upstream proxies + assert self._endpoint.port and self._endpoint.hostname + if self._endpoint.port == self.flags.port and \ + self._endpoint.hostname in LOCAL_INTERFACE_HOSTNAMES + ANY_INTERFACE_HOSTNAMES: return request - logger.debug('Using endpoint: {0}:{1}'.format(*endpoint)) - self.initialize_upstream(endpoint[0], int(endpoint[1])) + # Establish connection to chosen upstream proxy + endpoint_tuple = (text_(self._endpoint.hostname), self._endpoint.port) + logger.debug('Using endpoint: {0}:{1}'.format(*endpoint_tuple)) + self.initialize_upstream(*endpoint_tuple) assert self.upstream try: self.upstream.connect() except TimeoutError: logger.info( 'Timed out connecting to upstream proxy {0}:{1}'.format( - *endpoint, + *endpoint_tuple, ), ) raise HttpProtocolException() @@ -121,13 +126,13 @@ def before_upstream_connection( # check. 
logger.info( 'Connection refused by upstream proxy {0}:{1}'.format( - *endpoint, + *endpoint_tuple, ), ) raise HttpProtocolException() logger.debug( 'Established connection to upstream proxy {0}:{1}'.format( - *endpoint, + *endpoint_tuple, ), ) return None @@ -154,10 +159,21 @@ def handle_client_request( 443 if request.is_https_tunnel else 80 ) path = None if not request.path else request.path.decode() - self.request_host_port_path_method = [ + self._metadata = [ host, port, path, request.method, ] - # Queue original request to upstream proxy + # Queue original request optionally with auth headers to upstream proxy + if self._endpoint.has_credentials: + assert self._endpoint.username and self._endpoint.password + request.add_header( + httpHeaders.PROXY_AUTHORIZATION, + b'Basic ' + + base64.b64encode( + self._endpoint.username + + COLON + + self._endpoint.password, + ), + ) self.upstream.queue(memoryview(request.build(for_proxy=True))) return request @@ -189,9 +205,9 @@ def on_access_log(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: context.update({ 'upstream_proxy_host': addr, 'upstream_proxy_port': port, - 'server_host': self.request_host_port_path_method[0], - 'server_port': self.request_host_port_path_method[1], - 'request_path': self.request_host_port_path_method[2], + 'server_host': self._metadata[0], + 'server_port': self._metadata[1], + 'request_path': self._metadata[2], 'response_bytes': self.total_size, }) self.access_log(context) @@ -199,7 +215,14 @@ def on_access_log(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: def access_log(self, log_attrs: Dict[str, Any]) -> None: access_log_format = DEFAULT_HTTPS_ACCESS_LOG_FORMAT - request_method = self.request_host_port_path_method[3] + request_method = self._metadata[3] if request_method and request_method != httpMethods.CONNECT: access_log_format = DEFAULT_HTTP_ACCESS_LOG_FORMAT logger.info(access_log_format.format_map(log_attrs)) + + def _select_proxy(self) -> Url: + """Choose a 
random proxy from the pool. + + TODO: Implement your own logic here e.g. round-robin, least connection etc. + """ + return Url.from_bytes(bytes_(random.choice(self.flags.proxy_pool)[0])) diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 9b4feb1bc5..fdf1b9d6da 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -15,7 +15,7 @@ from proxy.common.flag import FlagParser from proxy.http.exception.proxy_auth_failed import ProxyAuthenticationFailed -from proxy.http import HttpProtocolHandler +from proxy.http import HttpProtocolHandler, httpHeaders from proxy.core.connection import TcpClientConnection from proxy.common.utils import build_http_request @@ -77,7 +77,7 @@ async def test_proxy_auth_fails_with_invalid_cred(self) -> None: b'GET', b'http://upstream.host/not-found.html', headers={ b'Host': b'upstream.host', - b'Proxy-Authorization': b'Basic hello', + httpHeaders.PROXY_AUTHORIZATION: b'Basic hello', }, ) self.mock_selector.return_value.select.side_effect = [ @@ -105,7 +105,7 @@ async def test_proxy_auth_works_with_valid_cred(self) -> None: b'GET', b'http://upstream.host/not-found.html', headers={ b'Host': b'upstream.host', - b'Proxy-Authorization': b'Basic dXNlcjpwYXNz', + httpHeaders.PROXY_AUTHORIZATION: b'Basic dXNlcjpwYXNz', }, ) self.mock_selector.return_value.select.side_effect = [ @@ -129,7 +129,7 @@ async def test_proxy_auth_works_with_mixed_case_basic_string(self) -> None: b'GET', b'http://upstream.host/not-found.html', headers={ b'Host': b'upstream.host', - b'Proxy-Authorization': b'bAsIc dXNlcjpwYXNz', + httpHeaders.PROXY_AUTHORIZATION: b'bAsIc dXNlcjpwYXNz', }, ) self.mock_selector.return_value.select.side_effect = [ diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index ca6cce944e..c5f1ff1163 100644 --- a/tests/http/test_protocol_handler.py +++ 
b/tests/http/test_protocol_handler.py @@ -26,7 +26,7 @@ from proxy.http.proxy import HttpProxyPlugin from proxy.http.parser import httpParserStates, httpParserTypes from proxy.http.exception import ProxyAuthenticationFailed, ProxyConnectionFailed -from proxy.http import HttpProtocolHandler +from proxy.http import HttpProtocolHandler, httpHeaders from ..test_assertions import Assertions @@ -321,8 +321,8 @@ async def test_authenticated_proxy_http_get(self) -> None: b'User-Agent: proxy.py/%s' % bytes_(__version__), b'Host: localhost:%d' % self.http_server_port, b'Accept: */*', - b'Proxy-Connection: Keep-Alive', - b'Proxy-Authorization: Basic dXNlcjpwYXNz', + httpHeaders.PROXY_CONNECTION + b': Keep-Alive', + httpHeaders.PROXY_AUTHORIZATION + b': Basic dXNlcjpwYXNz', CRLF, ]) await self.assert_data_queued(server) @@ -354,8 +354,8 @@ async def test_authenticated_proxy_http_tunnel(self) -> None: b'CONNECT localhost:%d HTTP/1.1' % self.http_server_port, b'Host: localhost:%d' % self.http_server_port, b'User-Agent: proxy.py/%s' % bytes_(__version__), - b'Proxy-Connection: Keep-Alive', - b'Proxy-Authorization: Basic dXNlcjpwYXNz', + httpHeaders.PROXY_CONNECTION + b': Keep-Alive', + httpHeaders.PROXY_AUTHORIZATION + b': Basic dXNlcjpwYXNz', CRLF, ]) await self.assert_tunnel_response(server) diff --git a/tests/http/test_url.py b/tests/http/test_url.py index de8ec0e71a..a640f74cf3 100644 --- a/tests/http/test_url.py +++ b/tests/http/test_url.py @@ -114,3 +114,23 @@ def test_trailing_slash_url(self) -> None: self.assertEqual(url.hostname, b'localhost') self.assertEqual(url.port, 12345) self.assertEqual(url.remainder, b'/v1/users/') + self.assertEqual(url.username, None) + self.assertEqual(url.password, None) + + def test_username_password(self) -> None: + url = Url.from_bytes(b'http://user:pass@localhost:12345/v1/users/') + self.assertEqual(url.scheme, b'http') + self.assertEqual(url.hostname, b'localhost') + self.assertEqual(url.port, 12345) + self.assertEqual(url.remainder, 
b'/v1/users/') + self.assertEqual(url.username, b'user') + self.assertEqual(url.password, b'pass') + + def test_username_password_without_proto_prefix(self) -> None: + url = Url.from_bytes('user:pass@å∫ç.com'.encode('utf-8')) + self.assertEqual(url.scheme, None) + self.assertEqual(url.hostname, 'å∫ç.com'.encode('utf-8')) + self.assertEqual(url.port, None) + self.assertEqual(url.remainder, None) + self.assertEqual(url.username, b'user') + self.assertEqual(url.password, b'pass')