From 3cb2979426894eaacab2f981a0e5355014d0d852 Mon Sep 17 00:00:00 2001 From: Brian Bouterse Date: Wed, 29 Sep 2021 15:00:30 -0400 Subject: [PATCH] Add support for secure proxies in the client This patch opens up the code path and adds the implementation that allows end-users to start sending HTTPS requests through HTTPS proxies. The support for TLS-in-TLS (needed for this to work) in the stdlib is kinda available since Python 3.7 but is disabled for `asyncio` with an attribute/flag/toggle. When the upstream CPython enables it finally, aiohttp v3.8+ will be able to work with it out of the box. Currently the tests monkey-patch `asyncio` in order to verify that this works. The users who are willing to do the same, will be able to take advantage of it right now. Eventually (hopefully starting Python 3.11), the need for monkey-patching should be eliminated. Refs: * https://bugs.python.org/issue37179 * https://github.com/python/cpython/pull/28073 * https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support * https://github.com/aio-libs/aiohttp/discussions/6044 Resolves #3816 Resolves #4268 Co-Authored-By: Brian Bouterse Co-Authored-By: Jordan Borean Co-Authored-By: Sviatoslav Sydorenko --- CHANGES/5992.feature | 3 + CONTRIBUTORS.txt | 2 + aiohttp/client_reqrep.py | 2 - aiohttp/connector.py | 127 +++++++++++++++++++++++++++++---- docs/client_advanced.rst | 35 ++++++++- tests/test_client_request.py | 5 -- tests/test_proxy.py | 115 +++++------------------------ tests/test_proxy_functional.py | 74 +++++++++++++++++-- 8 files changed, 234 insertions(+), 129 deletions(-) create mode 100644 CHANGES/5992.feature diff --git a/CHANGES/5992.feature b/CHANGES/5992.feature new file mode 100644 index 0000000000..5667d2de24 --- /dev/null +++ b/CHANGES/5992.feature @@ -0,0 +1,3 @@ +Added support for HTTPS proxies to the extent CPython's +:py:mod:`asyncio` supports it -- by :user:`bmbouter`, +:user:`jborean93` and :user:`webknjaz`. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 8669ccd3b2..45d9a1a5fb 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -57,6 +57,7 @@ Boris Feld Borys Vorona Boyi Chen Brett Cannon +Brian Bouterse Brian C. Lane Brian Muller Bruce Merry @@ -165,6 +166,7 @@ Jonas Obrist Jonathan Wright Jonny Tan Joongi Kim +Jordan Borean Josep Cugat Josh Junon Joshu Coats diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index c63b73bcdf..8c64db600e 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -481,8 +481,6 @@ def update_proxy( proxy_auth: Optional[BasicAuth], proxy_headers: Optional[LooseHeaders], ) -> None: - if proxy and not proxy.scheme == "http": - raise ValueError("Only http proxies are supported") if proxy_auth and not isinstance(proxy_auth, helpers.BasicAuth): raise ValueError("proxy_auth must be None or BasicAuth() tuple") self.proxy = proxy diff --git a/aiohttp/connector.py b/aiohttp/connector.py index ab5124966b..c1b0f89ce1 100644 --- a/aiohttp/connector.py +++ b/aiohttp/connector.py @@ -951,6 +951,100 @@ async def _wrap_create_connection( except OSError as exc: raise client_error(req.connection_key, exc) from exc + def _warn_about_tls_in_tls( + self, + underlying_transport: asyncio.Transport, + req: "ClientRequest", + ) -> None: + """Issue a warning if the requested URL has HTTPS scheme.""" + if req.request_info.url.scheme != "https": + return + + asyncio_supports_tls_in_tls = getattr( + underlying_transport, + "_start_tls_compatible", + False, + ) + + if asyncio_supports_tls_in_tls: + return + + warnings.warn( + "An HTTPS request is being sent through an HTTPS proxy. " + "This support for TLS in TLS is known to be disabled " + "in the stdlib asyncio. This is why you'll probably see " + "an error in the log below.\n\n" + "It is possible to enable it via monkeypatching under " + "Python 3.7 or higher. For more details, see:\n" + "* https://bugs.python.org/issue37179\n" + "* https://github.com/python/cpython/pull/28073\n\n" + "You can temporarily patch this as follows:\n" + "* https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support\n" + "* https://github.com/aio-libs/aiohttp/discussions/6044\n", + RuntimeWarning, + source=self, + # Why `4`? At least 3 of the calls in the stack originate + # from the methods in this class. + stacklevel=3, + ) + + async def _start_tls_connection( + self, + underlying_transport: asyncio.Transport, + req: "ClientRequest", + timeout: "ClientTimeout", + client_error: Type[Exception] = ClientConnectorError, + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: + """Wrap the raw TCP transport with TLS.""" + tls_proto = self._factory() # Create a brand new proto for TLS + + # Safety of the `cast()` call here is based on the fact that + # internally `_get_ssl_context()` only returns `None` when + # `req.is_ssl()` evaluates to `False` which is never gonna happen + # in this code path. Of course, it's rather fragile + # maintainability-wise but this is to be solved separately. + sslcontext = cast(ssl.SSLContext, self._get_ssl_context(req)) + + try: + async with ceil_timeout(timeout.sock_connect): + try: + tls_transport = await self._loop.start_tls( + underlying_transport, + tls_proto, + sslcontext, + server_hostname=req.host, + ssl_handshake_timeout=timeout.total, + ) + except BaseException: + # We need to close the underlying transport since + # `start_tls()` probably failed before it had a + # chance to do this: + underlying_transport.close() + raise + except cert_errors as exc: + raise ClientConnectorCertificateError(req.connection_key, exc) from exc + except ssl_errors as exc: + raise ClientConnectorSSLError(req.connection_key, exc) from exc + except OSError as exc: + raise client_error(req.connection_key, exc) from exc + except TypeError as type_err: + # Example cause looks like this: + # TypeError: transport is not supported by start_tls() + + raise ClientConnectionError( + "Cannot initialize a TLS-in-TLS connection to host " + f"{req.host!s}:{req.port:d} through an underlying connection " + f"to an HTTPS proxy {req.proxy!s} ssl:{req.ssl or 'default'} " + f"[{type_err!s}]" + ) from type_err + else: + tls_proto.connection_made( + tls_transport + ) # Kick the state machine of the new TLS protocol + + return tls_transport, tls_proto + async def _create_direct_connection( self, req: "ClientRequest", @@ -1028,7 +1122,7 @@ def drop_exception(fut: "asyncio.Future[List[Dict[str, Any]]]") -> None: async def _create_proxy_connection( self, req: "ClientRequest", traces: List["Trace"], timeout: "ClientTimeout" - ) -> Tuple[asyncio.Transport, ResponseHandler]: + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: headers = {} # type: Dict[str, str] if req.proxy_headers is not None: headers = req.proxy_headers # type: ignore[assignment] @@ -1063,7 +1157,8 @@ async def _create_proxy_connection( proxy_req.headers[hdrs.PROXY_AUTHORIZATION] = auth if req.is_ssl(): - sslcontext = self._get_ssl_context(req) + self._warn_about_tls_in_tls(transport, req) + # For HTTPS requests over HTTP proxy # we must notify proxy to tunnel connection # so we send CONNECT command: @@ -1083,7 +1178,11 @@ async def _create_proxy_connection( try: protocol = conn._protocol assert protocol is not None - protocol.set_response_params() + + # read_until_eof=True will ensure the connection isn't closed + # once the response is received and processed allowing + # START_TLS to work on the connection below. + protocol.set_response_params(read_until_eof=True) resp = await proxy_resp.start(conn) except BaseException: proxy_resp.close() @@ -1104,21 +1203,19 @@ async def _create_proxy_connection( message=message, headers=resp.headers, ) - rawsock = transport.get_extra_info("socket", default=None) - if rawsock is None: - raise RuntimeError("Transport does not expose socket instance") - # Duplicate the socket, so now we can close proxy transport - rawsock = rawsock.dup() - finally: + except BaseException: + # It shouldn't be closed in `finally` because it's fed to + # `loop.start_tls()` and the docs say not to touch it after + # passing there. transport.close() + raise - transport, proto = await self._wrap_create_connection( - self._factory, - timeout=timeout, - ssl=sslcontext, - sock=rawsock, - server_hostname=req.host, + return await self._start_tls_connection( + # Access the old transport for the last time before it's + # closed and forgotten forever: + transport, req=req, + timeout=timeout, ) finally: proxy_resp.close() diff --git a/docs/client_advanced.rst b/docs/client_advanced.rst index 7a2f4bef21..316f41cd1f 100644 --- a/docs/client_advanced.rst +++ b/docs/client_advanced.rst @@ -533,9 +533,11 @@ DER with e.g:: Proxy support ------------- -aiohttp supports plain HTTP proxies and HTTP proxies that can be upgraded to HTTPS -via the HTTP CONNECT method. aiohttp does not support proxies that must be -connected to via ``https://``. To connect, use the *proxy* parameter:: +aiohttp supports plain HTTP proxies and HTTP proxies that can be +upgraded to HTTPS via the HTTP CONNECT method. aiohttp has a limited +support for proxies that must be connected to via ``https://`` — see +the info box below for more details. +To connect, use the *proxy* parameter:: async with aiohttp.ClientSession() as session: async with session.get("http://python.org", @@ -570,6 +572,33 @@ variables* (all are case insensitive):: Proxy credentials are given from ``~/.netrc`` file if present (see :class:`aiohttp.ClientSession` for more details). +.. attention:: + + CPython has introduced the support for TLS in TLS around Python 3.7. + But, as of now (Python 3.10), it's disabled for the transports that + :py:mod:`asyncio` uses. If the further release of Python (say v3.11) + toggles one attribute, it'll *just work™*. + + aiohttp v3.8 and higher is ready for this to happen and has code in + place supports TLS-in-TLS, hence sending HTTPS requests over HTTPS + proxy tunnels. + + ⚠️ For as long as your Python runtime doesn't declare the support for + TLS-in-TLS, please don't file bugs with aiohttp but rather try to + help the CPython upstream enable this feature. Meanwhile, if you + *really* need this to work, there's a patch that may help you make + it happen, include it into your app's code base: + https://github.com/aio-libs/aiohttp/discussions/6044#discussioncomment-1432443. + +.. important:: + + When supplying a custom :py:class:`ssl.SSLContext` instance, bear in + mind that it will be used not only to establish a TLS session with + the HTTPS endpoint you're hitting but also to establish a TLS tunnel + to the HTTPS proxy. To avoid surprises, make sure to set up the trust + chain that would recognize TLS certificates used by both the endpoint + and the proxy. + .. _aiohttp-persistent-session: Persistent session diff --git a/tests/test_client_request.py b/tests/test_client_request.py index cfe2a45edc..6ab8761778 100644 --- a/tests/test_client_request.py +++ b/tests/test_client_request.py @@ -119,11 +119,6 @@ def test_version_err(make_request: Any) -> None: make_request("get", "http://python.org/", version="1.c") -def test_https_proxy(make_request: Any) -> None: - with pytest.raises(ValueError): - make_request("get", "http://python.org/", proxy=URL("https://proxy.org")) - - def test_keep_alive(make_request: Any) -> None: req = make_request("get", "http://python.org/", version=(0, 9)) assert not req.keep_alive() diff --git a/tests/test_proxy.py b/tests/test_proxy.py index c778f85f53..af869ee88f 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -228,6 +228,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) req = ClientRequest( "GET", @@ -242,8 +243,6 @@ async def make_conn(): self.assertEqual(req.url.path, "/") self.assertEqual(proxy_req.method, "CONNECT") self.assertEqual(proxy_req.url, URL("https://www.python.org")) - tr.close.assert_called_once_with() - tr.get_extra_info.assert_called_with("socket", default=None) self.loop.run_until_complete(proxy_req.close()) proxy_resp.close() @@ -287,22 +286,10 @@ async def make_conn(): ] ) - seq = 0 - - async def create_connection(*args, **kwargs): - nonlocal seq - seq += 1 - - # connection to http://proxy.example.com - if seq == 1: - return mock.Mock(), mock.Mock() - # connection to https://www.python.org - elif seq == 2: - raise ssl.CertificateError - else: - assert False - - self.loop.create_connection = create_connection + # Called on connection to http://proxy.example.com + self.loop.create_connection = make_mocked_coro((mock.Mock(), mock.Mock())) + # Called on connection to https://www.python.org + self.loop.start_tls = make_mocked_coro(raise_exception=ssl.CertificateError) req = ClientRequest( "GET", @@ -353,75 +340,12 @@ async def make_conn(): ] ) - seq = 0 - - async def create_connection(*args, **kwargs): - nonlocal seq - seq += 1 - - # connection to http://proxy.example.com - if seq == 1: - return mock.Mock(), mock.Mock() - # connection to https://www.python.org - elif seq == 2: - raise ssl.SSLError - else: - assert False - - self.loop.create_connection = create_connection - - req = ClientRequest( - "GET", - URL("https://www.python.org"), - proxy=URL("http://proxy.example.com"), - loop=self.loop, + # Called on connection to http://proxy.example.com + self.loop.create_connection = make_mocked_coro( + (mock.Mock(), mock.Mock()), ) - with self.assertRaises(aiohttp.ClientConnectorSSLError): - self.loop.run_until_complete( - connector._create_connection(req, None, aiohttp.ClientTimeout()) - ) - - @mock.patch("aiohttp.connector.ClientRequest") - def test_https_connect_runtime_error(self, ClientRequestMock: Any) -> None: - proxy_req = ClientRequest( - "GET", URL("http://proxy.example.com"), loop=self.loop - ) - ClientRequestMock.return_value = proxy_req - - proxy_resp = ClientResponse( - "get", - URL("http://proxy.example.com"), - request_info=mock.Mock(), - writer=mock.Mock(), - continue100=None, - timer=TimerNoop(), - traces=[], - loop=self.loop, - session=mock.Mock(), - ) - proxy_req.send = make_mocked_coro(proxy_resp) - proxy_resp.start = make_mocked_coro(mock.Mock(status=200)) - - async def make_conn(): - return aiohttp.TCPConnector() - - connector = self.loop.run_until_complete(make_conn()) - connector._resolve_host = make_mocked_coro( - [ - { - "hostname": "hostname", - "host": "127.0.0.1", - "port": 80, - "family": socket.AF_INET, - "proto": 0, - "flags": 0, - } - ] - ) - - tr, proto = mock.Mock(), mock.Mock() - tr.get_extra_info.return_value = None - self.loop.create_connection = make_mocked_coro((tr, proto)) + # Called on connection to https://www.python.org + self.loop.start_tls = make_mocked_coro(raise_exception=ssl.SSLError) req = ClientRequest( "GET", @@ -429,17 +353,11 @@ async def make_conn(): proxy=URL("http://proxy.example.com"), loop=self.loop, ) - with self.assertRaisesRegex( - RuntimeError, "Transport does not expose socket instance" - ): + with self.assertRaises(aiohttp.ClientConnectorSSLError): self.loop.run_until_complete( connector._create_connection(req, None, aiohttp.ClientTimeout()) ) - self.loop.run_until_complete(proxy_req.close()) - proxy_resp.close() - self.loop.run_until_complete(req.close()) - @mock.patch("aiohttp.connector.ClientRequest") def test_https_connect_http_proxy_error(self, ClientRequestMock: Any) -> None: proxy_req = ClientRequest( @@ -650,6 +568,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) req = ClientRequest( "GET", @@ -661,18 +580,17 @@ async def make_conn(): connector._create_connection(req, None, aiohttp.ClientTimeout()) ) - self.loop.create_connection.assert_called_with( + self.loop.start_tls.assert_called_with( + mock.ANY, mock.ANY, - ssl=connector._make_ssl_context(True), - sock=mock.ANY, + connector._make_ssl_context(True), server_hostname="www.python.org", + ssl_handshake_timeout=mock.ANY, ) self.assertEqual(req.url.path, "/") self.assertEqual(proxy_req.method, "CONNECT") self.assertEqual(proxy_req.url, URL("https://www.python.org")) - tr.close.assert_called_once_with() - tr.get_extra_info.assert_called_with("socket", default=None) self.loop.run_until_complete(proxy_req.close()) proxy_resp.close() @@ -721,6 +639,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) self.assertIn("AUTHORIZATION", proxy_req.headers) self.assertNotIn("PROXY-AUTHORIZATION", proxy_req.headers) diff --git a/tests/test_proxy_functional.py b/tests/test_proxy_functional.py index a5091b0a82..eb44fd0463 100644 --- a/tests/test_proxy_functional.py +++ b/tests/test_proxy_functional.py @@ -2,8 +2,10 @@ import asyncio import os import pathlib +from re import match as match_regex from typing import Any from unittest import mock +from uuid import uuid4 import proxy import pytest @@ -11,6 +13,7 @@ import aiohttp from aiohttp import web +from aiohttp.client_exceptions import ClientConnectionError ASYNCIO_SUPPORTS_TLS_IN_TLS = hasattr( asyncio.sslproto._SSLProtocolTransport, @@ -55,7 +58,7 @@ def listen(self): @pytest.fixture def web_server_endpoint_payload(): - return "Test message" + return str(uuid4()) @pytest.fixture(params=("http", "https")) @@ -110,10 +113,6 @@ def _pretend_asyncio_supports_tls_in_tls( ) -@pytest.mark.xfail( - reason="https://github.com/aio-libs/aiohttp/pull/5992", - raises=ValueError, -) @pytest.mark.parametrize("web_server_endpoint_type", ("http", "https")) @pytest.mark.usefixtures("_pretend_asyncio_supports_tls_in_tls", "loop") async def test_secure_https_proxy_absolute_path( @@ -122,7 +121,7 @@ async def test_secure_https_proxy_absolute_path( web_server_endpoint_url, web_server_endpoint_payload, ) -> None: - """Test urls can be requested through a secure proxy.""" + """Ensure HTTP(S) sites are accessible through a secure proxy.""" conn = aiohttp.TCPConnector() sess = aiohttp.ClientSession(connector=conn) @@ -140,6 +139,69 @@ async def test_secure_https_proxy_absolute_path( await conn.close() +@pytest.mark.parametrize("web_server_endpoint_type", ("https",)) +@pytest.mark.usefixtures("loop") +async def test_https_proxy_unsupported_tls_in_tls( + client_ssl_ctx, + secure_proxy_url, + web_server_endpoint_type, +) -> None: + """Ensure connecting to TLS endpoints w/ HTTPS proxy needs patching. + + This also checks that a helpful warning on how to patch the env + is displayed. + """ + url = URL.build(scheme=web_server_endpoint_type, host="python.org") + + escaped_host_port = ":".join((url.host.replace(".", r"\."), str(url.port))) + escaped_proxy_url = str(secure_proxy_url).replace(".", r"\.") + + conn = aiohttp.TCPConnector() + sess = aiohttp.ClientSession(connector=conn) + + expected_warning_text = ( + r"^" + r"An HTTPS request is being sent through an HTTPS proxy\. " + "This support for TLS in TLS is known to be disabled " + r"in the stdlib asyncio\. This is why you'll probably see " + r"an error in the log below.\n\n" + "It is possible to enable it via monkeypatching under " + r"Python 3\.7 or higher\. For more details, see:\n" + r"* https://bugs\.python\.org/issue37179\n" + r"* https://github\.com/python/cpython/pull/28073\n\n" + r"You can temporarily patch this as follows:\n" + r"* https://docs\.aiohttp\.org/en/stable/client_advanced\.html#proxy-support\n", + r"* https://github\.com/aio-libs/aiohttp/discussions/6044\n$", + ) + type_err = ( + r"transport is not supported by start_tls\(\)" + ) + expected_exception_reason = ( + r"^" + "Cannot initialize a TLS-in-TLS connection to host " + f"{escaped_host_port!s} through an underlying connection " + f"to an HTTPS proxy {escaped_proxy_url!s} ssl:{client_ssl_ctx!s} " + f"[{type_err!s}]" + r"$" + ) + + with pytest.raises( + ClientConnectionError, + match=expected_exception_reason, + ) as conn_err, pytest.warns( + RuntimeWarning, + match=expected_warning_text, + ): + await sess.get(url, proxy=secure_proxy_url, ssl=client_ssl_ctx) + + assert type(conn_err.value.__cause__) == TypeError + assert match_regex(f"^{type_err!s}$", str(conn_err.value.__cause__)) + + await sess.close() + await conn.close() + + @pytest.fixture def proxy_test_server(aiohttp_raw_server: Any, loop: Any, monkeypatch: Any): # Handle all proxy requests and imitate remote server response.