From 70256afb4ae54819d0a463defbd8dd5a27e87cb8 Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Sun, 13 Sep 2020 12:37:44 +0200 Subject: [PATCH 1/7] Tweak sync dropped connection detection --- httpcore/_backends/sync.py | 6 +++--- httpcore/_utils.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/httpcore/_backends/sync.py b/httpcore/_backends/sync.py index 1261753db..755c5969b 100644 --- a/httpcore/_backends/sync.py +++ b/httpcore/_backends/sync.py @@ -1,4 +1,3 @@ -import select import socket import threading import time @@ -17,6 +16,7 @@ map_exceptions, ) from .._types import TimeoutDict +from .._utils import wait_for_read class SyncSocketStream: @@ -78,8 +78,8 @@ def close(self) -> None: self.sock.close() def is_connection_dropped(self) -> bool: - rready, _wready, _xready = select.select([self.sock], [], [], 0) - return bool(rready) + rready = wait_for_read([self.sock], timeout=0) + return len(rready) > 0 class SyncLock: diff --git a/httpcore/_utils.py b/httpcore/_utils.py index ad5c77501..125e16235 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -1,5 +1,6 @@ import logging import os +import selectors import sys import typing @@ -63,3 +64,16 @@ def origin_to_url_string(origin: Origin) -> str: scheme, host, explicit_port = origin port = f":{explicit_port}" if explicit_port != DEFAULT_PORTS[scheme] else "" return f"{scheme.decode('ascii')}://{host.decode('ascii')}{port}" + + +def _wait_io_events(socks: list, events: int, timeout: float = None) -> list: + # Better cross-platform approach than low-level `select.select()` calls. + # See: https://github.com/encode/httpcore/issues/182 + sel = selectors.DefaultSelector() + for sock in socks: + sel.register(sock, events) + return [key.fileobj for key, mask in sel.select(timeout) if mask & events] + + +def wait_for_read(socks: list, timeout: float = None) -> list: + return _wait_io_events(socks, events=selectors.EVENT_READ, timeout=timeout) From 2c6b3c9fcb61b4bf432acc0e5d44de35ebfbe8f6 Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Sun, 13 Sep 2020 12:54:27 +0200 Subject: [PATCH 2/7] Nits --- httpcore/_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/httpcore/_utils.py b/httpcore/_utils.py index 125e16235..5848debd0 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -66,8 +66,9 @@ def origin_to_url_string(origin: Origin) -> str: return f"{scheme.decode('ascii')}://{host.decode('ascii')}{port}" -def _wait_io_events(socks: list, events: int, timeout: float = None) -> list: - # Better cross-platform approach than low-level `select.select()` calls. +def _wait_for_io_events(socks: list, events: int, timeout: float = None) -> list: + # Prefer the `selectors` module rather than the lower-level `select` module to + # improve cross-platform support. # See: https://github.com/encode/httpcore/issues/182 sel = selectors.DefaultSelector() for sock in socks: @@ -76,4 +77,4 @@ def _wait_io_events(socks: list, events: int, timeout: float = None) -> list: def wait_for_read(socks: list, timeout: float = None) -> list: - return _wait_io_events(socks, events=selectors.EVENT_READ, timeout=timeout) + return _wait_for_io_events(socks, events=selectors.EVENT_READ, timeout=timeout) From 5fc8e203364412dbcd4a01b73d2da1c6e80608d1 Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Tue, 22 Sep 2020 20:22:21 +0200 Subject: [PATCH 3/7] Use common helper across sync, asyncio, curio, anyio --- httpcore/_backends/anyio.py | 5 ++--- httpcore/_backends/asyncio.py | 19 +++++-------------- httpcore/_backends/curio.py | 7 ++----- httpcore/_backends/sync.py | 5 ++--- httpcore/_utils.py | 23 +++++++++++++---------- 5 files changed, 24 insertions(+), 35 deletions(-) diff --git a/httpcore/_backends/anyio.py b/httpcore/_backends/anyio.py index 0921be0f9..67f214133 100644 --- a/httpcore/_backends/anyio.py +++ b/httpcore/_backends/anyio.py @@ -1,4 +1,3 @@ -import select from ssl import SSLContext from typing import Optional @@ -17,6 +16,7 @@ WriteTimeout, ) from .._types import TimeoutDict +from .._utils import is_socket_at_eof from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream @@ -87,8 +87,7 @@ async def aclose(self) -> None: def is_connection_dropped(self) -> bool: raw_socket = self.stream.extra(SocketAttribute.raw_socket) - rready, _wready, _xready = select.select([raw_socket], [], [], 0) - return bool(rready) + return is_socket_at_eof(raw_socket.fileno()) class Lock(AsyncLock): diff --git a/httpcore/_backends/asyncio.py b/httpcore/_backends/asyncio.py index 431b0867f..59d42453a 100644 --- a/httpcore/_backends/asyncio.py +++ b/httpcore/_backends/asyncio.py @@ -1,4 +1,5 @@ import asyncio +import socket from ssl import SSLContext from typing import Optional @@ -13,6 +14,7 @@ map_exceptions, ) from .._types import TimeoutDict +from .._utils import is_socket_at_eof from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream SSL_MONKEY_PATCH_APPLIED = False @@ -160,20 +162,9 @@ async def aclose(self) -> None: self.stream_writer.close() def is_connection_dropped(self) -> bool: - # Counter-intuitively, what we really want to know here is whether the socket is - # *readable*, i.e. whether it would return immediately with empty bytes if we - # called `.recv()` on it, indicating that the other end has closed the socket. - # See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 - # - # As it turns out, asyncio checks for readability in the background - # (see: https://github.com/encode/httpx/pull/276#discussion_r322000402), - # so checking for EOF or readability here would yield the same result. - # - # At the cost of rigour, we check for EOF instead of readability because asyncio - # does not expose any public API to check for readability. - # (For a solution that uses private asyncio APIs, see: - # https://github.com/encode/httpx/pull/143#issuecomment-515202982) - return self.stream_reader.at_eof() + transport = self.stream_reader._transport # type: ignore + sock: socket.socket = transport.get_extra_info("socket") + return is_socket_at_eof(sock.fileno()) class Lock(AsyncLock): diff --git a/httpcore/_backends/curio.py b/httpcore/_backends/curio.py index 9f69850b9..d84aa4510 100644 --- a/httpcore/_backends/curio.py +++ b/httpcore/_backends/curio.py @@ -1,4 +1,3 @@ -import select from ssl import SSLContext, SSLSocket from typing import Optional @@ -15,7 +14,7 @@ map_exceptions, ) from .._types import TimeoutDict -from .._utils import get_logger +from .._utils import get_logger, is_socket_at_eof from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream logger = get_logger(__name__) @@ -133,9 +132,7 @@ async def aclose(self) -> None: await self.socket.close() def is_connection_dropped(self) -> bool: - rready, _, _ = select.select([self.socket.fileno()], [], [], 0) - - return bool(rready) + return is_socket_at_eof(self.socket.fileno()) class CurioBackend(AsyncBackend): diff --git a/httpcore/_backends/sync.py b/httpcore/_backends/sync.py index 755c5969b..5ea9b4d21 100644 --- a/httpcore/_backends/sync.py +++ b/httpcore/_backends/sync.py @@ -16,7 +16,7 @@ map_exceptions, ) from .._types import TimeoutDict -from .._utils import wait_for_read +from .._utils import is_socket_at_eof class SyncSocketStream: @@ -78,8 +78,7 @@ def close(self) -> None: self.sock.close() def is_connection_dropped(self) -> bool: - rready = wait_for_read([self.sock], timeout=0) - return len(rready) > 0 + return is_socket_at_eof(self.sock.fileno()) class SyncLock: diff --git a/httpcore/_utils.py b/httpcore/_utils.py index 5848debd0..cf26f4e0b 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -66,15 +66,18 @@ def origin_to_url_string(origin: Origin) -> str: return f"{scheme.decode('ascii')}://{host.decode('ascii')}{port}" -def _wait_for_io_events(socks: list, events: int, timeout: float = None) -> list: - # Prefer the `selectors` module rather than the lower-level `select` module to - # improve cross-platform support. +def is_socket_at_eof(sock_fd: int) -> bool: + # 'Has the socket reached EOF?' is equivalent to 'Is the socket readable?' + # This is because if the other end has dropped the connection, + # a recv() call on the socket would return immediately (with empty bytes), and + # vice versa. + # See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 + # Typically we'd use the `select` module here, but we use the higher-level + # `selectors` module to improve cross-platform support. # See: https://github.com/encode/httpcore/issues/182 sel = selectors.DefaultSelector() - for sock in socks: - sel.register(sock, events) - return [key.fileobj for key, mask in sel.select(timeout) if mask & events] - - -def wait_for_read(socks: list, timeout: float = None) -> list: - return _wait_for_io_events(socks, events=selectors.EVENT_READ, timeout=timeout) + sel.register(sock_fd, selectors.EVENT_READ) + read_ready = [ + key.fileobj for key, mask in sel.select(0) if mask & selectors.EVENT_READ + ] + return len(read_ready) > 0 From 279c8e7381a6a95341e2253cc14f0ecd370ba1ff Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Thu, 8 Oct 2020 20:53:45 +0200 Subject: [PATCH 4/7] Update --- httpcore/_backends/anyio.py | 6 +++--- httpcore/_backends/asyncio.py | 4 ++-- httpcore/_backends/curio.py | 4 ++-- httpcore/_backends/sync.py | 4 ++-- httpcore/_backends/trio.py | 5 ++--- httpcore/_utils.py | 31 +++++++++++++++++++------------ 6 files changed, 30 insertions(+), 24 deletions(-) diff --git a/httpcore/_backends/anyio.py b/httpcore/_backends/anyio.py index 67f214133..6ed1c3c29 100644 --- a/httpcore/_backends/anyio.py +++ b/httpcore/_backends/anyio.py @@ -16,7 +16,7 @@ WriteTimeout, ) from .._types import TimeoutDict -from .._utils import is_socket_at_eof +from .._utils import is_socket_readable from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream @@ -86,8 +86,8 @@ async def aclose(self) -> None: raise CloseError from exc def is_connection_dropped(self) -> bool: - raw_socket = self.stream.extra(SocketAttribute.raw_socket) - return is_socket_at_eof(raw_socket.fileno()) + sock = self.stream.extra(SocketAttribute.raw_socket) + return is_socket_readable(sock.fileno()) class Lock(AsyncLock): diff --git a/httpcore/_backends/asyncio.py b/httpcore/_backends/asyncio.py index df6d4fc72..954ffdbab 100644 --- a/httpcore/_backends/asyncio.py +++ b/httpcore/_backends/asyncio.py @@ -14,7 +14,7 @@ map_exceptions, ) from .._types import TimeoutDict -from .._utils import is_socket_at_eof +from .._utils import is_socket_readable from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream SSL_MONKEY_PATCH_APPLIED = False @@ -176,7 +176,7 @@ async def aclose(self) -> None: def is_connection_dropped(self) -> bool: transport = self.stream_reader._transport # type: ignore sock: socket.socket = transport.get_extra_info("socket") - return is_socket_at_eof(sock.fileno()) + return is_socket_readable(sock.fileno()) class Lock(AsyncLock): diff --git a/httpcore/_backends/curio.py b/httpcore/_backends/curio.py index d84aa4510..92f066f48 100644 --- a/httpcore/_backends/curio.py +++ b/httpcore/_backends/curio.py @@ -14,7 +14,7 @@ map_exceptions, ) from .._types import TimeoutDict -from .._utils import get_logger, is_socket_at_eof +from .._utils import get_logger, is_socket_readable from .base import AsyncBackend, AsyncLock, AsyncSemaphore, AsyncSocketStream logger = get_logger(__name__) @@ -132,7 +132,7 @@ async def aclose(self) -> None: await self.socket.close() def is_connection_dropped(self) -> bool: - return is_socket_at_eof(self.socket.fileno()) + return is_socket_readable(self.socket.fileno()) class CurioBackend(AsyncBackend): diff --git a/httpcore/_backends/sync.py b/httpcore/_backends/sync.py index 5ea9b4d21..23c8bd726 100644 --- a/httpcore/_backends/sync.py +++ b/httpcore/_backends/sync.py @@ -16,7 +16,7 @@ map_exceptions, ) from .._types import TimeoutDict -from .._utils import is_socket_at_eof +from .._utils import is_socket_readable class SyncSocketStream: @@ -78,7 +78,7 @@ def close(self) -> None: self.sock.close() def is_connection_dropped(self) -> bool: - return is_socket_at_eof(self.sock.fileno()) + return is_socket_readable(self.sock.fileno()) class SyncLock: diff --git a/httpcore/_backends/trio.py b/httpcore/_backends/trio.py index e762f4620..e3ff4103f 100644 --- a/httpcore/_backends/trio.py +++ b/httpcore/_backends/trio.py @@ -91,9 +91,8 @@ def is_connection_dropped(self) -> bool: stream = stream.transport_stream assert isinstance(stream, trio.SocketStream) - # Counter-intuitively, what we really want to know here is whether the socket is - # *readable*, i.e. whether it would return immediately with empty bytes if we - # called `.recv()` on it, indicating that the other end has closed the socket. + # The other end has closed the connection if and only if the socket is readable, + # i.e. if it would return immediately with b"" if we called .recv() on it. # See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 return stream.socket.is_readable() diff --git a/httpcore/_utils.py b/httpcore/_utils.py index cf26f4e0b..58c6539e3 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -66,18 +66,25 @@ def origin_to_url_string(origin: Origin) -> str: return f"{scheme.decode('ascii')}://{host.decode('ascii')}{port}" -def is_socket_at_eof(sock_fd: int) -> bool: - # 'Has the socket reached EOF?' is equivalent to 'Is the socket readable?' - # This is because if the other end has dropped the connection, - # a recv() call on the socket would return immediately (with empty bytes), and - # vice versa. - # See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 - # Typically we'd use the `select` module here, but we use the higher-level - # `selectors` module to improve cross-platform support. +def is_socket_readable(sock_fd: int) -> bool: + """ + Return whether a socket, as identifed by its file descriptor, is readable. + + "A socket is readable" means that it would return immediately with b"" if we + called .recv() on it. + + This is also equivalent to "the connection has been closed on the other end". + + See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 + """ + # NOTE: We prefer the `selectors` module to `select`, because of known limitations + # of `select` on Linux when dealing with many open file descriptors. # See: https://github.com/encode/httpcore/issues/182 + # On Windows `select` is just fine, but that's also what `DefaultSelector` uses + # there, so `selectors` is really the generally-appropriate solution. + # See: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 sel = selectors.DefaultSelector() - sel.register(sock_fd, selectors.EVENT_READ) - read_ready = [ - key.fileobj for key, mask in sel.select(0) if mask & selectors.EVENT_READ - ] + event = selectors.EVENT_READ + sel.register(sock_fd, event) + read_ready = [key.fileobj for key, mask in sel.select(0) if mask & event] return len(read_ready) > 0 From f2dd7ded700fec0b26976b1249537065baef0790 Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Thu, 8 Oct 2020 21:36:21 +0200 Subject: [PATCH 5/7] Add test case --- tests/async_tests/test_interfaces.py | 29 ++++++++++++++++++++++++++++ tests/conftest.py | 26 +++++++++++++++++++++++++ tests/sync_tests/test_interfaces.py | 29 ++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/tests/async_tests/test_interfaces.py b/tests/async_tests/test_interfaces.py index f4ba9e40c..340ba4fd2 100644 --- a/tests/async_tests/test_interfaces.py +++ b/tests/async_tests/test_interfaces.py @@ -361,3 +361,32 @@ async def test_explicit_backend_name(server: Server) -> None: assert status_code == 200 assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} assert len(http._connections[url[:3]]) == 1 # type: ignore + + +@pytest.mark.anyio +@pytest.mark.usefixtures("too_many_open_files_minus_one") +@pytest.mark.skipif(platform.system() != "Linux", reason="Only a problem on Linux") +async def test_broken_socket_detection_many_open_files( + backend: str, server: Server +) -> None: + """ + Regression test for: https://github.com/encode/httpcore/issues/182 + """ + async with httpcore.AsyncConnectionPool(backend=backend) as http: + method = b"GET" + url = (b"http", *server.netloc, b"/") + headers = [server.host_header] + + # * First attempt will be successful because it will grab the last + # available fd before what select() supports on the platform. + # * Second attempt would have failed without a fix, due to a "filedescriptor + # out of range in select()" exception. + for _ in range(2): + status_code, response_headers, stream, ext = await http.arequest( + method, url, headers + ) + await read_body(stream) + + assert status_code == 200 + assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} + assert len(http._connections[url[:3]]) == 1 # type: ignore diff --git a/tests/conftest.py b/tests/conftest.py index cc50d96db..2e1370cb8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,3 +73,29 @@ def server() -> Server: @pytest.fixture(scope="session") def https_server() -> Server: return Server(SERVER_HOST, port=443) + + +@pytest.fixture(scope="function") +def too_many_open_files_minus_one() -> typing.Iterator[None]: + # Fixture for test regression on https://github.com/encode/httpcore/issues/182 + # Max number of descriptors chosen according to: + # See: https://man7.org/linux/man-pages/man2/select.2.html#top_of_page + # "To monitor file descriptors greater than 1023, use poll or epoll instead." + max_num_descriptors = 1023 + + files = [] + + while True: + f = open("/dev/null") + # Leave one file descriptor available for a transport to perform + # a successful request. + if f.fileno() > max_num_descriptors - 1: + f.close() + break + files.append(f) + + try: + yield + finally: + for f in files: + f.close() diff --git a/tests/sync_tests/test_interfaces.py b/tests/sync_tests/test_interfaces.py index 1e1282906..8e6666510 100644 --- a/tests/sync_tests/test_interfaces.py +++ b/tests/sync_tests/test_interfaces.py @@ -361,3 +361,32 @@ def test_explicit_backend_name(server: Server) -> None: assert status_code == 200 assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} assert len(http._connections[url[:3]]) == 1 # type: ignore + + + +@pytest.mark.usefixtures("too_many_open_files_minus_one") +@pytest.mark.skipif(platform.system() != "Linux", reason="Only a problem on Linux") +def test_broken_socket_detection_many_open_files( + backend: str, server: Server +) -> None: + """ + Regression test for: https://github.com/encode/httpcore/issues/182 + """ + with httpcore.SyncConnectionPool(backend=backend) as http: + method = b"GET" + url = (b"http", *server.netloc, b"/") + headers = [server.host_header] + + # * First attempt will be successful because it will grab the last + # available fd before what select() supports on the platform. + # * Second attempt would have failed without a fix, due to a "filedescriptor + # out of range in select()" exception. + for _ in range(2): + status_code, response_headers, stream, ext = http.request( + method, url, headers + ) + read_body(stream) + + assert status_code == 200 + assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} + assert len(http._connections[url[:3]]) == 1 # type: ignore From adeb3a21c81fe74d3512b6b1e95c0a08947cdc12 Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Sat, 17 Oct 2020 13:39:19 +0200 Subject: [PATCH 6/7] Rename is_connection_dropped -> is_socket_readable, tweak comments --- httpcore/_async/connection.py | 4 ++-- httpcore/_async/connection_pool.py | 9 ++++++++- httpcore/_async/http.py | 4 ++-- httpcore/_async/http11.py | 4 ++-- httpcore/_async/http2.py | 4 ++-- httpcore/_backends/anyio.py | 2 +- httpcore/_backends/asyncio.py | 2 +- httpcore/_backends/base.py | 2 +- httpcore/_backends/curio.py | 2 +- httpcore/_backends/sync.py | 2 +- httpcore/_backends/trio.py | 5 +---- httpcore/_sync/connection.py | 4 ++-- httpcore/_sync/connection_pool.py | 9 ++++++++- httpcore/_sync/http.py | 4 ++-- httpcore/_sync/http11.py | 4 ++-- httpcore/_sync/http2.py | 4 ++-- httpcore/_utils.py | 20 ++++++++++---------- tests/async_tests/test_connection_pool.py | 2 +- tests/async_tests/test_interfaces.py | 2 +- tests/sync_tests/test_connection_pool.py | 2 +- tests/sync_tests/test_interfaces.py | 2 +- 21 files changed, 52 insertions(+), 41 deletions(-) diff --git a/httpcore/_async/connection.py b/httpcore/_async/connection.py index 4ed31e8ab..d3d7b41bf 100644 --- a/httpcore/_async/connection.py +++ b/httpcore/_async/connection.py @@ -165,8 +165,8 @@ def state(self) -> ConnectionState: return ConnectionState.PENDING return self.connection.get_state() - def is_connection_dropped(self) -> bool: - return self.connection is not None and self.connection.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.connection is not None and self.connection.is_socket_readable() def mark_as_ready(self) -> None: if self.connection is not None: diff --git a/httpcore/_async/connection_pool.py b/httpcore/_async/connection_pool.py index 0dcba50d1..46ede0ba4 100644 --- a/httpcore/_async/connection_pool.py +++ b/httpcore/_async/connection_pool.py @@ -245,7 +245,14 @@ async def _get_connection_from_pool( seen_http11 = True if connection.state == ConnectionState.IDLE: - if connection.is_connection_dropped(): + if connection.is_socket_readable(): + # If the socket is readable while the connection is idle (meaning + # we don't expect the server to send any data), then the only valid + # reason is that the other end has disconnected, which means we + # should drop the connection too. + # (For a detailed run-through of what a "readable" socket is, and + # why this is the best thing for us to do here, see: + # https://github.com/encode/httpx/pull/143#issuecomment-515181778) logger.trace("removing dropped idle connection=%r", connection) # IDLE connections that have been dropped should be # removed from the pool. diff --git a/httpcore/_async/http.py b/httpcore/_async/http.py index aa66ec4a5..3e024a60d 100644 --- a/httpcore/_async/http.py +++ b/httpcore/_async/http.py @@ -20,9 +20,9 @@ def mark_as_ready(self) -> None: """ raise NotImplementedError() # pragma: nocover - def is_connection_dropped(self) -> bool: + def is_socket_readable(self) -> bool: """ - Return 'True' if the connection has been dropped by the remote end. + Return 'True' if the underlying network socket is readable. """ raise NotImplementedError() # pragma: nocover diff --git a/httpcore/_async/http11.py b/httpcore/_async/http11.py index 686c76960..ffa3fa8b4 100644 --- a/httpcore/_async/http11.py +++ b/httpcore/_async/http11.py @@ -201,5 +201,5 @@ async def aclose(self) -> None: await self.socket.aclose() - def is_connection_dropped(self) -> bool: - return self.socket.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.socket.is_readable() diff --git a/httpcore/_async/http2.py b/httpcore/_async/http2.py index 6dd84f1d8..b911d833b 100644 --- a/httpcore/_async/http2.py +++ b/httpcore/_async/http2.py @@ -158,8 +158,8 @@ async def send_connection_init(self, timeout: TimeoutDict) -> None: def is_closed(self) -> bool: return False - def is_connection_dropped(self) -> bool: - return self.socket.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.socket.is_readable() async def aclose(self) -> None: logger.trace("close_connection=%r", self) diff --git a/httpcore/_backends/anyio.py b/httpcore/_backends/anyio.py index 8d5c9ace7..466747e48 100644 --- a/httpcore/_backends/anyio.py +++ b/httpcore/_backends/anyio.py @@ -85,7 +85,7 @@ async def aclose(self) -> None: except BrokenResourceError as exc: raise CloseError from exc - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: sock = self.stream.extra(SocketAttribute.raw_socket) return is_socket_readable(sock.fileno()) diff --git a/httpcore/_backends/asyncio.py b/httpcore/_backends/asyncio.py index 935e852f5..32346af3c 100644 --- a/httpcore/_backends/asyncio.py +++ b/httpcore/_backends/asyncio.py @@ -173,7 +173,7 @@ async def aclose(self) -> None: with map_exceptions({OSError: CloseError}): self.stream_writer.close() - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: transport = self.stream_reader._transport # type: ignore sock: socket.socket = transport.get_extra_info("socket") return is_socket_readable(sock.fileno()) diff --git a/httpcore/_backends/base.py b/httpcore/_backends/base.py index 6552ccc74..1ca6e31b5 100644 --- a/httpcore/_backends/base.py +++ b/httpcore/_backends/base.py @@ -63,7 +63,7 @@ async def write(self, data: bytes, timeout: TimeoutDict) -> None: async def aclose(self) -> None: raise NotImplementedError() # pragma: no cover - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: raise NotImplementedError() # pragma: no cover diff --git a/httpcore/_backends/curio.py b/httpcore/_backends/curio.py index f881d80ec..2c1ae57ea 100644 --- a/httpcore/_backends/curio.py +++ b/httpcore/_backends/curio.py @@ -131,7 +131,7 @@ async def aclose(self) -> None: await self.stream.close() await self.socket.close() - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: return is_socket_readable(self.socket.fileno()) diff --git a/httpcore/_backends/sync.py b/httpcore/_backends/sync.py index d45f223da..25e38ed0e 100644 --- a/httpcore/_backends/sync.py +++ b/httpcore/_backends/sync.py @@ -77,7 +77,7 @@ def close(self) -> None: with map_exceptions({socket.error: CloseError}): self.sock.close() - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: return is_socket_readable(self.sock.fileno()) diff --git a/httpcore/_backends/trio.py b/httpcore/_backends/trio.py index 32d11977e..a961a23b8 100644 --- a/httpcore/_backends/trio.py +++ b/httpcore/_backends/trio.py @@ -82,7 +82,7 @@ async def aclose(self) -> None: with map_exceptions({trio.BrokenResourceError: CloseError}): await self.stream.aclose() - def is_connection_dropped(self) -> bool: + def is_readable(self) -> bool: # Adapted from: https://github.com/encode/httpx/pull/143#issuecomment-515202982 stream = self.stream @@ -91,9 +91,6 @@ def is_connection_dropped(self) -> bool: stream = stream.transport_stream assert isinstance(stream, trio.SocketStream) - # The other end has closed the connection if and only if the socket is readable, - # i.e. if it would return immediately with b"" if we called .recv() on it. - # See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 return stream.socket.is_readable() diff --git a/httpcore/_sync/connection.py b/httpcore/_sync/connection.py index faf5aa09a..f10f2aade 100644 --- a/httpcore/_sync/connection.py +++ b/httpcore/_sync/connection.py @@ -165,8 +165,8 @@ def state(self) -> ConnectionState: return ConnectionState.PENDING return self.connection.get_state() - def is_connection_dropped(self) -> bool: - return self.connection is not None and self.connection.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.connection is not None and self.connection.is_socket_readable() def mark_as_ready(self) -> None: if self.connection is not None: diff --git a/httpcore/_sync/connection_pool.py b/httpcore/_sync/connection_pool.py index b0954ad56..4702184b8 100644 --- a/httpcore/_sync/connection_pool.py +++ b/httpcore/_sync/connection_pool.py @@ -245,7 +245,14 @@ def _get_connection_from_pool( seen_http11 = True if connection.state == ConnectionState.IDLE: - if connection.is_connection_dropped(): + if connection.is_socket_readable(): + # If the socket is readable while the connection is idle (meaning + # we don't expect the server to send any data), then the only valid + # reason is that the other end has disconnected, which means we + # should drop the connection too. + # (For a detailed run-through of what a "readable" socket is, and + # why this is the best thing for us to do here, see: + # https://github.com/encode/httpx/pull/143#issuecomment-515181778) logger.trace("removing dropped idle connection=%r", connection) # IDLE connections that have been dropped should be # removed from the pool. diff --git a/httpcore/_sync/http.py b/httpcore/_sync/http.py index 24647fa31..8c3550205 100644 --- a/httpcore/_sync/http.py +++ b/httpcore/_sync/http.py @@ -20,9 +20,9 @@ def mark_as_ready(self) -> None: """ raise NotImplementedError() # pragma: nocover - def is_connection_dropped(self) -> bool: + def is_socket_readable(self) -> bool: """ - Return 'True' if the connection has been dropped by the remote end. + Return 'True' if the underlying network socket is readable. """ raise NotImplementedError() # pragma: nocover diff --git a/httpcore/_sync/http11.py b/httpcore/_sync/http11.py index 1d9f3547c..b827454a1 100644 --- a/httpcore/_sync/http11.py +++ b/httpcore/_sync/http11.py @@ -201,5 +201,5 @@ def close(self) -> None: self.socket.close() - def is_connection_dropped(self) -> bool: - return self.socket.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.socket.is_readable() diff --git a/httpcore/_sync/http2.py b/httpcore/_sync/http2.py index 2d8b8d121..050807f21 100644 --- a/httpcore/_sync/http2.py +++ b/httpcore/_sync/http2.py @@ -158,8 +158,8 @@ def send_connection_init(self, timeout: TimeoutDict) -> None: def is_closed(self) -> bool: return False - def is_connection_dropped(self) -> bool: - return self.socket.is_connection_dropped() + def is_socket_readable(self) -> bool: + return self.socket.is_readable() def close(self) -> None: logger.trace("close_connection=%r", self) diff --git a/httpcore/_utils.py b/httpcore/_utils.py index d411bf3f9..71f6e13a4 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -77,18 +77,18 @@ def is_socket_readable(sock_fd: int) -> bool: """ Return whether a socket, as identifed by its file descriptor, is readable. - "A socket is readable" means that it would return immediately with b"" if we - called .recv() on it. - - This is also equivalent to "the connection has been closed on the other end". - - See: https://github.com/encode/httpx/pull/143#issuecomment-515181778 + "A socket is readable" means that the read buffer isn't empty, i.e. that calling + .recv() on it would immediately return some data. """ - # NOTE: We prefer the `selectors` module to `select`, because of known limitations - # of `select` on Linux when dealing with many open file descriptors. + # NOTE: check for readability without actually attempting to read, because we don't + # want to block forever if it's not readable. Instead, we use a select-based + # approach. + # Note that we use `selectors` rather than `select`, because of known limitations + # of `select()` on Linux when dealing with many open file descriptors. # See: https://github.com/encode/httpcore/issues/182 - # On Windows `select` is just fine, but that's also what `DefaultSelector` uses - # there, so `selectors` is really the generally-appropriate solution. + # On Windows `select()` is just fine, and it also happens to be what the + # `selectors.DefaultSelector()` class uses. + # So, all in all, `selectors` should work just fine everywhere. # See: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 sel = selectors.DefaultSelector() event = selectors.EVENT_READ diff --git a/tests/async_tests/test_connection_pool.py b/tests/async_tests/test_connection_pool.py index b52f0c5ad..5b30209d3 100644 --- a/tests/async_tests/test_connection_pool.py +++ b/tests/async_tests/test_connection_pool.py @@ -49,7 +49,7 @@ def info(self) -> str: def mark_as_ready(self) -> None: self.state = ConnectionState.READY - def is_connection_dropped(self) -> bool: + def is_socket_readable(self) -> bool: return False diff --git a/tests/async_tests/test_interfaces.py b/tests/async_tests/test_interfaces.py index 340ba4fd2..e40fb7106 100644 --- a/tests/async_tests/test_interfaces.py +++ b/tests/async_tests/test_interfaces.py @@ -156,7 +156,7 @@ async def test_http_request_cannot_reuse_dropped_connection( # Mock the connection as having been dropped. connection = list(http._connections[url[:3]])[0] # type: ignore - connection.is_connection_dropped = lambda: True # type: ignore + connection.is_socket_readable = lambda: True # type: ignore method = b"GET" url = (b"http", *server.netloc, b"/") diff --git a/tests/sync_tests/test_connection_pool.py b/tests/sync_tests/test_connection_pool.py index 312f96a0d..ca5cb433e 100644 --- a/tests/sync_tests/test_connection_pool.py +++ b/tests/sync_tests/test_connection_pool.py @@ -49,7 +49,7 @@ def info(self) -> str: def mark_as_ready(self) -> None: self.state = ConnectionState.READY - def is_connection_dropped(self) -> bool: + def is_socket_readable(self) -> bool: return False diff --git a/tests/sync_tests/test_interfaces.py b/tests/sync_tests/test_interfaces.py index 8e6666510..32b33fc67 100644 --- a/tests/sync_tests/test_interfaces.py +++ b/tests/sync_tests/test_interfaces.py @@ -156,7 +156,7 @@ def test_http_request_cannot_reuse_dropped_connection( # Mock the connection as having been dropped. connection = list(http._connections[url[:3]])[0] # type: ignore - connection.is_connection_dropped = lambda: True # type: ignore + connection.is_socket_readable = lambda: True # type: ignore method = b"GET" url = (b"http", *server.netloc, b"/") From b47122b2ea8824c79269f2335df1ab9ba171b07c Mon Sep 17 00:00:00 2001 From: florimondmanca Date: Sat, 17 Oct 2020 13:45:21 +0200 Subject: [PATCH 7/7] Switch to stealing util from trio --- httpcore/_utils.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/httpcore/_utils.py b/httpcore/_utils.py index 71f6e13a4..2520549e5 100644 --- a/httpcore/_utils.py +++ b/httpcore/_utils.py @@ -1,7 +1,7 @@ import itertools import logging import os -import selectors +import select import sys import typing @@ -80,18 +80,17 @@ def is_socket_readable(sock_fd: int) -> bool: "A socket is readable" means that the read buffer isn't empty, i.e. that calling .recv() on it would immediately return some data. """ - # NOTE: check for readability without actually attempting to read, because we don't - # want to block forever if it's not readable. Instead, we use a select-based - # approach. - # Note that we use `selectors` rather than `select`, because of known limitations - # of `select()` on Linux when dealing with many open file descriptors. - # See: https://github.com/encode/httpcore/issues/182 - # On Windows `select()` is just fine, and it also happens to be what the - # `selectors.DefaultSelector()` class uses. - # So, all in all, `selectors` should work just fine everywhere. - # See: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 - sel = selectors.DefaultSelector() - event = selectors.EVENT_READ - sel.register(sock_fd, event) - read_ready = [key.fileobj for key, mask in sel.select(0) if mask & event] - return len(read_ready) > 0 + # NOTE: we want check for readability without actually attempting to read, because + # we don't want to block forever if it's not readable. + + # The implementation below was stolen from: + # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478 + # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 + + # Use select.select on Windows, and select.poll everywhere else + if sys.platform == "win32": + rready, _, _ = select.select([sock_fd], [], [], 0) + return bool(rready) + p = select.poll() + p.register(sock_fd, select.POLLIN) + return bool(p.poll(0))