Skip to content

Commit

Permalink
[Serve] Apply request_timeout_s from Serve config to the cluster (r…
Browse files Browse the repository at this point in the history
…ay-project#37884)

Currently, the Serve config's `request_timeout_s` field is ignored. This change makes Serve pass in `request_timeout_s` to the HTTP Proxies when starting up, so the field is respected.
  • Loading branch information
shrekris-anyscale authored and harborn committed Aug 17, 2023
1 parent 4bab8f2 commit 2f6c87e
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 42 deletions.
52 changes: 18 additions & 34 deletions dashboard/modules/serve/serve_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,51 +251,35 @@ async def put_all_applications(self, req: Request) -> Response:
from ray._private.usage.usage_lib import TagKey, record_extra_usage_tag

try:
config = ServeDeploySchema.parse_obj(await req.json())
config: ServeDeploySchema = ServeDeploySchema.parse_obj(await req.json())
except ValidationError as e:
return Response(
status=400,
text=repr(e),
)

config_http_options = config.http_options.dict()
full_http_options = dict(
{"location": config.proxy_location}, **config_http_options
)

async with self._controller_start_lock:
client = await serve_start_async(
detached=True,
http_options={
"host": config.http_options.host,
"port": config.http_options.port,
"root_path": config.http_options.root_path,
"location": config.proxy_location,
},
http_options=full_http_options,
)

# Check HTTP Host
host_conflict = self.check_http_options(
"host", client.http_config.host, config.http_options.host
)
if host_conflict is not None:
return host_conflict

# Check HTTP Port
port_conflict = self.check_http_options(
"port", client.http_config.port, config.http_options.port
)
if port_conflict is not None:
return port_conflict

# Check HTTP root path
root_path_conflict = self.check_http_options(
"root path", client.http_config.root_path, config.http_options.root_path
)
if root_path_conflict is not None:
return root_path_conflict

# Check HTTP location
location_conflict = self.check_http_options(
"location", client.http_config.location, config.proxy_location
)
if location_conflict is not None:
return location_conflict
# Serve ignores HTTP options if it was already running when
# serve_start_async() is called. We explicitly return an error response
# here if Serve's HTTP options don't match the config's options.
for option, requested_value in full_http_options.items():
conflict_response = self.check_http_options(
option,
getattr(client.http_config, option),
requested_value,
)
if conflict_response is not None:
return conflict_response

try:
client.deploy_apps(config)
Expand Down
9 changes: 2 additions & 7 deletions doc/source/serve/advanced-guides/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,6 @@ proper backpressure. You can increase the value in the deployment decorator; e.g
(serve-performance-e2e-timeout)=
### Set an end-to-end request timeout

By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. It's recommended that you set an end-to-end timeout, so slow requests can be terminated and retried at another replica.
By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. It's recommended that you set an end-to-end timeout, so slow requests can be terminated and retried.

You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies will wait for that many seconds before terminating an HTTP request and retrying it at another replica. This config is global to your Ray cluster, and it cannot be updated during runtime.

(serve-performance-http-retry)=
### Set request retry times

By default, the Serve HTTP proxy retries up to `10` times when a response is not received due to failures (e.g. network disconnect, request timeout, etc.).
You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies will wait for that many seconds before terminating an HTTP request. This config is global to your Ray cluster, and it cannot be updated during runtime. Use [client-side retries](serve-best-practices-http-requests) to retry requests that time out due to transient failures.
2 changes: 2 additions & 0 deletions doc/source/serve/production-guide/best-practices.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ deployment_statuses:

For Kubernetes deployments with KubeRay, tighter integrations of `serve status` with Kubernetes are available. See [Getting the status of Serve applications in Kubernetes](serve-getting-status-kubernetes).

(serve-best-practices-http-requests)=

## Best practices for HTTP requests

Most examples in these docs use straightforward `get` or `post` requests using Python's `requests` library, such as:
Expand Down
65 changes: 64 additions & 1 deletion python/ray/serve/tests/test_request_timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
import requests

import ray
from ray._private.test_utils import SignalActor
from ray._private.test_utils import SignalActor, wait_for_condition
from ray.dashboard.modules.serve.sdk import ServeSubmissionClient

from ray import serve
from ray.serve.schema import ServeInstanceDetails
from ray.serve._private.common import ApplicationStatus
from ray.serve._private.constants import RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING


Expand Down Expand Up @@ -102,6 +105,66 @@ async def __call__(self):
ray.get(signal_actor.send.remote())


@serve.deployment(graceful_shutdown_timeout_s=0)
class HangsOnFirstRequest:
    """Deployment whose first request blocks until a later request arrives.

    The first call awaits a signal actor and hangs; every subsequent call
    sends that signal (unblocking the first) and returns immediately. Used
    to exercise proxy-side request timeouts.
    """

    def __init__(self):
        # Whether any request has been seen yet by this replica.
        self._saw_first_request = False
        self.signal_actor = SignalActor.remote()

    async def __call__(self):
        if self._saw_first_request:
            # A later request: release the hanging first request.
            ray.get(self.signal_actor.send.remote())
            return "Success!"
        # First request: block here until the signal is sent.
        self._saw_first_request = True
        await self.signal_actor.wait.remote()
        return "Success!"


# Module-level app handle referenced by import path
# ("ray.serve.tests.test_request_timeout:hangs_on_first_request_app")
# in the REST API config used by test_with_rest_api below.
hangs_on_first_request_app = HangsOnFirstRequest.bind()


def test_with_rest_api(ray_start_stop):
    """Verify the REST API applies `request_timeout_s` from the Serve config.

    Deploys `hangs_on_first_request_app` (whose first request hangs until a
    second request arrives) with a 1s proxy-side timeout, then checks that
    the first request is terminated by the proxy rather than hanging.
    """
    config = {
        "proxy_location": "EveryNode",
        "http_options": {"request_timeout_s": 1},
        "applications": [
            {
                "name": "app",
                "route_prefix": "/",
                "import_path": (
                    "ray.serve.tests.test_request_timeout:hangs_on_first_request_app"
                ),
            }
        ],
    }
    ServeSubmissionClient("http://localhost:52365").deploy_applications(config)

    def application_running():
        response = requests.get(
            "http://localhost:52365/api/serve/applications/", timeout=15
        )
        assert response.status_code == 200

        serve_details = ServeInstanceDetails(**response.json())
        return serve_details.applications["app"].status == ApplicationStatus.RUNNING

    wait_for_condition(application_running, timeout=15)
    print("Application has started running. Testing requests...")

    # Client-side timeout so the test fails fast instead of hanging the
    # suite if the proxy-side `request_timeout_s` is not applied (the exact
    # regression this test guards against).
    response = requests.get("http://localhost:8000", timeout=30)
    if RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING:
        # Streaming proxy terminates the timed-out request with a 500.
        assert response.status_code == 500
    else:
        # NOTE(review): legacy proxy presumably retries after the timeout,
        # so the retried request unblocks the replica and succeeds.
        assert response.status_code == 200
        assert response.text == "Success!"

    # A subsequent request hits the "saw first request" path and succeeds.
    response = requests.get("http://localhost:8000", timeout=30)
    assert response.status_code == 200
    print("Requests succeeded! Deleting application.")
    ServeSubmissionClient("http://localhost:52365").delete_applications()


@pytest.mark.parametrize(
"ray_instance",
[
Expand Down

0 comments on commit 2f6c87e

Please sign in to comment.