Skip to content

Commit

Permalink
[Serve] Apply request_timeout_s from Serve config to the cluster (r…
Browse files Browse the repository at this point in the history
…ay-project#37884)

Currently, the Serve config's `request_timeout_s` field is ignored. This change makes Serve pass in `request_timeout_s` to the HTTP Proxies when starting up, so the field is respected.
  • Loading branch information
shrekris-anyscale authored and harborn committed Aug 17, 2023
1 parent 4bab8f2 commit 2f6c87e
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 42 deletions.
52 changes: 18 additions & 34 deletions dashboard/modules/serve/serve_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,51 +251,35 @@ async def put_all_applications(self, req: Request) -> Response:
from ray._private.usage.usage_lib import TagKey, record_extra_usage_tag

try:
config = ServeDeploySchema.parse_obj(await req.json())
config: ServeDeploySchema = ServeDeploySchema.parse_obj(await req.json())
except ValidationError as e:
return Response(
status=400,
text=repr(e),
)

config_http_options = config.http_options.dict()
full_http_options = dict(
{"location": config.proxy_location}, **config_http_options
)

async with self._controller_start_lock:
client = await serve_start_async(
detached=True,
http_options={
"host": config.http_options.host,
"port": config.http_options.port,
"root_path": config.http_options.root_path,
"location": config.proxy_location,
},
http_options=full_http_options,
)

# Check HTTP Host
host_conflict = self.check_http_options(
"host", client.http_config.host, config.http_options.host
)
if host_conflict is not None:
return host_conflict

# Check HTTP Port
port_conflict = self.check_http_options(
"port", client.http_config.port, config.http_options.port
)
if port_conflict is not None:
return port_conflict

# Check HTTP root path
root_path_conflict = self.check_http_options(
"root path", client.http_config.root_path, config.http_options.root_path
)
if root_path_conflict is not None:
return root_path_conflict

# Check HTTP location
location_conflict = self.check_http_options(
"location", client.http_config.location, config.proxy_location
)
if location_conflict is not None:
return location_conflict
# Serve ignores HTTP options if it was already running when
# serve_start_async() is called. We explicitly return an error response
# here if Serve's HTTP options don't match the config's options.
for option, requested_value in full_http_options.items():
conflict_response = self.check_http_options(
option,
getattr(client.http_config, option),
requested_value,
)
if conflict_response is not None:
return conflict_response

try:
client.deploy_apps(config)
Expand Down
9 changes: 2 additions & 7 deletions doc/source/serve/advanced-guides/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,6 @@ proper backpressure. You can increase the value in the deployment decorator; e.g
(serve-performance-e2e-timeout)=
### Set an end-to-end request timeout

By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. It's recommended that you set an end-to-end timeout, so slow requests can be terminated and retried at another replica.
By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. It's recommended that you set an end-to-end timeout, so slow requests can be terminated and retried.

You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies will wait for that many seconds before terminating an HTTP request and retrying it at another replica. This config is global to your Ray cluster, and it cannot be updated during runtime.

(serve-performance-http-retry)=
### Set request retry times

By default, the Serve HTTP proxy retries up to `10` times when a response is not received due to failures (e.g. network disconnect, request timeout, etc.).
You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies will wait for that many seconds before terminating an HTTP request. This config is global to your Ray cluster, and it cannot be updated during runtime. Use [client-side retries](serve-best-practices-http-requests) to retry requests that time out due to transient failures.
2 changes: 2 additions & 0 deletions doc/source/serve/production-guide/best-practices.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ deployment_statuses:

For Kubernetes deployments with KubeRay, tighter integrations of `serve status` with Kubernetes are available. See [Getting the status of Serve applications in Kubernetes](serve-getting-status-kubernetes).

(serve-best-practices-http-requests)=

## Best practices for HTTP requests

Most examples in these docs use straightforward `get` or `post` requests using Python's `requests` library, such as:
Expand Down
65 changes: 64 additions & 1 deletion python/ray/serve/tests/test_request_timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
import requests

import ray
from ray._private.test_utils import SignalActor
from ray._private.test_utils import SignalActor, wait_for_condition
from ray.dashboard.modules.serve.sdk import ServeSubmissionClient

from ray import serve
from ray.serve.schema import ServeInstanceDetails
from ray.serve._private.common import ApplicationStatus
from ray.serve._private.constants import RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING


Expand Down Expand Up @@ -102,6 +105,66 @@ async def __call__(self):
ray.get(signal_actor.send.remote())


@serve.deployment(graceful_shutdown_timeout_s=0)
class HangsOnFirstRequest:
    """Deployment whose first request blocks until a later request arrives.

    The first call awaits a signal actor and hangs; every subsequent call
    sends that signal (unblocking the first) and returns immediately. Used
    to exercise proxy-side request timeouts.
    """

    def __init__(self):
        # Whether any request has been seen yet by this replica.
        self._saw_first_request = False
        self.signal_actor = SignalActor.remote()

    async def __call__(self):
        if self._saw_first_request:
            # A later request: release the hanging first request.
            ray.get(self.signal_actor.send.remote())
            return "Success!"
        # First request: block here until the signal is sent.
        self._saw_first_request = True
        await self.signal_actor.wait.remote()
        return "Success!"


# Module-level app handle referenced by import path
# ("ray.serve.tests.test_request_timeout:hangs_on_first_request_app")
# in the REST API config used by test_with_rest_api below.
hangs_on_first_request_app = HangsOnFirstRequest.bind()


def test_with_rest_api(ray_start_stop):
    """Verify the REST API applies `request_timeout_s` from the Serve config.

    Deploys `hangs_on_first_request_app` (whose first request hangs until a
    second request arrives) with a 1s proxy-side timeout, then checks that
    the first request is terminated by the proxy rather than hanging.
    """
    config = {
        "proxy_location": "EveryNode",
        "http_options": {"request_timeout_s": 1},
        "applications": [
            {
                "name": "app",
                "route_prefix": "/",
                "import_path": (
                    "ray.serve.tests.test_request_timeout:hangs_on_first_request_app"
                ),
            }
        ],
    }
    ServeSubmissionClient("http://localhost:52365").deploy_applications(config)

    def application_running():
        response = requests.get(
            "http://localhost:52365/api/serve/applications/", timeout=15
        )
        assert response.status_code == 200

        serve_details = ServeInstanceDetails(**response.json())
        return serve_details.applications["app"].status == ApplicationStatus.RUNNING

    wait_for_condition(application_running, timeout=15)
    print("Application has started running. Testing requests...")

    # Client-side timeout so the test fails fast instead of hanging the
    # suite if the proxy-side `request_timeout_s` is not applied (the exact
    # regression this test guards against).
    response = requests.get("http://localhost:8000", timeout=30)
    if RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING:
        # Streaming proxy terminates the timed-out request with a 500.
        assert response.status_code == 500
    else:
        # NOTE(review): legacy proxy presumably retries after the timeout,
        # so the retried request unblocks the replica and succeeds.
        assert response.status_code == 200
        assert response.text == "Success!"

    # A subsequent request hits the "saw first request" path and succeeds.
    response = requests.get("http://localhost:8000", timeout=30)
    assert response.status_code == 200
    print("Requests succeeded! Deleting application.")
    ServeSubmissionClient("http://localhost:52365").delete_applications()


@pytest.mark.parametrize(
"ray_instance",
[
Expand Down

0 comments on commit 2f6c87e

Please sign in to comment.