From e127e8a86367d84fc162ddb2f5f2b8c14872b723 Mon Sep 17 00:00:00 2001 From: Heiru Wu Date: Wed, 12 Mar 2025 05:08:24 +0800 Subject: [PATCH] fix(ray): align autoscale config --- instill/helpers/const.py | 5 +++-- instill/helpers/ray_config.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/instill/helpers/const.py b/instill/helpers/const.py index 8435c1a..cf1aae7 100644 --- a/instill/helpers/const.py +++ b/instill/helpers/const.py @@ -107,6 +107,7 @@ class ImageToImageInput: "num_cpus": 0, } DEFAULT_AUTOSCALING_CONFIG = { + "target_num_ongoing_requests_per_replica": 2, "target_ongoing_requests": 2, "initial_replicas": 1, "min_replicas": 0, @@ -124,8 +125,8 @@ class ImageToImageInput: "PYTHONPATH": os.getcwd(), }, } -DEFAULT_MAX_ONGOING_REQUESTS = 5 -DEFAULT_MAX_QUEUED_REQUESTS = 10 +DEFAULT_MAX_ONGOING_REQUESTS = 6 +DEFAULT_MAX_QUEUED_REQUESTS = 100 RAM_MINIMUM_RESERVE = 1 # GB RAM_UPSCALE_FACTOR = 1.25 diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py index 56358e9..fc28977 100644 --- a/instill/helpers/ray_config.py +++ b/instill/helpers/ray_config.py @@ -40,7 +40,9 @@ def __init__(self, deployable: Deployment) -> None: is_test_model = os.getenv(ENV_IS_TEST_MODEL) if is_test_model is not None and is_test_model.lower() == "true": + self._update_max_replicas(1000) self._update_num_cpus(float(0.001)) + self._update_upscale_delay(30) self._update_downscale_delay(60) is_high_scale_model = os.getenv(ENV_IS_HIGH_SCALE_MODEL) @@ -205,5 +207,6 @@ def instill_deployment( ray_actor_options=DEFAULT_RAY_ACTOR_OPTIONS, autoscaling_config=DEFAULT_AUTOSCALING_CONFIG, max_ongoing_requests=DEFAULT_MAX_ONGOING_REQUESTS, + max_concurrent_queries=DEFAULT_MAX_ONGOING_REQUESTS, max_queued_requests=DEFAULT_MAX_QUEUED_REQUESTS, )