diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index ad78680..8435c1a 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -143,3 +143,4 @@ class ImageToImageInput:
 ENV_NUM_OF_MIN_REPLICAS = "RAY_NUM_OF_MIN_REPLICAS"
 ENV_NUM_OF_MAX_REPLICAS = "RAY_NUM_OF_MAX_REPLICAS"
 ENV_IS_TEST_MODEL = "RAY_IS_TEST_MODEL"
+ENV_IS_HIGH_SCALE_MODEL = "RAY_IS_HIGH_SCALE_MODEL"
diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py
index 373adb1..56358e9 100644
--- a/instill/helpers/ray_config.py
+++ b/instill/helpers/ray_config.py
@@ -10,6 +10,7 @@
     DEFAULT_MAX_ONGOING_REQUESTS,
     DEFAULT_MAX_QUEUED_REQUESTS,
     DEFAULT_RAY_ACTOR_OPTIONS,
+    ENV_IS_HIGH_SCALE_MODEL,
     ENV_IS_TEST_MODEL,
     ENV_MEMORY,
     ENV_NUM_OF_CPUS,
@@ -42,6 +43,13 @@ def __init__(self, deployable: Deployment) -> None:
             self._update_num_cpus(float(0.001))
             self._update_downscale_delay(60)
 
+        # Opt-in via RAY_IS_HIGH_SCALE_MODEL=true (case-insensitive). This runs
+        # after the _update_downscale_delay(60) call above, so 600 wins if both run.
+        is_high_scale_model = os.getenv(ENV_IS_HIGH_SCALE_MODEL)
+        if is_high_scale_model is not None and is_high_scale_model.lower() == "true":
+            self._update_upscale_delay(120)
+            self._update_downscale_delay(600)
+
         memory = os.getenv(ENV_MEMORY)
         if memory is not None and memory != "":
             self._update_memory(float(memory))
@@ -181,6 +189,15 @@ def _update_max_replicas(self, num_replicas: int):
 
         return self
 
+    def _update_upscale_delay(self, upscale_delay_s: int):
+        """Store ``upscale_delay_s`` in the autoscaling config.
+
+        Return ``self`` to allow chaining, matching ``_update_max_replicas``.
+        """
+        self._autoscaling_config["upscale_delay_s"] = upscale_delay_s
+
+        return self
+
     def _update_downscale_delay(self, downscale_delay_s: int):
         self._autoscaling_config["downscale_delay_s"] = downscale_delay_s
 