diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index ad78680..8435c1a 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -143,3 +143,4 @@ class ImageToImageInput:
 ENV_NUM_OF_MIN_REPLICAS = "RAY_NUM_OF_MIN_REPLICAS"
 ENV_NUM_OF_MAX_REPLICAS = "RAY_NUM_OF_MAX_REPLICAS"
 ENV_IS_TEST_MODEL = "RAY_IS_TEST_MODEL"
+ENV_IS_HIGH_SCALE_MODEL = "RAY_IS_HIGH_SCALE_MODEL"  # "true" tunes scaling delays
diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py
index 373adb1..56358e9 100644
--- a/instill/helpers/ray_config.py
+++ b/instill/helpers/ray_config.py
@@ -10,6 +10,7 @@
     DEFAULT_MAX_ONGOING_REQUESTS,
     DEFAULT_MAX_QUEUED_REQUESTS,
     DEFAULT_RAY_ACTOR_OPTIONS,
+    ENV_IS_HIGH_SCALE_MODEL,
     ENV_IS_TEST_MODEL,
     ENV_MEMORY,
     ENV_NUM_OF_CPUS,
@@ -42,6 +43,11 @@ def __init__(self, deployable: Deployment) -> None:
             self._update_num_cpus(float(0.001))
             self._update_downscale_delay(60)
 
+        is_high_scale_model = os.getenv(ENV_IS_HIGH_SCALE_MODEL)
+        if is_high_scale_model is not None and is_high_scale_model.lower() == "true":
+            self._update_upscale_delay(120)
+            self._update_downscale_delay(600)
+
         memory = os.getenv(ENV_MEMORY)
         if memory is not None and memory != "":
             self._update_memory(float(memory))
@@ -181,6 +187,9 @@ def _update_max_replicas(self, num_replicas: int):
 
         return self
 
+    def _update_upscale_delay(self, upscale_delay_s: int):  # returns None, not self
+        self._autoscaling_config["upscale_delay_s"] = upscale_delay_s
+
     def _update_downscale_delay(self, downscale_delay_s: int):
         self._autoscaling_config["downscale_delay_s"] = downscale_delay_s