Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions instill/helpers/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class ModelPathException(Exception):
    """Raised when a model path is neither an existing file nor a directory.

    May be raised bare (``raise ModelPathException``) or with the offending
    path as an argument (``raise ModelPathException(path)``); in the latter
    case the path is included in the message instead of being silently
    dropped, which was the previous behavior.
    """

    def __str__(self) -> str:
        # Surface any positional args (typically the invalid path) so the
        # error is actionable; fall back to the original generic message.
        if self.args:
            return f"model path is not valid: {self.args[0]}"
        return "model path is not valid"
16 changes: 15 additions & 1 deletion instill/helpers/ray_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
DEFAULT_RAY_ACTOR_OPRTIONS,
DEFAULT_RUNTIME_ENV,
)
from instill.helpers.errors import ModelPathException
from instill.helpers.utils import get_dir_size


Expand All @@ -36,6 +37,10 @@ def _update_num_cpus(self, num_cpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_cpus": num_cpus})

def _update_memory(self, memory: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"memory": memory})

def _update_num_gpus(self, num_gpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})
Expand All @@ -49,7 +54,14 @@ def _determine_vram_usage(self, model_path: str, vram: str):
)
if os.path.isdir(model_path):
return 1.1 * get_dir_size(model_path) / (1024 * 1024 * 1024) / float(vram)
return 0.25
raise ModelPathException

def _determine_ram_usage(self, model_path: str):
if os.path.isfile(model_path):
return 1.1 * os.path.getsize(model_path)
if os.path.isdir(model_path):
return 1.1 * get_dir_size(model_path)
raise ModelPathException

def update_min_replicas(self, num_replicas: int):
new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
Expand Down Expand Up @@ -78,6 +90,8 @@ def deploy(self, model_folder_path: str, ray_addr: str, vram: str):

if self.use_gpu:
self._update_num_gpus(self._determine_vram_usage(model_path, vram))
else:
self._update_memory(self._determine_ram_usage(model_path))

serve.run(
self._deployment.options(name=application_name).bind(model_path),
Expand Down