diff --git a/instill/helpers/errors.py b/instill/helpers/errors.py new file mode 100644 index 0000000..ef03503 --- /dev/null +++ b/instill/helpers/errors.py @@ -0,0 +1,3 @@ +class ModelPathException(Exception): + def __str__(self) -> str: + return "model path is not valid" diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py index d4a6f69..b4f7826 100644 --- a/instill/helpers/ray_config.py +++ b/instill/helpers/ray_config.py @@ -12,6 +12,7 @@ DEFAULT_RAY_ACTOR_OPRTIONS, DEFAULT_RUNTIME_ENV, ) +from instill.helpers.errors import ModelPathException from instill.helpers.utils import get_dir_size @@ -36,6 +37,10 @@ def _update_num_cpus(self, num_cpus: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"num_cpus": num_cpus}) + def _update_memory(self, memory: float): + if self._deployment.ray_actor_options is not None: + self._deployment.ray_actor_options.update({"memory": memory}) + def _update_num_gpus(self, num_gpus: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"num_gpus": num_gpus}) @@ -49,7 +54,14 @@ def _determine_vram_usage(self, model_path: str, vram: str): ) if os.path.isdir(model_path): return 1.1 * get_dir_size(model_path) / (1024 * 1024 * 1024) / float(vram) - return 0.25 + raise ModelPathException + + def _determine_ram_usage(self, model_path: str): + if os.path.isfile(model_path): + return 1.1 * os.path.getsize(model_path) + if os.path.isdir(model_path): + return 1.1 * get_dir_size(model_path) + raise ModelPathException def update_min_replicas(self, num_replicas: int): new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG @@ -78,6 +90,8 @@ def deploy(self, model_folder_path: str, ray_addr: str, vram: str): if self.use_gpu: self._update_num_gpus(self._determine_vram_usage(model_path, vram)) + else: + self._update_memory(self._determine_ram_usage(model_path)) serve.run( self._deployment.options(name=application_name).bind(model_path),