def _determine_vram_usage(self, model_path: str, vram: str) -> float:
    """Estimate the share of GPU memory needed by the model at *model_path*.

    The on-disk size of the model (a single file, or a directory measured
    with ``get_dir_size``) is converted from bytes to GiB, scaled by 1.1
    and divided by ``float(vram)``.

    Args:
        model_path: Path to a model weight file or a model directory.
        vram: Total GPU memory as a numeric string. The bytes -> GiB
            conversion below implies it is expressed in GiB -- TODO confirm
            against the caller. An empty string means "unknown".

    Returns:
        The estimated fraction, or the default ``0.25`` when *vram* is
        empty, not a number, or not strictly positive, or when
        *model_path* is neither a file nor a directory.
    """
    default_fraction = 0.25

    # An empty string was already treated as "unknown VRAM"; unparseable or
    # non-positive values get the same fallback instead of escaping as
    # ValueError / ZeroDivisionError or producing a negative GPU share.
    try:
        total_vram = float(vram)
    except (TypeError, ValueError):
        return default_fraction
    if not total_vram > 0:  # also rejects NaN
        return default_fraction

    if os.path.isfile(model_path):
        size_in_bytes = os.path.getsize(model_path)
    elif os.path.isdir(model_path):
        size_in_bytes = get_dir_size(model_path)
    else:
        return default_fraction

    # 1.1 presumably leaves ~10% headroom on top of the raw weight size.
    # NOTE(review): the result is not capped, so a model larger than the
    # available VRAM yields a value above 1.0 -- confirm the consumer of
    # this number accepts that.
    return 1.1 * size_in_bytes / (1024 * 1024 * 1024) / total_vram
def get_dir_size(path):
    """Return the total size in bytes of all files beneath *path*.

    The tree is walked iteratively with ``os.scandir``. Symbolic links are
    followed for both files and directories, but every directory is entered
    at most once (keyed by its resolved real path). A symlink that points
    back into the tree therefore cannot cause an endless walk, and a
    directory reachable through several links is not counted repeatedly.

    Args:
        path: Directory to measure.

    Returns:
        Sum of ``st_size`` over every regular file found; ``0`` for an
        empty directory.

    Raises:
        OSError: Propagated from ``os.scandir`` (for example
            ``FileNotFoundError`` or ``NotADirectoryError`` when *path* is
            not an existing directory).
    """
    total = 0
    visited = set()
    pending = [path]
    while pending:
        current = pending.pop()
        # Resolve symlinks so the same physical directory is recognised no
        # matter which path led to it.
        resolved = os.path.realpath(current)
        if resolved in visited:
            continue
        visited.add(resolved)
        with os.scandir(current) as entries:
            for entry in entries:
                if entry.is_file():
                    total += entry.stat().st_size
                elif entry.is_dir():
                    pending.append(entry.path)
    return total