Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions instill/helpers/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class ModelPathException(Exception):
    """Raised when a model path is neither an existing file nor a directory.

    May be raised bare (``raise ModelPathException``) or with the offending
    path as an argument (``raise ModelPathException(path)``); in the latter
    case the path is included in the message instead of being silently
    dropped, which was the previous behavior.
    """

    def __str__(self) -> str:
        # Surface any positional args (typically the invalid path) so the
        # error is actionable; fall back to the original generic message.
        if self.args:
            return f"model path is not valid: {self.args[0]}"
        return "model path is not valid"
16 changes: 15 additions & 1 deletion instill/helpers/ray_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
DEFAULT_RAY_ACTOR_OPRTIONS,
DEFAULT_RUNTIME_ENV,
)
from instill.helpers.errors import ModelPathException
from instill.helpers.utils import get_dir_size


Expand All @@ -36,6 +37,10 @@ def _update_num_cpus(self, num_cpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_cpus": num_cpus})

def _update_memory(self, memory: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"memory": memory})

def _update_num_gpus(self, num_gpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})
Expand All @@ -49,7 +54,14 @@ def _determine_vram_usage(self, model_path: str, vram: str):
)
if os.path.isdir(model_path):
return 1.1 * get_dir_size(model_path) / (1024 * 1024 * 1024) / float(vram)
return 0.25
raise ModelPathException

def _determine_ram_usage(self, model_path: str):
if os.path.isfile(model_path):
return 1.1 * os.path.getsize(model_path)
if os.path.isdir(model_path):
return 1.1 * get_dir_size(model_path)
raise ModelPathException

def update_min_replicas(self, num_replicas: int):
new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
Expand Down Expand Up @@ -78,6 +90,8 @@ def deploy(self, model_folder_path: str, ray_addr: str, vram: str):

if self.use_gpu:
self._update_num_gpus(self._determine_vram_usage(model_path, vram))
else:
self._update_memory(self._determine_ram_usage(model_path))

serve.run(
self._deployment.options(name=application_name).bind(model_path),
Expand Down