Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions instill/helpers/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,14 @@ class VisualQuestionAnsweringInput:
}
# Default value for the maximum number of concurrent queries.
DEFAULT_MAX_CONCURRENT_QUERIES = 10

# Host-memory sizing knobs: a lower bound (documented in GB) and a multiplier
# applied to the model's on-disk size.
RAM_MINIMUM_RESERVE = 1  # GB
RAM_UPSCALE_FACTOR = 1.25

# GPU-memory sizing knobs: a lower bound in GB and a multiplier applied to the
# model's on-disk size before it is divided by the GPU's total VRAM.
VRAM_MINIMUM_RESERVE = 2  # GB
VRAM_UPSCALE_FACTOR = 1.25

# Fixed GPU fractions, keyed by application name. When an application is
# listed here, the deployment uses this value as ``num_gpus`` instead of the
# fraction projected from the model size. Entries are grouped by fraction.
MODEL_VRAM_OVERRIDE_LIST = {
    **dict.fromkeys(
        (
            "04186ea4-829b-4ad9-8839-92e66f360068_stable-diffusion-xl",
            "04186ea4-829b-4ad9-8839-92e66f360068_controlnet-canny",
        ),
        0.45,
    ),
    **dict.fromkeys(
        (
            "04186ea4-829b-4ad9-8839-92e66f360068_llava-7b",
            "04186ea4-829b-4ad9-8839-92e66f360068_llama2-7b-chat",
        ),
        0.4,
    ),
}
5 changes: 5 additions & 0 deletions instill/helpers/errors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
class ModelPathException(Exception):
    """Signal that a model path cannot be used.

    Raised bare (``raise ModelPathException``) it renders the default message.
    A raise site may instead pass its own message, for example one naming the
    offending path; that message is then what ``str()`` and ``args`` expose.

    Args:
        message: Optional replacement for the default message. An empty
            string falls back to ``DEFAULT_MESSAGE``.
    """

    DEFAULT_MESSAGE = "model path is not valid"

    def __init__(self, message: str = "") -> None:
        # Hand the text to Exception so that ``args``, ``str()`` and
        # ``repr()`` all agree, rather than hard-wiring it in ``__str__``.
        super().__init__(message or self.DEFAULT_MESSAGE)


class ModelVramException(Exception):
    """Signal that a model's projected VRAM usage exceeds the GPU's capacity.

    Raised bare (``raise ModelVramException``) it renders the default message.
    A raise site may instead pass its own message, for example one carrying
    the projected and available amounts; that message is then what ``str()``
    and ``args`` expose.

    Args:
        message: Optional replacement for the default message. An empty
            string falls back to ``DEFAULT_MESSAGE``.
    """

    DEFAULT_MESSAGE = "model projected vram usage is more than the GPU can handle"

    def __init__(self, message: str = "") -> None:
        # Hand the text to Exception so that ``args``, ``str()`` and
        # ``repr()`` all agree, rather than hard-wiring it in ``__str__``.
        super().__init__(message or self.DEFAULT_MESSAGE)
39 changes: 27 additions & 12 deletions instill/helpers/ray_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import math
import os
from typing import Callable, Optional

Expand All @@ -12,10 +11,13 @@
DEFAULT_MAX_CONCURRENT_QUERIES,
DEFAULT_RAY_ACTOR_OPRTIONS,
DEFAULT_RUNTIME_ENV,
MODEL_VRAM_OVERRIDE_LIST,
RAM_MINIMUM_RESERVE,
RAM_UPSCALE_FACTOR,
VRAM_MINIMUM_RESERVE,
VRAM_UPSCALE_FACTOR,
)
from instill.helpers.errors import ModelPathException
from instill.helpers.errors import ModelPathException, ModelVramException
from instill.helpers.utils import get_dir_size


Expand Down Expand Up @@ -48,8 +50,8 @@ def _update_num_gpus(self, num_gpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})

def _determine_vram_usage(self, model_path: str, vram: str):
if vram == "":
def _determine_vram_usage(self, model_path: str, total_vram: str):
if total_vram == "":
return 0.25
if os.path.isfile(model_path):
min_vram_usage = max(
Expand All @@ -58,22 +60,30 @@ def _determine_vram_usage(self, model_path: str, vram: str):
* os.path.getsize(model_path)
/ (1024 * 1024 * 1024),
)
ratio = min_vram_usage / float(vram)
return ratio if ratio <= 1 else math.ceil(ratio)
ratio = min_vram_usage / float(total_vram)
if ratio > 1:
raise ModelVramException
return ratio
if os.path.isdir(model_path):
min_vram_usage = max(
VRAM_MINIMUM_RESERVE,
VRAM_UPSCALE_FACTOR * get_dir_size(model_path) / (1024 * 1024 * 1024),
)
ratio = min_vram_usage / float(vram)
return ratio if ratio <= 1 else math.ceil(ratio)
ratio = min_vram_usage / float(total_vram)
if ratio > 1:
raise ModelVramException
return ratio
raise ModelPathException

def _determine_ram_usage(self, model_path: str):
    """Estimate how much host memory, in bytes, to reserve for a model.

    The estimate is the model's on-disk size scaled by
    ``RAM_UPSCALE_FACTOR``, but never less than ``RAM_MINIMUM_RESERVE``.

    Args:
        model_path: Path to a single model file or to a model directory.

    Returns:
        The memory reservation in bytes.

    Raises:
        ModelPathException: If ``model_path`` is neither a file nor a
            directory.
    """
    if os.path.isfile(model_path):
        size_in_bytes = os.path.getsize(model_path)
    elif os.path.isdir(model_path):
        # Assumes get_dir_size reports bytes, like os.path.getsize; the VRAM
        # sizing code divides its result by 1024**3 to obtain GB.
        size_in_bytes = get_dir_size(model_path)
    else:
        raise ModelPathException

    # RAM_MINIMUM_RESERVE is declared in GB while sizes here are in bytes, so
    # convert before comparing; otherwise the floor is effectively 1 byte.
    minimum_reserve_in_bytes = RAM_MINIMUM_RESERVE * 1024 * 1024 * 1024
    return max(minimum_reserve_in_bytes, RAM_UPSCALE_FACTOR * size_in_bytes)

def update_min_replicas(self, num_replicas: int):
Expand All @@ -90,7 +100,7 @@ def update_max_replicas(self, num_replicas: int):
autoscaling_config=new_autoscaling_config
)

def deploy(self, model_folder_path: str, ray_addr: str, vram: str):
def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
Expand All @@ -102,7 +112,12 @@ def deploy(self, model_folder_path: str, ray_addr: str, vram: str):
route_prefix = f'/{model_name}/{model_path_string_parts[3].split("#")[3]}'

if self.use_gpu:
self._update_num_gpus(self._determine_vram_usage(model_path, vram))
if application_name in MODEL_VRAM_OVERRIDE_LIST:
self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[application_name])
else:
self._update_num_gpus(
self._determine_vram_usage(model_path, total_vram)
)
else:
self._update_memory(self._determine_ram_usage(model_path))

Expand Down