From ebf27c7d5744f4a56d5283243c8403beca23dc99 Mon Sep 17 00:00:00 2001 From: Heiru Wu Date: Mon, 29 Jan 2024 04:44:40 +0800 Subject: [PATCH] chore: add vram override --- instill/helpers/const.py | 9 ++++++++ instill/helpers/errors.py | 5 +++++ instill/helpers/ray_config.py | 39 ++++++++++++++++++++++++----------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/instill/helpers/const.py b/instill/helpers/const.py index fd92412..caca8db 100644 --- a/instill/helpers/const.py +++ b/instill/helpers/const.py @@ -104,5 +104,14 @@ class VisualQuestionAnsweringInput: } DEFAULT_MAX_CONCURRENT_QUERIES = 10 +RAM_MINIMUM_RESERVE = 1 # GB +RAM_UPSCALE_FACTOR = 1.25 VRAM_MINIMUM_RESERVE = 2 # GB VRAM_UPSCALE_FACTOR = 1.25 + +MODEL_VRAM_OVERRIDE_LIST = { + "04186ea4-829b-4ad9-8839-92e66f360068_stable-diffusion-xl": 0.45, + "04186ea4-829b-4ad9-8839-92e66f360068_controlnet-canny": 0.45, + "04186ea4-829b-4ad9-8839-92e66f360068_llava-7b": 0.4, + "04186ea4-829b-4ad9-8839-92e66f360068_llama2-7b-chat": 0.4, +} diff --git a/instill/helpers/errors.py b/instill/helpers/errors.py index ef03503..b937efb 100644 --- a/instill/helpers/errors.py +++ b/instill/helpers/errors.py @@ -1,3 +1,8 @@ class ModelPathException(Exception): def __str__(self) -> str: return "model path is not valid" + + +class ModelVramException(Exception): + def __str__(self) -> str: + return "model projected vram usage is more than the GPU can handle" diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py index 3f4de8e..2a12286 100644 --- a/instill/helpers/ray_config.py +++ b/instill/helpers/ray_config.py @@ -1,4 +1,3 @@ -import math import os from typing import Callable, Optional @@ -12,10 +11,13 @@ DEFAULT_MAX_CONCURRENT_QUERIES, DEFAULT_RAY_ACTOR_OPRTIONS, DEFAULT_RUNTIME_ENV, + MODEL_VRAM_OVERRIDE_LIST, + RAM_MINIMUM_RESERVE, + RAM_UPSCALE_FACTOR, VRAM_MINIMUM_RESERVE, VRAM_UPSCALE_FACTOR, ) -from instill.helpers.errors import ModelPathException +from instill.helpers.errors import ModelPathException, ModelVramException from instill.helpers.utils import get_dir_size @@ -48,8 +50,8 @@ def _update_num_gpus(self, num_gpus: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"num_gpus": num_gpus}) - def _determine_vram_usage(self, model_path: str, vram: str): - if vram == "": + def _determine_vram_usage(self, model_path: str, total_vram: str): + if total_vram == "": return 0.25 if os.path.isfile(model_path): min_vram_usage = max( @@ -58,22 +60,30 @@ def _determine_vram_usage(self, model_path: str, vram: str): * os.path.getsize(model_path) / (1024 * 1024 * 1024), ) - ratio = min_vram_usage / float(vram) - return ratio if ratio <= 1 else math.ceil(ratio) + ratio = min_vram_usage / float(total_vram) + if ratio > 1: + raise ModelVramException + return ratio if os.path.isdir(model_path): min_vram_usage = max( VRAM_MINIMUM_RESERVE, VRAM_UPSCALE_FACTOR * get_dir_size(model_path) / (1024 * 1024 * 1024), ) - ratio = min_vram_usage / float(vram) - return ratio if ratio <= 1 else math.ceil(ratio) + ratio = min_vram_usage / float(total_vram) + if ratio > 1: + raise ModelVramException + return ratio raise ModelPathException def _determine_ram_usage(self, model_path: str): if os.path.isfile(model_path): - return 1.1 * os.path.getsize(model_path) + return max( + RAM_MINIMUM_RESERVE, RAM_UPSCALE_FACTOR * os.path.getsize(model_path) + ) if os.path.isdir(model_path): - return 1.1 * get_dir_size(model_path) + return max( + RAM_MINIMUM_RESERVE, RAM_UPSCALE_FACTOR * get_dir_size(model_path) + ) raise ModelPathException def update_min_replicas(self, num_replicas: int): @@ -90,7 +100,7 @@ def update_max_replicas(self, num_replicas: int): autoscaling_config=new_autoscaling_config ) - def deploy(self, model_folder_path: str, ray_addr: str, vram: str): + def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str): if not ray.is_initialized(): ray_addr = "ray://" + ray_addr.replace("9000", "10001") ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV) @@ -102,7 +112,12 @@ def deploy(self, model_folder_path: str, ray_addr: str, vram: str): route_prefix = f'/{model_name}/{model_path_string_parts[3].split("#")[3]}' if self.use_gpu: - self._update_num_gpus(self._determine_vram_usage(model_path, vram)) + if application_name in MODEL_VRAM_OVERRIDE_LIST: + self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[application_name]) + else: + self._update_num_gpus( + self._determine_vram_usage(model_path, total_vram) + ) else: self._update_memory(self._determine_ram_usage(model_path))