From 033382e8a93120378f38961bd964cb1745481b9f Mon Sep 17 00:00:00 2001 From: Dmitry Meyer Date: Wed, 5 Mar 2025 10:32:37 +0000 Subject: [PATCH] Set lower and upper limits of `utilization_policy.time_window` Fixes: https://github.com/dstackai/dstack/issues/2384 --- .../reference/dstack.yml/dev-environment.md | 8 ++++++ docs/docs/reference/dstack.yml/service.md | 28 ++++++++++++------- docs/docs/reference/dstack.yml/task.md | 8 ++++++ src/dstack/_internal/core/models/profiles.py | 21 ++++++++++++-- src/dstack/_internal/server/services/runs.py | 9 ++++++ src/dstack/api/utils.py | 3 ++ 6 files changed, 64 insertions(+), 13 deletions(-) diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md index 2bcd9794c..ed6103307 100644 --- a/docs/docs/reference/dstack.yml/dev-environment.md +++ b/docs/docs/reference/dstack.yml/dev-environment.md @@ -18,6 +18,14 @@ The `dev-environment` configuration type allows running [dev environments](../.. type: required: true +### `utilization_policy` + +#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy + overrides: + show_root_heading: false + type: + required: true + ### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md index 58e272f02..7875f4037 100644 --- a/docs/docs/reference/dstack.yml/service.md +++ b/docs/docs/reference/dstack.yml/service.md @@ -24,7 +24,7 @@ The `service` configuration type allows running [services](../../concepts/servic > TGI provides an OpenAI-compatible API starting with version 1.4.0, so models served by TGI can be defined with `format: openai` too. - + #SCHEMA# dstack.api.TGIChatModel overrides: show_root_heading: false @@ -32,23 +32,23 @@ The `service` configuration type allows running [services](../../concepts/servic required: true ??? info "Chat template" - + By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) from the model's repository. If it is not present there, manual configuration is required. - + ```yaml type: service - + image: ghcr.io/huggingface/text-generation-inference:latest env: - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ commands: - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq port: 8000 - + resources: gpu: 80GB - + # Enable the OpenAI-compatible endpoint model: type: chat @@ -57,13 +57,13 @@ The `service` configuration type allows running [services](../../concepts/servic chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}" eos_token: "" ``` - + Please note that model mapping is an experimental feature with the following limitations: - + 1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself. 2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template). - - If you encounter any other issues, please make sure to file a + + If you encounter any other issues, please make sure to file a [GitHub issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/new/choose){:target="_blank"}. ### `scaling` @@ -80,6 +80,14 @@ The `service` configuration type allows running [services](../../concepts/servic overrides: show_root_heading: false +### `utilization_policy` + +#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy + overrides: + show_root_heading: false + type: + required: true + ### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md index f08cf06cb..7ba1aeb61 100644 --- a/docs/docs/reference/dstack.yml/task.md +++ b/docs/docs/reference/dstack.yml/task.md @@ -18,6 +18,14 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md). type: required: true +### `utilization_policy` + +#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy + overrides: + show_root_heading: false + type: + required: true + ### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema diff --git a/src/dstack/_internal/core/models/profiles.py b/src/dstack/_internal/core/models/profiles.py index 80b64eb9a..a289b7960 100644 --- a/src/dstack/_internal/core/models/profiles.py +++ b/src/dstack/_internal/core/models/profiles.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import List, Optional, Union +from typing import List, Optional, Union, overload from pydantic import Field, root_validator, validator from typing_extensions import Annotated, Literal @@ -34,6 +34,14 @@ class TerminationPolicy(str, Enum): DESTROY_AFTER_IDLE = "destroy-after-idle" +@overload +def parse_duration(v: None) -> None: ... + + +@overload +def parse_duration(v: Union[int, str]) -> int: ... + + def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]: if v is None: return None @@ -113,6 +121,8 @@ def _validate_fields(cls, values): class UtilizationPolicy(CoreModel): + _min_time_window = "5m" + min_gpu_utilization: Annotated[ int, Field( @@ -130,12 +140,17 @@ class UtilizationPolicy(CoreModel): Field( description=( "The time window of metric samples taking into account to measure utilization" - " (e.g., `30m`, `1h`)" + f" (e.g., `30m`, `1h`). Minimum is `{_min_time_window}`" ) ), ] - _validate_time_window = validator("time_window", pre=True, allow_reuse=True)(parse_duration) + @validator("time_window", pre=True) + def validate_time_window(cls, v: Union[int, str]) -> int: + v = parse_duration(v) + if v < parse_duration(cls._min_time_window): + raise ValueError(f"Minimum time_window is {cls._min_time_window}") + return v class ProfileParams(CoreModel): diff --git a/src/dstack/_internal/server/services/runs.py b/src/dstack/_internal/server/services/runs.py index befb9edc4..44f5520a6 100644 --- a/src/dstack/_internal/server/services/runs.py +++ b/src/dstack/_internal/server/services/runs.py @@ -48,6 +48,7 @@ ) from dstack._internal.core.services import validate_dstack_resource_name from dstack._internal.core.services.diff import diff_models +from dstack._internal.server import settings from dstack._internal.server.db import get_db from dstack._internal.server.models import ( JobModel, @@ -838,6 +839,14 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec): run_spec.repo_id = DEFAULT_VIRTUAL_REPO_ID if run_spec.repo_data is None: run_spec.repo_data = VirtualRunRepoData() + if ( + run_spec.merged_profile.utilization_policy is not None + and run_spec.merged_profile.utilization_policy.time_window + > settings.SERVER_METRICS_TTL_SECONDS + ): + raise ServerClientError( + f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s" + ) _UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"] diff --git a/src/dstack/api/utils.py b/src/dstack/api/utils.py index 9471ce74b..303e71982 100644 --- a/src/dstack/api/utils.py +++ b/src/dstack/api/utils.py @@ -2,6 +2,7 @@ from typing import Optional, Tuple import yaml +from pydantic import ValidationError from dstack._internal.core.errors import ConfigurationError from dstack._internal.core.models.configurations import AnyRunConfiguration @@ -96,6 +97,8 @@ def _load_profile_from_path(profiles_path: Path, profile_name: Optional[str]) -> config = ProfilesConfig.parse_obj(yaml.safe_load(f)) except FileNotFoundError: return None + except ValidationError as e: + raise ConfigurationError(e) if profile_name is None: return config.default()