diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md
index 2bcd9794c..ed6103307 100644
--- a/docs/docs/reference/dstack.yml/dev-environment.md
+++ b/docs/docs/reference/dstack.yml/dev-environment.md
@@ -18,6 +18,14 @@ The `dev-environment` configuration type allows running [dev environments](../..
type:
required: true
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+ overrides:
+ show_root_heading: false
+ type:
+ required: true
+
### `resources`
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md
index 58e272f02..7875f4037 100644
--- a/docs/docs/reference/dstack.yml/service.md
+++ b/docs/docs/reference/dstack.yml/service.md
@@ -24,7 +24,7 @@ The `service` configuration type allows running [services](../../concepts/servic
> TGI provides an OpenAI-compatible API starting with version 1.4.0,
so models served by TGI can be defined with `format: openai` too.
-
+
#SCHEMA# dstack.api.TGIChatModel
overrides:
show_root_heading: false
@@ -32,23 +32,23 @@ The `service` configuration type allows running [services](../../concepts/servic
required: true
??? info "Chat template"
-
+
By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating)
from the model's repository. If it is not present there, manual configuration is required.
-
+
```yaml
type: service
-
+
image: ghcr.io/huggingface/text-generation-inference:latest
env:
- MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ
commands:
- text-generation-launcher --port 8000 --trust-remote-code --quantize gptq
port: 8000
-
+
resources:
gpu: 80GB
-
+
# Enable the OpenAI-compatible endpoint
model:
type: chat
@@ -57,13 +57,13 @@ The `service` configuration type allows running [services](../../concepts/servic
          chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}"
          eos_token: "</s>"
```
-
+
Please note that model mapping is an experimental feature with the following limitations:
-
+
1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself.
2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template).
-
- If you encounter any other issues, please make sure to file a
+
+ If you encounter any other issues, please make sure to file a
[GitHub issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/new/choose){:target="_blank"}.
### `scaling`
@@ -80,6 +80,14 @@ The `service` configuration type allows running [services](../../concepts/servic
overrides:
show_root_heading: false
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+ overrides:
+ show_root_heading: false
+ type:
+ required: true
+
### `resources`
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md
index f08cf06cb..7ba1aeb61 100644
--- a/docs/docs/reference/dstack.yml/task.md
+++ b/docs/docs/reference/dstack.yml/task.md
@@ -18,6 +18,14 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
type:
required: true
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+ overrides:
+ show_root_heading: false
+ type:
+ required: true
+
### `resources`
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/src/dstack/_internal/core/models/profiles.py b/src/dstack/_internal/core/models/profiles.py
index 80b64eb9a..a289b7960 100644
--- a/src/dstack/_internal/core/models/profiles.py
+++ b/src/dstack/_internal/core/models/profiles.py
@@ -1,5 +1,5 @@
from enum import Enum
-from typing import List, Optional, Union
+from typing import List, Optional, Union, overload
from pydantic import Field, root_validator, validator
from typing_extensions import Annotated, Literal
@@ -34,6 +34,14 @@ class TerminationPolicy(str, Enum):
DESTROY_AFTER_IDLE = "destroy-after-idle"
+@overload
+def parse_duration(v: None) -> None: ...
+
+
+@overload
+def parse_duration(v: Union[int, str]) -> int: ...
+
+
def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
if v is None:
return None
@@ -113,6 +121,8 @@ def _validate_fields(cls, values):
class UtilizationPolicy(CoreModel):
+ _min_time_window = "5m"
+
min_gpu_utilization: Annotated[
int,
Field(
@@ -130,12 +140,17 @@ class UtilizationPolicy(CoreModel):
Field(
description=(
"The time window of metric samples taking into account to measure utilization"
- " (e.g., `30m`, `1h`)"
+ f" (e.g., `30m`, `1h`). Minimum is `{_min_time_window}`"
)
),
]
- _validate_time_window = validator("time_window", pre=True, allow_reuse=True)(parse_duration)
+ @validator("time_window", pre=True)
+ def validate_time_window(cls, v: Union[int, str]) -> int:
+ v = parse_duration(v)
+ if v < parse_duration(cls._min_time_window):
+ raise ValueError(f"Minimum time_window is {cls._min_time_window}")
+ return v
class ProfileParams(CoreModel):
diff --git a/src/dstack/_internal/server/services/runs.py b/src/dstack/_internal/server/services/runs.py
index befb9edc4..44f5520a6 100644
--- a/src/dstack/_internal/server/services/runs.py
+++ b/src/dstack/_internal/server/services/runs.py
@@ -48,6 +48,7 @@
)
from dstack._internal.core.services import validate_dstack_resource_name
from dstack._internal.core.services.diff import diff_models
+from dstack._internal.server import settings
from dstack._internal.server.db import get_db
from dstack._internal.server.models import (
JobModel,
@@ -838,6 +839,14 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
run_spec.repo_id = DEFAULT_VIRTUAL_REPO_ID
if run_spec.repo_data is None:
run_spec.repo_data = VirtualRunRepoData()
+ if (
+ run_spec.merged_profile.utilization_policy is not None
+ and run_spec.merged_profile.utilization_policy.time_window
+ > settings.SERVER_METRICS_TTL_SECONDS
+ ):
+ raise ServerClientError(
+ f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
+ )
_UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"]
diff --git a/src/dstack/api/utils.py b/src/dstack/api/utils.py
index 9471ce74b..303e71982 100644
--- a/src/dstack/api/utils.py
+++ b/src/dstack/api/utils.py
@@ -2,6 +2,7 @@
from typing import Optional, Tuple
import yaml
+from pydantic import ValidationError
from dstack._internal.core.errors import ConfigurationError
from dstack._internal.core.models.configurations import AnyRunConfiguration
@@ -96,6 +97,8 @@ def _load_profile_from_path(profiles_path: Path, profile_name: Optional[str]) ->
config = ProfilesConfig.parse_obj(yaml.safe_load(f))
except FileNotFoundError:
return None
+ except ValidationError as e:
+ raise ConfigurationError(e)
if profile_name is None:
return config.default()