Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/docs/concepts/backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ gcloud projects list --format="json(projectId)"
compute.networks.updatePolicy
compute.regions.get
compute.regions.list
compute.reservations.list
compute.resourcePolicies.create
compute.resourcePolicies.delete
compute.routers.list
Expand All @@ -543,6 +544,9 @@ gcloud projects list --format="json(projectId)"
Also, the use of TPUs requires the `serviceAccountUser` role.
For TPU VMs, dstack will use the default service account.

If you plan to use shared reservations, the `compute.reservations.list`
permission is required in the project that owns the reservations.

??? info "Required APIs"
First, ensure the required APIs are enabled in your GCP `project_id`.

Expand Down
2 changes: 1 addition & 1 deletion docs/docs/guides/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ one of these features, `dstack` will only select offers from the backends that s
are only supported by the `aws`, `azure`, `gcp`, `nebius`, `oci`, and `vultr` backends,
as well as SSH fleets.
- [Reservations](../reference/dstack.yml/fleet.md#reservation)
are only supported by the `aws` backend.
are only supported by the `aws` and `gcp` backends.

#### Cause 8: dstack Sky balance

Expand Down
13 changes: 8 additions & 5 deletions src/dstack/_internal/core/backends/aws/compute.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import threading
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -34,7 +35,11 @@
get_user_data,
merge_tags,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
from dstack._internal.core.backends.base.offers import (
OfferModifier,
get_catalog_offers,
get_offers_disk_modifier,
)
from dstack._internal.core.errors import (
ComputeError,
NoCapacityError,
Expand Down Expand Up @@ -159,10 +164,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
)
return availability_offers

def get_offers_modifier(
self, requirements: Requirements
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]

def _get_offers_cached_key(self, requirements: Requirements) -> int:
# Requirements is not hashable, so we use a hack to get arguments hash
Expand Down
15 changes: 9 additions & 6 deletions src/dstack/_internal/core/backends/azure/compute.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import base64
import enum
import re
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple

from azure.core.credentials import TokenCredential
from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError
Expand Down Expand Up @@ -51,7 +52,11 @@
merge_tags,
requires_nvidia_proprietary_kernel_modules,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
from dstack._internal.core.backends.base.offers import (
OfferModifier,
get_catalog_offers,
get_offers_disk_modifier,
)
from dstack._internal.core.consts import DSTACK_OS_IMAGE_WITH_PROPRIETARY_NVIDIA_KERNEL_MODULES
from dstack._internal.core.errors import ComputeError, NoCapacityError
from dstack._internal.core.models.backends.base import BackendType
Expand Down Expand Up @@ -108,10 +113,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
)
return offers_with_availability

def get_offers_modifier(
self, requirements: Requirements
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]

def create_instance(
self,
Expand Down
57 changes: 40 additions & 17 deletions src/dstack/_internal/core/backends/base/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
from gpuhunt import CPUArchitecture

from dstack._internal import settings
from dstack._internal.core.backends.base.offers import filter_offers_by_requirements
from dstack._internal.core.backends.base.offers import OfferModifier, filter_offers_by_requirements
from dstack._internal.core.consts import (
DSTACK_RUNNER_HTTP_PORT,
DSTACK_RUNNER_SSH_PORT,
DSTACK_SHIM_HTTP_PORT,
)
from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.configurations import LEGACY_REPO_DIR
from dstack._internal.core.models.gateways import (
GatewayComputeConfiguration,
Expand Down Expand Up @@ -168,17 +169,13 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
"""
pass

def get_offers_modifier(
self, requirements: Requirements
) -> Optional[
Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
]:
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
"""
Returns a modifier function that modifies offers before they are filtered by requirements.
Can return `None` to exclude the offer.
Returns functions that modify offers before they are filtered by requirements.
A modifier function can return `None` to exclude the offer.
E.g. can be used to set appropriate disk size based on requirements.
"""
return None
return []

def get_offers_post_filter(
self, requirements: Requirements
Expand All @@ -191,14 +188,7 @@ def get_offers_post_filter(

def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
offers = self._get_all_offers_with_availability_cached()
modifier = self.get_offers_modifier(requirements)
if modifier is not None:
modified_offers = []
for o in offers:
modified_offer = modifier(o)
if modified_offer is not None:
modified_offers.append(modified_offer)
offers = modified_offers
offers = self.__apply_modifiers(offers, self.get_offers_modifiers(requirements))
offers = filter_offers_by_requirements(offers, requirements)
post_filter = self.get_offers_post_filter(requirements)
if post_filter is not None:
Expand All @@ -212,6 +202,20 @@ def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvaila
def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
return self.get_all_offers_with_availability()

@staticmethod
def __apply_modifiers(
offers: Iterable[InstanceOfferWithAvailability], modifiers: Iterable[OfferModifier]
) -> list[InstanceOfferWithAvailability]:
modified_offers = []
for offer in offers:
for modifier in modifiers:
offer = modifier(offer)
if offer is None:
break
else:
modified_offers.append(offer)
return modified_offers


class ComputeWithFilteredOffersCached(ABC):
"""
Expand Down Expand Up @@ -341,6 +345,15 @@ class ComputeWithMultinodeSupport:
class ComputeWithReservationSupport:
"""
Must be subclassed to support provisioning from reservations.

The following is expected from a backend that supports reservations:

- `get_offers` respects `Requirements.reservation` if set, and only returns
offers that can be provisioned in the configured reservation. It can
adjust some offer properties such as `availability` and
`availability_zones` if necessary.
- `create_instance` respects `InstanceConfig.reservation` if set, and
provisions the instance in the configured reservation.
"""

pass
Expand Down Expand Up @@ -391,6 +404,16 @@ def is_suitable_placement_group(
"""
pass

def are_placement_groups_compatible_with_reservations(self, backend_type: BackendType) -> bool:
"""
Whether placement groups can be used for instances provisioned in reservations.

Arguments:
backend_type: matches the backend type of this compute, unless this compute is a proxy
for other backends (dstack Sky)
"""
return True


class ComputeWithGatewaySupport(ABC):
"""
Expand Down
5 changes: 4 additions & 1 deletion src/dstack/_internal/core/backends/base/offers.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,12 @@ def choose_disk_size_mib(
return round(disk_size_gib * 1024)


OfferModifier = Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]


def get_offers_disk_modifier(
configurable_disk_size: Range[Memory], requirements: Requirements
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
) -> OfferModifier:
"""
Returns a func that modifies offers disk by setting min value that satisfies both
`configurable_disk_size` and `requirements`.
Expand Down
15 changes: 9 additions & 6 deletions src/dstack/_internal/core/backends/datacrunch/compute.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Callable, Dict, List, Optional
from collections.abc import Iterable
from typing import Dict, List, Optional

from datacrunch import DataCrunchClient
from datacrunch.exceptions import APIException
Expand All @@ -12,7 +13,11 @@
generate_unique_instance_name,
get_shim_commands,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
from dstack._internal.core.backends.base.offers import (
OfferModifier,
get_catalog_offers,
get_offers_disk_modifier,
)
from dstack._internal.core.backends.datacrunch.models import DataCrunchConfig
from dstack._internal.core.errors import NoCapacityError
from dstack._internal.core.models.backends.base import BackendType
Expand Down Expand Up @@ -59,10 +64,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
offers_with_availability = self._get_offers_with_availability(offers)
return offers_with_availability

def get_offers_modifier(
self, requirements: Requirements
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]

def _get_offers_with_availability(
self, offers: List[InstanceOffer]
Expand Down
Loading