diff --git a/pyproject.toml b/pyproject.toml index c6f20d662..5cceee68b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-multipart>=0.0.16", "filelock", "psutil", - "gpuhunt==0.1.10", + "gpuhunt==0.1.11", "argcomplete>=3.5.0", "ignore-python>=0.2.0", "orjson", diff --git a/src/dstack/_internal/core/backends/base/offers.py b/src/dstack/_internal/core/backends/base/offers.py index 0efb569e2..00269217c 100644 --- a/src/dstack/_internal/core/backends/base/offers.py +++ b/src/dstack/_internal/core/backends/base/offers.py @@ -24,6 +24,7 @@ "lambda-arm", "gcp-a4", "gcp-g4-preview", + "gcp-dws-calendar-mode", ] @@ -94,6 +95,7 @@ def catalog_item_to_offer( ), region=item.location, price=item.price, + backend_data=item.provider_data, ) diff --git a/src/dstack/_internal/core/backends/gcp/compute.py b/src/dstack/_internal/core/backends/gcp/compute.py index 7069b3f18..3fb4fb7b3 100644 --- a/src/dstack/_internal/core/backends/gcp/compute.py +++ b/src/dstack/_internal/core/backends/gcp/compute.py @@ -90,6 +90,10 @@ TPU_VERSIONS = [tpu.name for tpu in KNOWN_TPUS] +class GCPOfferBackendData(CoreModel): + is_dws_calendar_mode: bool = False + + class GCPVolumeDiskBackendData(CoreModel): type: Literal["disk"] = "disk" disk_type: str @@ -202,6 +206,23 @@ def reservation_modifier( modifiers.append(get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)) return modifiers + def get_offers_post_filter( + self, requirements: Requirements + ) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]: + if requirements.reservation is None: + + def reserved_offers_filter(offer: InstanceOfferWithAvailability) -> bool: + """Remove reserved-only offers""" + if GCPOfferBackendData.__response__.parse_obj( + offer.backend_data + ).is_dws_calendar_mode: + return False + return True + + return reserved_offers_filter + + return None + def terminate_instance( self, instance_id: str, region: str, backend_data: Optional[str] = None ) -> None: diff --git a/src/dstack/_internal/core/compatibility/runs.py b/src/dstack/_internal/core/compatibility/runs.py index a7ce3c63c..403009ae0 100644 --- a/src/dstack/_internal/core/compatibility/runs.py +++ b/src/dstack/_internal/core/compatibility/runs.py @@ -49,10 +49,20 @@ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[IncludeExcludeD job_submissions_excludes["job_provisioning_data"] = { "instance_type": {"resources": {"cpu_arch"}} } + jrd_offer_excludes = {} + if any( + js.job_runtime_data and js.job_runtime_data.offer for js in job_submissions + ) and all( + not js.job_runtime_data + or not js.job_runtime_data.offer + or not js.job_runtime_data.offer.backend_data + for js in job_submissions + ): + jrd_offer_excludes["backend_data"] = True if all(map(_should_exclude_job_submission_jrd_cpu_arch, job_submissions)): - job_submissions_excludes["job_runtime_data"] = { - "offer": {"instance": {"resources": {"cpu_arch"}}} - } + jrd_offer_excludes["instance"] = {"resources": {"cpu_arch"}} + if jrd_offer_excludes: + job_submissions_excludes["job_runtime_data"] = {"offer": jrd_offer_excludes} if all(js.exit_status is None for js in job_submissions): job_submissions_excludes["exit_status"] = True if all(js.status_message == "" for js in job_submissions): @@ -71,9 +81,18 @@ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[IncludeExcludeD latest_job_submission_excludes["job_provisioning_data"] = { "instance_type": {"resources": {"cpu_arch"}} } + latest_job_submission_jrd_offer_excludes = {} + if ( + latest_job_submission.job_runtime_data + and latest_job_submission.job_runtime_data.offer + and not latest_job_submission.job_runtime_data.offer.backend_data + ): + latest_job_submission_jrd_offer_excludes["backend_data"] = True if _should_exclude_job_submission_jrd_cpu_arch(latest_job_submission): + latest_job_submission_jrd_offer_excludes["instance"] = {"resources": {"cpu_arch"}} + if latest_job_submission_jrd_offer_excludes: latest_job_submission_excludes["job_runtime_data"] = { - "offer": {"instance": {"resources": {"cpu_arch"}}} + "offer": latest_job_submission_jrd_offer_excludes } if latest_job_submission.exit_status is None: latest_job_submission_excludes["exit_status"] = True diff --git a/src/dstack/_internal/core/models/instances.py b/src/dstack/_internal/core/models/instances.py index f1f802d54..204b423c1 100644 --- a/src/dstack/_internal/core/models/instances.py +++ b/src/dstack/_internal/core/models/instances.py @@ -1,6 +1,6 @@ import datetime from enum import Enum -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from uuid import UUID import gpuhunt @@ -184,6 +184,7 @@ class InstanceOffer(CoreModel): instance: InstanceType region: str price: float + backend_data: dict[str, Any] = {} class InstanceOfferWithAvailability(InstanceOffer): diff --git a/src/dstack/_internal/server/services/offers.py b/src/dstack/_internal/server/services/offers.py index be0b54bd8..1b2525449 100644 --- a/src/dstack/_internal/server/services/offers.py +++ b/src/dstack/_internal/server/services/offers.py @@ -215,6 +215,7 @@ def generate_shared_offer( ), region=offer.region, price=offer.price, + backend_data=offer.backend_data, availability=offer.availability, blocks=blocks, total_blocks=total_blocks,