From dd07ad8efcdbad58c42dbdb810ceb48d0bab86a9 Mon Sep 17 00:00:00 2001
From: Jvst Me
Date: Sun, 14 Sep 2025 00:07:10 +0200
Subject: [PATCH] Support A4 instances with the B200 GPU on GCP

This implementation allows provisioning both individual A4 instances
and clusters, but clusters do not yet support high-speed networking,
since it requires a
[different network setup](https://cloud.google.com/ai-hypercomputer/docs/create/create-vm#setup-network).
---
 src/dstack/_internal/core/backends/base/offers.py      | 1 +
 src/dstack/_internal/core/backends/gcp/compute.py      | 4 ++--
 src/dstack/_internal/core/backends/gcp/resources.py    | 2 ++
 src/dstack/_internal/server/background/tasks/common.py | 2 ++
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/dstack/_internal/core/backends/base/offers.py b/src/dstack/_internal/core/backends/base/offers.py
index d3d004172..7421c2877 100644
--- a/src/dstack/_internal/core/backends/base/offers.py
+++ b/src/dstack/_internal/core/backends/base/offers.py
@@ -20,6 +20,7 @@
 SUPPORTED_GPUHUNT_FLAGS = [
     "oci-spot",
     "lambda-arm",
+    "gcp-a4",
 ]
 
 
diff --git a/src/dstack/_internal/core/backends/gcp/compute.py b/src/dstack/_internal/core/backends/gcp/compute.py
index 506308ef4..4250bb02b 100644
--- a/src/dstack/_internal/core/backends/gcp/compute.py
+++ b/src/dstack/_internal/core/backends/gcp/compute.py
@@ -856,8 +856,8 @@ def _has_gpu_quota(quotas: Dict[str, float], resources: Resources) -> bool:
     gpu = resources.gpus[0]
     if _is_tpu(gpu.name):
         return True
-    if gpu.name == "H100":
-        # H100 and H100_MEGA quotas are not returned by `regions_client.list`
+    if gpu.name in ["B200", "H100"]:
+        # B200, H100 and H100_MEGA quotas are not returned by `regions_client.list`
         return True
     quota_name = f"NVIDIA_{gpu.name}_GPUS"
     if gpu.name == "A100" and gpu.memory_mib == 80 * 1024:
diff --git a/src/dstack/_internal/core/backends/gcp/resources.py b/src/dstack/_internal/core/backends/gcp/resources.py
index c56caddf9..b22012f63 100644
--- a/src/dstack/_internal/core/backends/gcp/resources.py
+++ b/src/dstack/_internal/core/backends/gcp/resources.py
@@ -19,6 +19,7 @@
 DSTACK_GATEWAY_TAG = "dstack-gateway-instance"
 
 supported_accelerators = [
+    {"accelerator_name": "nvidia-b200", "gpu_name": "B200", "memory_mb": 1024 * 180},
     {"accelerator_name": "nvidia-a100-80gb", "gpu_name": "A100", "memory_mb": 1024 * 80},
     {"accelerator_name": "nvidia-tesla-a100", "gpu_name": "A100", "memory_mb": 1024 * 40},
     {"accelerator_name": "nvidia-l4", "gpu_name": "L4", "memory_mb": 1024 * 24},
@@ -476,5 +477,6 @@ def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
             "n4-",
             "h3-",
             "v6e",
+            "a4-",
         ]
     )
diff --git a/src/dstack/_internal/server/background/tasks/common.py b/src/dstack/_internal/server/background/tasks/common.py
index 3f370c4aa..b048b9de0 100644
--- a/src/dstack/_internal/server/background/tasks/common.py
+++ b/src/dstack/_internal/server/background/tasks/common.py
@@ -19,4 +19,6 @@ def get_provisioning_timeout(backend_type: BackendType, instance_type_name: str)
         return timedelta(minutes=20)
     if backend_type == BackendType.VULTR and instance_type_name.startswith("vbm"):
         return timedelta(minutes=55)
+    if backend_type == BackendType.GCP and instance_type_name == "a4-highgpu-8g":
+        return timedelta(minutes=16)
     return timedelta(minutes=10)